5 changes: 4 additions & 1 deletion .github/labeler.yml
@@ -151,4 +151,7 @@ WEB UI:
- "**/*UI.scala"
DEPLOY:
- "sbin/**/*"

CONNECT:
- "connect/**/*"
- "**/sql/sparkconnect/**/*"
- "python/pyspark/sql/**/connect/**/*"
5 changes: 5 additions & 0 deletions core/src/main/resources/error/error-classes.json
@@ -555,6 +555,11 @@
"AES-<mode> with the padding <padding> by the <functionName> function."
]
},
"CATALOG_OPERATION" : {
"message" : [
"Catalog <catalogName> does not support <operation>."
]
},
"DESC_TABLE_COLUMN_PARTITION" : {
"message" : [
"DESC TABLE COLUMN for a specific partition."
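The `<catalogName>` and `<operation>` tokens in the new message are template placeholders filled in from the error's message parameters. Below is a minimal, self-contained sketch of that substitution convention; it is illustrative only, not Spark's internal error-message reader, and the `testcat`/`views` values are made up:

```scala
// Minimal sketch: render an error-classes.json message template by replacing
// each <placeholder> with the corresponding message parameter value.
object ErrorTemplateDemo {
  def format(template: String, params: Map[String, String]): String =
    params.foldLeft(template) { case (msg, (key, value)) =>
      msg.replace(s"<$key>", value)
    }

  def main(args: Array[String]): Unit = {
    val template = "Catalog <catalogName> does not support <operation>."
    val rendered = format(template, Map("catalogName" -> "testcat", "operation" -> "views"))
    // Prints: Catalog testcat does not support views.
    println(rendered)
  }
}
```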
11 changes: 8 additions & 3 deletions core/src/test/scala/org/apache/spark/SparkFunSuite.scala
@@ -300,10 +300,15 @@ abstract class SparkFunSuite
parameters: Map[String, String] = Map.empty,
matchPVals: Boolean = false,
queryContext: Array[QueryContext] = Array.empty): Unit = {
assert(exception.getErrorClass === errorClass)
val mainErrorClass :: tail = errorClass.split("\\.").toList
assert(tail.isEmpty || tail.length == 1)
// TODO: remove the `errorSubClass` parameter.
assert(tail.isEmpty || errorSubClass.isEmpty)
assert(exception.getErrorClass === mainErrorClass)
if (exception.getErrorSubClass != null) {
assert(errorSubClass.isDefined)
assert(exception.getErrorSubClass === errorSubClass.get)
val subClass = errorSubClass.orElse(tail.headOption)
assert(subClass.isDefined)
assert(exception.getErrorSubClass === subClass.get)
}
sqlState.foreach(state => assert(exception.getSqlState === state))
val expectedParameters = exception.getMessageParameters.asScala
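For reference, here is a standalone sketch of the splitting behaviour introduced above, using an assumed dotted name; the parent class of the new `CATALOG_OPERATION` entry is not shown in this diff, so `UNSUPPORTED_FEATURE` below is only an illustrative guess:

```scala
// Standalone illustration of the split performed in checkError: a dotted
// error class name yields the main class plus at most one subclass.
val errorClass = "UNSUPPORTED_FEATURE.CATALOG_OPERATION" // assumed example name
val mainErrorClass :: tail = errorClass.split("\\.").toList
assert(mainErrorClass == "UNSUPPORTED_FEATURE")
assert(tail.headOption.contains("CATALOG_OPERATION"))

// A name without a dot yields an empty subclass tail.
val plain :: rest = "INTERNAL_ERROR".split("\\.").toList
assert(plain == "INTERNAL_ERROR" && rest.isEmpty)
```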
core/src/test/scala/org/apache/spark/deploy/history/ChromeUIHistoryServerSuite.scala
@@ -20,13 +20,14 @@ package org.apache.spark.deploy.history
import org.openqa.selenium.WebDriver
import org.openqa.selenium.chrome.{ChromeDriver, ChromeOptions}

import org.apache.spark.tags.ChromeUITest
import org.apache.spark.internal.config.History.HybridStoreDiskBackend
import org.apache.spark.tags.{ChromeUITest, ExtendedLevelDBTest}


/**
* Tests for HistoryServer with Chrome.
*/
@ChromeUITest
class ChromeUIHistoryServerSuite
abstract class ChromeUIHistoryServerSuite
extends RealBrowserUIHistoryServerSuite("webdriver.chrome.driver") {

override var webDriver: WebDriver = _
@@ -48,3 +49,14 @@ class ChromeUIHistoryServerSuite
}
}
}

@ChromeUITest
@ExtendedLevelDBTest
class LevelDBBackendChromeUIHistoryServerSuite extends ChromeUIHistoryServerSuite {
override protected def diskBackend: HybridStoreDiskBackend.Value = HybridStoreDiskBackend.LEVELDB
}

@ChromeUITest
class RocksDBBackendChromeUIHistoryServerSuite extends ChromeUIHistoryServerSuite {
override protected def diskBackend: HybridStoreDiskBackend.Value = HybridStoreDiskBackend.ROCKSDB
}
core/src/test/scala/org/apache/spark/deploy/history/RealBrowserUIHistoryServerSuite.scala
@@ -28,7 +28,7 @@ import org.scalatestplus.selenium.WebBrowser

import org.apache.spark._
import org.apache.spark.internal.config.{EVENT_LOG_STAGE_EXECUTOR_METRICS, EXECUTOR_PROCESS_TREE_METRICS_ENABLED}
import org.apache.spark.internal.config.History.{HISTORY_LOG_DIR, LOCAL_STORE_DIR, UPDATE_INTERVAL_S}
import org.apache.spark.internal.config.History.{HISTORY_LOG_DIR, HYBRID_STORE_DISK_BACKEND, HybridStoreDiskBackend, LOCAL_STORE_DIR, UPDATE_INTERVAL_S}
import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.util.{ResetSystemProperties, Utils}

@@ -48,6 +48,8 @@ abstract class RealBrowserUIHistoryServerSuite(val driverProp: String)
private var server: HistoryServer = null
private var port: Int = -1

protected def diskBackend: HybridStoreDiskBackend.Value

override def beforeAll(): Unit = {
super.beforeAll()
assume(
@@ -79,6 +81,7 @@ abstract class RealBrowserUIHistoryServerSuite(val driverProp: String)
.set(LOCAL_STORE_DIR, storeDir.getAbsolutePath())
.set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true)
.set(EXECUTOR_PROCESS_TREE_METRICS_ENABLED, true)
.set(HYBRID_STORE_DISK_BACKEND, diskBackend.toString)
conf.setAll(extraConf)
provider = new FsHistoryProvider(conf)
provider.checkForLogs()
8 changes: 4 additions & 4 deletions dev/deps/spark-deps-hadoop-2-hive-2.3
@@ -187,10 +187,10 @@ lapack/3.0.2//lapack-3.0.2.jar
leveldbjni-all/1.8//leveldbjni-all-1.8.jar
libfb303/0.9.3//libfb303-0.9.3.jar
libthrift/0.12.0//libthrift-0.12.0.jar
log4j-1.2-api/2.18.0//log4j-1.2-api-2.18.0.jar
log4j-api/2.18.0//log4j-api-2.18.0.jar
log4j-core/2.18.0//log4j-core-2.18.0.jar
log4j-slf4j-impl/2.18.0//log4j-slf4j-impl-2.18.0.jar
log4j-1.2-api/2.19.0//log4j-1.2-api-2.19.0.jar
log4j-api/2.19.0//log4j-api-2.19.0.jar
log4j-core/2.19.0//log4j-core-2.19.0.jar
log4j-slf4j-impl/2.19.0//log4j-slf4j-impl-2.19.0.jar
logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar
lz4-java/1.8.0//lz4-java-1.8.0.jar
mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar
8 changes: 4 additions & 4 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -171,10 +171,10 @@ lapack/3.0.2//lapack-3.0.2.jar
leveldbjni-all/1.8//leveldbjni-all-1.8.jar
libfb303/0.9.3//libfb303-0.9.3.jar
libthrift/0.12.0//libthrift-0.12.0.jar
log4j-1.2-api/2.18.0//log4j-1.2-api-2.18.0.jar
log4j-api/2.18.0//log4j-api-2.18.0.jar
log4j-core/2.18.0//log4j-core-2.18.0.jar
log4j-slf4j-impl/2.18.0//log4j-slf4j-impl-2.18.0.jar
log4j-1.2-api/2.19.0//log4j-1.2-api-2.19.0.jar
log4j-api/2.19.0//log4j-api-2.19.0.jar
log4j-core/2.19.0//log4j-core-2.19.0.jar
log4j-slf4j-impl/2.19.0//log4j-slf4j-impl-2.19.0.jar
logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar
lz4-java/1.8.0//lz4-java-1.8.0.jar
mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar
8 changes: 5 additions & 3 deletions docs/running-on-yarn.md
@@ -852,9 +852,11 @@ The following extra configuration options are available when the shuffle service
<td><code>spark.shuffle.service.db.backend</code></td>
<td>LEVELDB</td>
<td>
To specify the kind of disk-base store used in shuffle service state store, supports `LEVELDB` and `ROCKSDB` now
and `LEVELDB` as default value.
The original data store in `LevelDB/RocksDB` will not be automatically convert to another kind of storage now.
When work-preserving restart is enabled in YARN, this specifies the disk-based store used for the
shuffle service state store. It supports `LEVELDB` and `ROCKSDB`, with `LEVELDB` as the default value.
Existing data in `LevelDB`/`RocksDB` is not automatically converted to the other kind of storage:
the original data store is retained and a new data store of the selected type is created when
switching storage types.
</td>
<td>3.4.0</td>
</tr>
2 changes: 1 addition & 1 deletion pom.xml
@@ -113,7 +113,7 @@
<exec-maven-plugin.version>1.6.0</exec-maven-plugin.version>
<sbt.project.name>spark</sbt.project.name>
<slf4j.version>1.7.36</slf4j.version>
<log4j.version>2.18.0</log4j.version>
<log4j.version>2.19.0</log4j.version>
<!-- make sure to update IsolatedClientLoader whenever this version is changed -->
<hadoop.version>3.3.4</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
2 changes: 1 addition & 1 deletion python/pyspark/pandas/frame.py
@@ -1448,7 +1448,7 @@ def corr(self, method: str = "pearson", min_periods: Optional[int] = None) -> "D
1. Pearson, Kendall and Spearman correlation are currently computed using pairwise
complete observations.

2. The complexity of Spearman correlation is O(#row * #row), if the dataset is too
2. The complexity of Kendall correlation is O(#row * #row); if the dataset is too
large, sampling ahead of correlation computation is recommended.

Examples