
Commit b5bb9a2

Merge remote-tracking branch 'upstream/master' into SPARK-32444
# Conflicts:
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt
#	sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
2 parents: c436bc4 + 5775073 · commit b5bb9a2

461 files changed: +6918 additions, -12828 deletions


.github/workflows/build_and_test.yml

Lines changed: 20 additions & 6 deletions
@@ -7,6 +7,11 @@ on:
   pull_request:
     branches:
     - master
+  workflow_dispatch:
+    inputs:
+      target:
+        description: 'Target branch to run'
+        required: true
 
 jobs:
   # Build: build Spark and run the tests for specified modules.
@@ -82,18 +87,26 @@ jobs:
       # GitHub Actions' default miniconda to use in pip packaging test.
       CONDA_PREFIX: /usr/share/miniconda
       GITHUB_PREV_SHA: ${{ github.event.before }}
+      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
+    - name: Merge dispatched input branch
+      if: ${{ github.event.inputs.target != '' }}
+      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT, Maven and Zinc
-      uses: actions/cache@v1
+      uses: actions/cache@v2
       with:
-        path: build
-        key: build-${{ hashFiles('**/pom.xml') }}
+        path: |
+          build/apache-maven-*
+          build/zinc-*
+          build/scala-*
+          build/*.jar
+        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
         restore-keys: |
           build-
     - name: Cache Maven local repository
@@ -107,7 +120,7 @@ jobs:
       uses: actions/cache@v2
       with:
         path: ~/.ivy2/cache
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
         restore-keys: |
           ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
     - name: Install JDK ${{ matrix.java }}
@@ -217,7 +230,7 @@ jobs:
       run: |
         # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
         # See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
+        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx
     - name: Install R 4.0
       run: |
         sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
@@ -236,10 +249,11 @@
         ruby-version: 2.7
     - name: Install dependencies for documentation generation
       run: |
+        # pandoc is required to generate PySpark APIs as well in nbsphinx.
         sudo apt-get install -y libcurl4-openssl-dev pandoc
         # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
         # See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
+        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx
         gem install jekyll jekyll-redirect-from rouge
         sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
     - name: Scala linter
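
The new workflow_dispatch trigger means this test workflow can also be started by hand against a branch supplied through the target input; the added "Merge dispatched input branch" step then fast-forwards the checked-out tree onto origin/<target> before the tests run, and GITHUB_INPUT_BRANCH exposes that branch name to later steps. As a rough sketch only (not part of this commit, and assuming an authenticated GitHub CLI and a hypothetical branch name), such a dispatch could look like:

    # Manually run the workflow, passing the required 'target' input.
    gh workflow run build_and_test.yml -f target=my-test-branch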

binder/apt.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+openjdk-8-jre

binder/postBuild

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is used for Binder integration to install PySpark available in
+# Jupyter notebook.
+
+VERSION=$(python -c "exec(open('python/pyspark/version.py').read()); print(__version__)")
+pip install "pyspark[sql,ml,mllib]<=$VERSION"
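
The script above pins the Binder install to the checked-out source: VERSION is read from python/pyspark/version.py and used as an upper bound for pip, presumably so that Binder can fall back to the closest published PySpark release when that exact development version is not on PyPI. Purely as an illustration (the version string below is an assumed example, not taken from this commit), the expanded command would be something like:

    # What postBuild effectively runs once VERSION has been substituted (assumed version string).
    pip install "pyspark[sql,ml,mllib]<=3.1.0.dev0"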

common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java

Lines changed: 1 addition & 1 deletion
@@ -290,7 +290,7 @@ public boolean sharedByteBufAllocators() {
   }
 
   /**
-   * If enabled then off-heap byte buffers will be prefered for the shared ByteBuf allocators.
+   * If enabled then off-heap byte buffers will be preferred for the shared ByteBuf allocators.
    */
   public boolean preferDirectBufsForSharedByteBufAllocators() {
     return conf.getBoolean("spark.network.io.preferDirectBufs", true);

core/src/main/java/org/apache/spark/api/plugin/DriverPlugin.java

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ public interface DriverPlugin {
    * initialization.
    * <p>
    * It's recommended that plugins be careful about what operations are performed in this call,
-   * preferrably performing expensive operations in a separate thread, or postponing them until
+   * preferably performing expensive operations in a separate thread, or postponing them until
    * the application has fully started.
    *
    * @param sc The SparkContext loading the plugin.

core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ private[spark] trait ExecutorAllocationClient {
   * Default implementation delegates to kill, scheduler must override
   * if it supports graceful decommissioning.
   *
-  * @param executorsAndDecominfo identifiers of executors & decom info.
+  * @param executorsAndDecomInfo identifiers of executors & decom info.
   * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down
   *                                 after these executors have been decommissioned.
   * @return the ids of the executors acknowledged by the cluster manager to be removed.

core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 4 additions & 1 deletion
@@ -279,6 +279,9 @@ private[spark] class ExecutorAllocationManager(
     numExecutorsTargetPerResourceProfileId.keys.foreach { rpId =>
       numExecutorsTargetPerResourceProfileId(rpId) = initialNumExecutors
     }
+    numExecutorsToAddPerResourceProfileId.keys.foreach { rpId =>
+      numExecutorsToAddPerResourceProfileId(rpId) = 1
+    }
     executorMonitor.reset()
   }
 
@@ -595,7 +598,7 @@ private[spark] class ExecutorAllocationManager(
       // reset the newExecutorTotal to the existing number of executors
       if (testing || executorsRemoved.nonEmpty) {
         if (decommissionEnabled) {
-          executorMonitor.executorsDecommissioned(executorsRemoved)
+          executorMonitor.executorsDecommissioned(executorsRemoved.toSeq)
         } else {
           executorMonitor.executorsKilled(executorsRemoved.toSeq)
         }

core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala

Lines changed: 3 additions & 2 deletions
@@ -33,8 +33,9 @@ private class HistoryServerMemoryManager(
     conf: SparkConf) extends Logging {
 
   private val maxUsage = conf.get(MAX_IN_MEMORY_STORE_USAGE)
-  private val currentUsage = new AtomicLong(0L)
-  private val active = new HashMap[(String, Option[String]), Long]()
+  // Visible for testing.
+  private[history] val currentUsage = new AtomicLong(0L)
+  private[history] val active = new HashMap[(String, Option[String]), Long]()
 
   def initialize(): Unit = {
     logInfo("Initialized memory manager: " +

core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala

Lines changed: 4 additions & 2 deletions
@@ -54,7 +54,8 @@ private[history] class HybridStore extends KVStore {
   private var backgroundThread: Thread = null
 
   // A hash map that stores all classes that had been writen to inMemoryStore
-  private val klassMap = new ConcurrentHashMap[Class[_], Boolean]
+  // Visible for testing
+  private[history] val klassMap = new ConcurrentHashMap[Class[_], Boolean]
 
   override def getMetadata[T](klass: Class[T]): T = {
     getStore().getMetadata(klass)
@@ -165,8 +166,9 @@ private[history] class HybridStore extends KVStore {
 
   /**
    * This method return the store that we should use.
+   * Visible for testing.
    */
-  private def getStore(): KVStore = {
+  private[history] def getStore(): KVStore = {
     if (shouldUseInMemoryStore.get) {
       inMemoryStore
     } else {

core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ import org.apache.spark.util.Utils.executeAndGetOutput
 /**
  * The default plugin that is loaded into a Spark application to control how custom
  * resources are discovered. This executes the discovery script specified by the user
- * and gets the json output back and contructs ResourceInformation objects from that.
+ * and gets the json output back and constructs ResourceInformation objects from that.
  * If the user specifies custom plugins, this is the last one to be executed and
  * throws if the resource isn't discovered.
  *
