diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index e72b9cb694eb0..49f24dfbd826b 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -737,7 +737,7 @@ package object config {
         "application ends.")
       .version("3.3.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   private[spark] val SHUFFLE_SERVICE_FETCH_RDD_ENABLED =
     ConfigBuilder(Constants.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)
diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
index 5d635011d2ec6..7aec8eeaad423 100644
--- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -936,6 +936,7 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext {
     val newConf = new SparkConf
     newConf.set("spark.shuffle.push.enabled", "true")
     newConf.set("spark.shuffle.service.enabled", "true")
+    newConf.set("spark.shuffle.service.removeShuffle", "false")
     newConf.set(SERIALIZER, "org.apache.spark.serializer.KryoSerializer")
     newConf.set(IS_TESTING, true)
diff --git a/docs/configuration.md b/docs/configuration.md
index 9a686bc514c5d..f2a885be6fc81 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1152,7 +1152,7 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.shuffle.service.removeShuffle</code></td>
-  <td>false</td>
+  <td>true</td>
   <td>
     Whether to use the ExternalShuffleService for deleting shuffle blocks for
     deallocated executors when the shuffle is no longer needed. Without this enabled,
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index e64597b52ccbf..f42dfadb2a2aa 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -40,6 +40,8 @@ license: |
 
 - Since Spark 4.0, Spark uses `~/.ivy2.5.2` as Ivy user directory by default to isolate the existing systems from Apache Ivy's incompatibility. To restore the legacy behavior, you can set `spark.jars.ivy` to `~/.ivy2`.
 
+- Since Spark 4.0, Spark uses the external shuffle service for deleting shuffle blocks for deallocated executors when the shuffle is no longer needed. To restore the legacy behavior, you can set `spark.shuffle.service.removeShuffle` to `false`.
+
 ## Upgrading from Core 3.4 to 3.5
 
 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.