Client.scala
@@ -97,8 +97,8 @@ private[spark] class Client(

   // Executor related configurations
   private val executorMemory = sparkConf.get(EXECUTOR_MEMORY)
-  private val executorMemoryOverhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse(
-    math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toLong, MEMORY_OVERHEAD_MIN)).toInt
+  private val executorMemoryOverhead =
+    YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)

   private val isPython = sparkConf.get(IS_PYTHON_APP)
   private val pysparkWorkerMemory: Int = if (isPython) {

YarnAllocator.scala
@@ -132,8 +132,7 @@ private[yarn] class YarnAllocator(

   // Executor memory in MiB.
   protected val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toInt
   // Additional memory overhead.
-  protected val memoryOverhead: Int = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse(
-    math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toInt, MEMORY_OVERHEAD_MIN)).toInt
+  protected val memoryOverhead: Int = YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)
   protected val pysparkWorkerMemory: Int = if (sparkConf.get(IS_PYTHON_APP)) {
     sparkConf.get(PYSPARK_EXECUTOR_MEMORY).map(_.toInt).getOrElse(0)
   } else {
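
For context, the review thread below notes that YarnAllocator combines these fields into the container request. A minimal sketch of that computation, with hypothetical values (the exact Resource construction is an assumption based on the discussion, not lines shown in this diff):

```scala
import org.apache.hadoop.yarn.api.records.Resource

// Sketch: how the three components sum into the YARN container memory request,
// per the review discussion below. All values are hypothetical, in MiB.
val executorMemory = 4096      // spark.executor.memory
val memoryOverhead = 410       // now YarnSparkHadoopUtil.executorMemoryOverheadRequested(...)
val pysparkWorkerMemory = 0    // spark.executor.pyspark.memory; 0 for non-Python apps
val executorCores = 2

val resource: Resource = Resource.newInstance(
  executorMemory + memoryOverhead + pysparkWorkerMemory, executorCores)
```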

YarnSparkHadoopUtil.scala
@@ -26,12 +26,12 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, P
 import org.apache.hadoop.yarn.util.ConverterUtils

 import org.apache.spark.{SecurityManager, SparkConf}
+import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.launcher.YarnCommandBuilderUtils
 import org.apache.spark.resource.ResourceID
 import org.apache.spark.resource.ResourceUtils._
 import org.apache.spark.util.Utils

-object YarnSparkHadoopUtil {
+object YarnSparkHadoopUtil extends Logging {

   // Additional memory overhead
   // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering
@@ -184,4 +184,29 @@ object YarnSparkHadoopUtil {
    ConverterUtils.toContainerId(containerIdString)
  }

  /**
   * If MEMORY_OFFHEAP_ENABLED is true, ensure that the requested executor memory overhead
   * is at least MEMORY_OFFHEAP_SIZE; otherwise the memory requested for the executor
   * may not be enough.
   */
  def executorMemoryOverheadRequested(sparkConf: SparkConf): Int = {
    val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toInt
    val overhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse(
      math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toInt, MEMORY_OVERHEAD_MIN)).toInt
    val offHeap = if (sparkConf.get(MEMORY_OFFHEAP_ENABLED)) {
      val size =
        sparkConf.getSizeAsMb(MEMORY_OFFHEAP_SIZE.key, MEMORY_OFFHEAP_SIZE.defaultValueString)
      require(size > 0,
        s"${MEMORY_OFFHEAP_SIZE.key} must be > 0 when ${MEMORY_OFFHEAP_ENABLED.key} == true")

Contributor:
Please check whether MEMORY_OFFHEAP_SIZE could equal 0. The definition of MEMORY_OFFHEAP_SIZE only checks that it is >= 0.

Contributor Author (@LuciferYang, Aug 8, 2019):
This conflicts a little with the MemoryManager, shown below:

[screenshot: MemoryManager.tungstenMemoryMode]

If MEMORY_OFFHEAP_ENABLED is enabled, MemoryManager.tungstenMemoryMode will enter the OFF_HEAP branch, which requires MEMORY_OFFHEAP_SIZE > 0, and I think we should be consistent.
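
The screenshot itself is not preserved; it presumably showed the check being referenced, which looks roughly like the following sketch (reconstructed from memory, not the verbatim Spark source):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.internal.config.{MEMORY_OFFHEAP_ENABLED, MEMORY_OFFHEAP_SIZE}
import org.apache.spark.memory.MemoryMode

// Reconstruction of the tungstenMemoryMode logic the comment points at: when
// off-heap is enabled, a strictly positive MEMORY_OFFHEAP_SIZE is required,
// which is the consistency argument being made here.
def tungstenMemoryMode(conf: SparkConf): MemoryMode = {
  if (conf.get(MEMORY_OFFHEAP_ENABLED)) {
    require(conf.get(MEMORY_OFFHEAP_SIZE) > 0,
      "spark.memory.offHeap.size must be > 0 when spark.memory.offHeap.enabled == true")
    MemoryMode.OFF_HEAP
  } else {
    MemoryMode.ON_HEAP
  }
}
```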

Contributor:
Then I think we should change the code here:

.checkValue(_ >= 0, "The off-heap memory size must not be negative")
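
For reference, the definition under discussion is approximately the following (a reconstruction, not verbatim); per the thread below, tightening the check to `_ > 0` while keeping the default of 0 would make any read of the default throw IllegalArgumentException:

```scala
import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.network.util.ByteUnit

// Approximate shape of MEMORY_OFFHEAP_SIZE at the time: the check admits 0,
// and 0 is also the default, so `_ > 0` alone would reject the default value.
val MEMORY_OFFHEAP_SIZE = ConfigBuilder("spark.memory.offHeap.size")
  .doc("The absolute amount of memory which can be used for off-heap allocation.")
  .bytesConf(ByteUnit.BYTE)
  .checkValue(_ >= 0, "The off-heap memory size must not be negative")
  .createWithDefault(0)
```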

Contributor Author (@LuciferYang, Aug 8, 2019):
0 is the default value. Change it to the following?

.checkValue(_ > 0, "The off-heap memory size must be positive")
.createWithDefault(1)

Otherwise it will throw IllegalArgumentException when offHeapEnabled is false and the default value is 0.

Contributor Author (@LuciferYang):
Maybe we should give it a suitable default value, like 1073741824 (1g)?

Contributor Author (@LuciferYang):
@jerryshao Do we need to change 0 to a suitable default value?

Contributor:
I see; then I would suggest not changing it. There seems to be no single value that covers most scenarios, so it's best to leave it as it is.

Contributor Author (@LuciferYang):
ok~

Contributor:
It's odd that the check in the config is >= 0; it seems like we should change it, but can you file a separate JIRA for that?

Contributor Author (@LuciferYang, Aug 23, 2019):
OK~ I will file a new JIRA to discuss this issue.

      if (size > overhead) {
        logWarning(s"The value of ${MEMORY_OFFHEAP_SIZE.key} (${size}MB) will be used as the " +
          s"executor memory overhead when requesting resources, to ensure that the executor " +
          s"has enough memory. It is recommended that ${EXECUTOR_MEMORY_OVERHEAD.key} be set " +
          s"to no less than ${MEMORY_OFFHEAP_SIZE.key} when ${MEMORY_OFFHEAP_ENABLED.key} " +
          s"is true.")
      }
      size
    } else 0
    math.max(overhead, offHeap).toInt

Contributor (@jerryshao, Aug 2, 2019):
I was wondering if it would be better to change this to overhead = overhead + offHeap when off-heap is enabled. Off-heap memory is not only used by Spark itself, but also by Netty and other native libraries. If we only guarantee overhead > offHeap, it would somewhat eat into the portion meant for Netty and the others. Just my two cents :).
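
To make the two sizing policies under discussion concrete, a minimal sketch with hypothetical MiB values:

```scala
// Hypothetical sizes in MiB.
val overhead = 409L  // max(0.10 * executorMemory, 384), or the user's explicit setting
val offHeap = 2048L  // spark.memory.offHeap.size, when off-heap is enabled

// Policy in this PR: the requested overhead must at least cover off-heap usage.
val requestedMax = math.max(overhead, offHeap)  // 2048

// Suggested alternative: off-heap is requested on top of the overhead, leaving
// the overhead itself free for Netty and other native allocations.
val requestedSum = overhead + offHeap           // 2457
```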

Contributor Author (@LuciferYang, Aug 2, 2019):
Got it~ So should we add two fields, such as isOffHeapEnabled and executorOffHeapMemory, to YarnAllocator, then use executorMemory + memoryOverhead + pysparkWorkerMemory + executorOffHeapMemory to request resources, and no longer modify memoryOverhead?

Contributor:
Hmm, that's a bit complex as of now:

  1. If we assume overhead memory includes all the off-heap memory Spark uses (everything), then the user must be aware of the different off-heap memory settings and carefully set the overhead number to cover all of them.
  2. If we assume overhead memory covers only additional memory usage not explicitly set through Spark (unlike off-heap memory), then the overall executor memory should add up all the components mentioned above.

I think it would be better to involve others' opinions. CC @vanzin @tgravescs.

Contributor:
Yeah, I always thought it was a bit weird that off-heap was just included in the overhead, but I never took the time to go back and see if it was discussed.

I think it's better to specifically add the off-heap memory instead of including it in the overhead, just like we did for the pyspark memory: executorMemory + memoryOverhead + pysparkWorkerMemory + executorOffHeapMemory. I think that keeps things more consistent and obvious to the user.
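
A sketch of the proposed accounting, with hypothetical values (executorOffHeapMemory is the new field suggested above, not something in this diff):

```scala
// Hypothetical sizes in MiB; executorOffHeapMemory is the field proposed above.
val executorMemory = 4096         // spark.executor.memory
val memoryOverhead = 409          // JVM/Netty/native overhead only
val pysparkWorkerMemory = 512     // spark.executor.pyspark.memory, if set
val executorOffHeapMemory = 2048  // spark.memory.offHeap.size, if enabled

// The container request mirrors the pyspark-memory treatment: every component
// is its own explicit term rather than folded into the overhead.
val containerMemoryMiB =
  executorMemory + memoryOverhead + pysparkWorkerMemory + executorOffHeapMemory  // 7065
```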

Contributor Author (@LuciferYang):
@tgravescs Agreed. Overhead should describe memory not used by Spark itself, such as memory used by Netty or the JVM, as @jerryshao said, and we should describe that clearly in the configuration documentation.

So, change to using executorMemory + memoryOverhead + pysparkWorkerMemory + executorOffHeapMemory to request resources?

Contributor Author (@LuciferYang, Aug 6, 2019):
[screenshot: YarnAllocator, around line 150]

@beliefer Currently YarnAllocator line 150 uses executorMemory + memoryOverhead + pysparkWorkerMemory to create the Resource instance. Is this wrong?

Contributor Author (@LuciferYang):
On the other hand, if the user configures offHeapMemory and pysparkWorkerMemory, they still need to configure the overhead memory and ensure the configuration is reasonable (memoryOverhead > offHeapMemory + pysparkWorkerMemory) in YARN mode, so users may need to care about more details.

Contributor Author (@LuciferYang):
@jerryshao Is the current approach feasible?

Contributor (@beliefer, Aug 6, 2019):
I have checked the code and docs, and there are some inconsistencies. According to the docs, memoryOverhead should comprise pysparkWorkerMemory, but the code behaves differently. We need to fix the inconsistency. I think we should reduce the number of parameters that control memory, because that is simpler. @JoshRosen Could you take a look at this PR?

Contributor:
I agree with @tgravescs's opinion.

Yeah, I understand that; if we are going to change it, 3.0 is a good time to change that behavior. Like I said, I found including off-heap memory in the overhead confusing: there is already a separate config for it, so why should I as a user have to add it into another config?

If overhead memory includes off-heap memory, pysparkWorkerMemory, and others, it is hard for users to set a proper overhead value: they would have to know every other setting and work out a proper number. For 3.0, I think we should give overhead memory a good definition, even if it is inconsistent with older versions.

  }
}
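
A quick worked example of the new helper's behavior (hypothetical values, not part of the diff):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil

// With 4g executor memory and no explicit overhead, the base overhead is
// max(0.10 * 4096, 384) = 409 MB; off-heap is enabled at 2g = 2048 MB, which
// is larger, so 2048 is requested (and the warning above is logged).
val conf = new SparkConf()
  .set("spark.executor.memory", "4g")
  .set("spark.memory.offHeap.enabled", "true")
  .set("spark.memory.offHeap.size", "2g")

assert(YarnSparkHadoopUtil.executorMemoryOverheadRequested(conf) == 2048)
```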

YarnSparkHadoopUtilSuite.scala
@@ -27,7 +27,9 @@ import org.scalatest.Matchers

 import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
 import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
 import org.apache.spark.internal.config.UI._
 import org.apache.spark.util.{ResetSystemProperties, Utils}
@@ -140,4 +142,69 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with Logging
}

}

test("executorMemoryOverhead when MEMORY_OFFHEAP_ENABLED is false, " +
"use MEMORY_OVERHEAD_MIN scene") {
val executorMemoryOverhead =
YarnSparkHadoopUtil.executorMemoryOverheadRequested(new SparkConf())
assert(executorMemoryOverhead == MEMORY_OVERHEAD_MIN)
}

test("executorMemoryOverhead when MEMORY_OFFHEAP_ENABLED is false, " +
"use MEMORY_OVERHEAD_FACTOR * executorMemory scene") {
val executorMemory: Long = 5000
val sparkConf = new SparkConf().set(EXECUTOR_MEMORY, executorMemory)
val executorMemoryOverhead =
YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)
assert(executorMemoryOverhead == executorMemory * MEMORY_OVERHEAD_FACTOR)
}

test("executorMemoryOverhead when MEMORY_OFFHEAP_ENABLED is false, " +
"use EXECUTOR_MEMORY_OVERHEAD config value scene") {
val memoryOverhead: Long = 100
val sparkConf = new SparkConf().set(EXECUTOR_MEMORY_OVERHEAD, memoryOverhead)
val executorMemoryOverhead =
YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)
assert(executorMemoryOverhead == memoryOverhead)
}

test("executorMemoryOverhead when MEMORY_OFFHEAP_ENABLED is true, " +
"use EXECUTOR_MEMORY_OVERHEAD config value scene") {
val memoryOverhead: Long = 100
val offHeapMemory: Long = 50 * 1024 * 1024
val sparkConf = new SparkConf()
.set(EXECUTOR_MEMORY_OVERHEAD, memoryOverhead)
.set(MEMORY_OFFHEAP_ENABLED, true)
.set(MEMORY_OFFHEAP_SIZE, offHeapMemory)
val executorMemoryOverhead =
YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)
assert(executorMemoryOverhead == memoryOverhead)
}

test("executorMemoryOverhead when MEMORY_OFFHEAP_ENABLED is true, " +
"use MEMORY_OFFHEAP_SIZE config value scene") {
val memoryOverhead: Long = 50
val offHeapMemoryInMB = 100
val offHeapMemory: Long = offHeapMemoryInMB * 1024 * 1024
val sparkConf = new SparkConf()
.set(EXECUTOR_MEMORY_OVERHEAD, memoryOverhead)
.set(MEMORY_OFFHEAP_ENABLED, true)
.set(MEMORY_OFFHEAP_SIZE, offHeapMemory)
val executorMemoryOverhead =
YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)
assert(executorMemoryOverhead == offHeapMemoryInMB)
}

test("executorMemoryOverhead when MEMORY_OFFHEAP_ENABLED is true, " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering if we could add some yarn side UT to verify the container memory size, rather than verifying the correctness of off-heap configuration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok ~ I'll try to add it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a new test suite SPARK-28577#YarnAllocator.resource.memory should include offHeapSize when offHeapEnabled is true. in YarnAllocatorSuite

"but MEMORY_OFFHEAP_SIZE not config scene") {
    val memoryOverhead: Long = 50
    val sparkConf = new SparkConf()
      .set(EXECUTOR_MEMORY_OVERHEAD, memoryOverhead)
      .set(MEMORY_OFFHEAP_ENABLED, true)
    val expected = "spark.memory.offHeap.size must be > 0 when spark.memory.offHeap.enabled == true"
    val message = intercept[IllegalArgumentException] {
      YarnSparkHadoopUtil.executorMemoryOverheadRequested(sparkConf)
    }.getMessage
    assert(message.contains(expected))
  }
}