apache · attilapiros · Jul 13, 2020 · Jul 14, 2020 · Jul 15, 2020 · Aug 3, 2020
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -245,11 +245,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       error("Must specify a primary resource (JAR or Python or R file)")
     }
     if (driverMemory != null
-        && Try(JavaUtils.byteStringAsBytes(driverMemory)).getOrElse(-1L) <= 0) {
+        && Try(JavaUtils.byteStringAsMb(driverMemory)).getOrElse(-1L) <= 0) {
       error("Driver memory must be a positive number")
     }
     if (executorMemory != null
-        && Try(JavaUtils.byteStringAsBytes(executorMemory)).getOrElse(-1L) <= 0) {
+        && Try(JavaUtils.byteStringAsMb(executorMemory)).getOrElse(-1L) <= 0) {
       error("Executor memory must be a positive number")
     }
     if (executorCores != null && Try(executorCores.toInt).getOrElse(-1) <= 0) {

diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -222,7 +222,7 @@ object UnifiedMemoryManager {
     }
     // SPARK-12759 Check executor memory to fail fast if memory is insufficient
     if (conf.contains(config.EXECUTOR_MEMORY)) {
-      val executorMemory = conf.getSizeAsBytes(config.EXECUTOR_MEMORY.key)
+      val executorMemory = conf.getSizeAsMb(config.EXECUTOR_MEMORY.key)
       if (executorMemory < minSystemMemory) {
         throw new IllegalArgumentException(s"Executor memory $executorMemory must be at least " +
           s"$minSystemMemory. Please increase executor memory using the " +

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -172,7 +172,7 @@ of the most common options to set are:
   <td>
     Amount of memory to use for the driver process, i.e. where SparkContext is initialized, in the
     same format as JVM memory strings with a size unit suffix ("k", "m", "g" or "t")
-    (e.g. <code>512m</code>, <code>2g</code>).
+    (e.g. <code>512m</code>, <code>2g</code>) using "m" as the default unit.
     <br />
     <em>Note:</em> In client mode, this config must not be set through the <code>SparkConf</code>
     directly in your application, because the driver JVM has already started at that point.
@@ -249,7 +249,8 @@ of the most common options to set are:
   <td>1g</td>
   <td>
     Amount of memory to use per executor process, in the same format as JVM memory strings with
-    a size unit suffix ("k", "m", "g" or "t") (e.g. <code>512m</code>, <code>2g</code>).
+    a size unit suffix ("k", "m", "g" or "t") (e.g. <code>512m</code>, <code>2g</code>) using
+    "m" as the default unit.
   </td>
   <td>0.7.0</td>
 </tr>

diff --git a/docs/monitoring.md b/docs/monitoring.md
@@ -70,7 +70,11 @@ The history server can be configured as follows:
   <tr><th style="width:21%">Environment Variable</th><th>Meaning</th></tr>
   <tr>
     <td><code>SPARK_DAEMON_MEMORY</code></td>
-    <td>Memory to allocate to the history server (default: 1g).</td>
+    <td>
+    Memory to allocate to the history server (default: 1g). This can be configured in the same
+    format as JVM memory strings with a size unit suffix ("k", "m", "g" or "t") using "m" as
+    the default unit.
+    </td>
   </tr>
   <tr>
     <td><code>SPARK_DAEMON_JAVA_OPTS</code></td>

diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
@@ -144,7 +144,10 @@ You can optionally configure the cluster further by setting environment variable
   </tr>
   <tr>
     <td><code>SPARK_WORKER_MEMORY</code></td>
-    <td>Total amount of memory to allow Spark applications to use on the machine, e.g. <code>1000m</code>, <code>2g</code> (default: total memory minus 1 GiB); note that each application's <i>individual</i> memory is configured using its <code>spark.executor.memory</code> property.</td>
+    <td>
+    Total amount of memory to allow Spark applications to use on the machine, e.g. <code>1000m</code>, <code>2g</code> (default: total memory minus 1 GiB); note that each application's <i>individual</i> memory is configured using its <code>spark.executor.memory</code> property.
+    This can be configured in the same format as JVM memory strings with a size unit suffix ("k", "m", "g" or "t") using "m" as the default unit.
+    </td>
   </tr>
   <tr>
     <td><code>SPARK_WORKER_PORT</code></td>
@@ -164,7 +167,10 @@ You can optionally configure the cluster further by setting environment variable
   </tr>
   <tr>
     <td><code>SPARK_DAEMON_MEMORY</code></td>
-    <td>Memory to allocate to the Spark master and worker daemons themselves (default: 1g).</td>
+    <td>
+    Memory to allocate to the Spark master and worker daemons themselves (default: 1g). This can be configured in the same
+    format as JVM memory strings with a size unit suffix ("k", "m", "g" or "t") using "m" as the default unit.
+    </td>
   </tr>
   <tr>
     <td><code>SPARK_DAEMON_JAVA_OPTS</code></td>

diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
@@ -328,4 +328,21 @@ static String findJarsDir(String sparkHome, String scalaVersion, boolean failIfN
     return libdir.getAbsolutePath();
   }
 
+  /**
+   * Add "m" as the default suffix unit when no explicit unit is given.
+   *
+   * @param memoryString non-null memory setting such as "4200", "512m" or "2g".
+   * @return the input with "m" appended when it consists solely of one or more ASCII digits;
+   *         otherwise the input unchanged.
+   */
+  static String addDefaultMSuffixIfNeeded(String memoryString) {
+    // At least one ASCII digit is required: "" must stay as is rather than become a bare "m".
+    if (!memoryString.matches("[0-9]+")) {
+      return memoryString;
+    }
+    System.err.println("Memory setting without explicit unit (" +
+      memoryString + ") is taken to be in MB by default! For details check SPARK-32293.");
+    return memoryString + "m";
+  }
+
 }
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java
@@ -108,7 +108,8 @@ public List<String> buildCommand(Map<String, String> env)
     }
 
     String mem = firstNonEmpty(memKey != null ? System.getenv(memKey) : null, DEFAULT_MEM);
-    cmd.add("-Xmx" + mem);
+    // A value without an explicit unit gets the default "m" suffix (SPARK-32293).
+    cmd.add("-Xmx" + addDefaultMSuffixIfNeeded(mem));
     cmd.add(className);
     cmd.addAll(classArgs);
     return cmd;

diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -285,7 +285,7 @@ private List<String> buildSparkSubmitCommand(Map<String, String> env)
         isThriftServer(mainClass) ? System.getenv("SPARK_DAEMON_MEMORY") : null;
       String memory = firstNonEmpty(tsMemory, config.get(SparkLauncher.DRIVER_MEMORY),
         System.getenv("SPARK_DRIVER_MEMORY"), System.getenv("SPARK_MEM"), DEFAULT_MEM);
-      cmd.add("-Xmx" + memory);
+      cmd.add("-Xmx" + addDefaultMSuffixIfNeeded(memory));
       addOptionString(cmd, driverDefaultJavaOptions);
       addOptionString(cmd, driverExtraJavaOptions);
       mergeEnvPathList(env, getLibPathEnvName(),

diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -115,6 +115,22 @@ public void testCliParser() throws Exception {
       Collections.indexOfSubList(cmd, Arrays.asList(parser.CONF, "spark.randomOption=foo")) > 0);
   }
 
+  @Test
+  public void testParserWithDefaultUnit() throws Exception {
+    // The driver memory below carries no unit suffix.
+    List<String> submitArgs = Arrays.asList(
+      parser.MASTER, "local",
+      parser.DRIVER_MEMORY, "4200",
+      parser.DRIVER_CLASS_PATH, "/driverCp",
+      SparkLauncher.NO_RESOURCE);
+    Map<String, String> childEnv = new HashMap<>();
+    List<String> command = buildCommand(submitArgs, childEnv);
+
+    // The resulting -Xmx option must carry the "m" suffix.
+    boolean hasMbSuffix = command.contains("-Xmx4200m");
+    assertTrue("Driver -Xmx should be configured in MB by default.", hasMbSuffix);
+  }
+
   @Test
   public void testShellCliParser() throws Exception {
     List<String> sparkSubmitArgs = Arrays.asList(

diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
@@ -78,11 +78,16 @@ case "$1" in
     ;;
   executor)
     shift 1
+    MEMORY_WITH_UNIT=$SPARK_EXECUTOR_MEMORY
+    if [[ $MEMORY_WITH_UNIT =~ ^[0-9]+$ ]]
+    then
+        MEMORY_WITH_UNIT="${MEMORY_WITH_UNIT}m"
+    fi
     CMD=(
       ${JAVA_HOME}/bin/java
       "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
-      -Xms$SPARK_EXECUTOR_MEMORY
-      -Xmx$SPARK_EXECUTOR_MEMORY
+      -Xms$MEMORY_WITH_UNIT
+      -Xmx$MEMORY_WITH_UNIT
       -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
       org.apache.spark.executor.CoarseGrainedExecutorBackend
       --driver-url $SPARK_DRIVER_URL