Commits (27)
3f8321a
Integration of ProcessTreeMetrics with PR 21221
Jul 26, 2018
cd16a75
Changing the position of ptree and also make the computation configur…
Aug 7, 2018
94c2b04
Separate metrics for jvm, python and others and update the tests
Aug 8, 2018
062f5d7
Update JsonProtocolSuite
Sep 25, 2018
245221d
[SPARK-24958] Add executors' process tree total memory information to…
Oct 2, 2018
c72be03
Addressing most of Imran's comments
Oct 3, 2018
8f3c938
Fixing the scala style and some minor comments
Oct 3, 2018
f2dca27
Removing types from the definitions wherever possible
Oct 4, 2018
a9f924c
Using Utils methods when possible or use ProcessBuilder
Oct 5, 2018
a11e3a2
make use of Utils.trywithresources
Oct 5, 2018
34ad625
Changing ExecutorMetricType and ExecutorMetrics to use a map instead o…
Oct 9, 2018
415f976
Changing ExecutorMetric to use array instead of a map
Oct 10, 2018
067b81d
A small cosmetic change
Oct 10, 2018
18ee4ad
Merge branch 'master' of https://github.com/apache/spark into ptreeme…
Oct 17, 2018
7f7ed2b
Applying latest review comments. Using Arrays instead of Map for ret…
Oct 23, 2018
f3867ff
Merge branch 'master' of https://github.com/apache/spark into ptreeme…
Nov 5, 2018
0f8f3e2
Fix an issue with JsonProtocolSuite
Nov 5, 2018
ea08c61
Fix scalastyle issue
Nov 5, 2018
8f20857
Applying latest review comments
Nov 14, 2018
6e65360
Using the companion object and other stuff
Nov 27, 2018
4659f4a
Update the use of process builder and applying other review comments
Nov 28, 2018
ef4be38
Small style fixes based on reviews
Nov 30, 2018
805741c
Applying review comments, mostly style related
Nov 30, 2018
4c1f073
Remove the unnecessary trywithresources
Nov 30, 2018
0a7402e
Applying the comment about error handling and some more style fixes
Dec 4, 2018
3d65b35
Removing a return
Dec 6, 2018
6eab315
Reordering of info in a test resource file to avoid confusion
Dec 6, 2018
4 changes: 3 additions & 1 deletion core/src/main/scala/org/apache/spark/Heartbeater.scala
@@ -60,9 +60,11 @@ private[spark] class Heartbeater(
}

/**
- * Get the current executor level metrics. These are returned as an array
+ * Get the current executor level metrics. These are returned as an array, with the index
+ * determined by ExecutorMetricType.metricToOffset
*/
def getCurrentMetrics(): ExecutorMetrics = {

val metrics = new Array[Long](ExecutorMetricType.numMetrics)
var offset = 0
ExecutorMetricType.metricGetters.foreach { metric =>
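The updated doc comment pins down the array contract: each metric set contributes names.length consecutive slots, starting at the running offset of all earlier sets. Below is a minimal standalone sketch of that filling loop; the getter objects and their values are invented for illustration, and Spark's real getters in ExecutorMetricType.metricGetters additionally take a memory manager.

```scala
// Sketch only: models how getCurrentMetrics lays values out in one flat array.
trait MetricGetter {
  def names: Seq[String]              // one name per value produced
  def getMetricValues: Array[Long]    // values, aligned with `names`
}

object HeapGetter extends MetricGetter {
  val names = Seq("JVMHeapMemory")
  def getMetricValues: Array[Long] = Array(Runtime.getRuntime.totalMemory())
}

object TreeGetter extends MetricGetter {
  val names = Seq("ProcessTreeJVMVMemory", "ProcessTreeJVMRSSMemory")
  def getMetricValues: Array[Long] = Array(0L, 0L)  // placeholder values
}

object MetricArraySketch extends App {
  val getters: Seq[MetricGetter] = Seq(HeapGetter, TreeGetter)
  val metrics = new Array[Long](getters.map(_.names.length).sum)
  var offset = 0
  getters.foreach { g =>
    val values = g.getMetricValues
    Array.copy(values, 0, metrics, offset, values.length)
    offset += values.length
  }
  // metrics(0) = JVMHeapMemory; metrics(1) and metrics(2) = the process tree pair.
}
```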
core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala
@@ -27,7 +27,7 @@ import org.apache.spark.metrics.ExecutorMetricType
*/
@DeveloperApi
class ExecutorMetrics private[spark] extends Serializable {
-
+ // Metrics are indexed by ExecutorMetricType.metricToOffset
private val metrics = new Array[Long](ExecutorMetricType.numMetrics)
// the first element is initialized to -1, indicating that the values for the array
// haven't been set yet.
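On the read side, that comment is the whole contract: consumers resolve a metric name through ExecutorMetricType.metricToOffset rather than hard-coding indices. A hedged model of the lookup, assuming an accessor along these lines; the metric names are real Spark metric names, but the offsets and values here are invented:

```scala
import scala.collection.mutable

object MetricLookupSketch extends App {
  // Stand-in for ExecutorMetricType.metricToOffset.
  val metricToOffset = mutable.LinkedHashMap("JVMHeapMemory" -> 0, "JVMOffHeapMemory" -> 1)
  val metrics = Array(512L << 20, 64L << 20)  // made-up byte values

  // Resolve a metric by name instead of by array position.
  def getMetricValue(name: String): Long = metrics(metricToOffset(name))

  assert(getMetricValue("JVMOffHeapMemory") == (64L << 20))
}
```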
core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala
@@ -138,19 +138,22 @@ private[spark] class ProcfsMetricsGetter(
}
val stdoutThread = Utils.processStreamByLine("read stdout for pgrep",
process.getInputStream, appendChildPid)
- val error = process.getErrorStream
- var errorString = ""
- (0 until error.available()).foreach { i =>
-   errorString += error.read()
- }
+ val errorStringBuilder = new StringBuilder()
+ val stdErrThread = Utils.processStreamByLine(
+   "stderr for pgrep",
+   process.getErrorStream,
+   { line =>
+     errorStringBuilder.append(line)
+   })
val exitCode = process.waitFor()
stdoutThread.join()
+ stdErrThread.join()
+ val errorString = errorStringBuilder.toString()
// pgrep will have an exit code of 1 if there is more than one child process
// and an exit code of 2 if there is no child process
if (exitCode != 0 && exitCode > 2) {
val cmd = builder.command().toArray.mkString(" ")
- logWarning(s"Process $cmd" +
-   s" exited with code $exitCode, with stderr:" + s"${errorString} ")
+ logWarning(s"Process $cmd exited with code $exitCode and stderr: $errorString")
throw new SparkException(s"Process $cmd exited with code $exitCode")
}
childPidsInInt
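The change above replaces the `error.available()` loop, which only sees bytes already buffered at the moment of the call, with a dedicated reader thread. Draining stderr concurrently also avoids the classic deadlock where a child blocks writing to a full pipe while the parent blocks in waitFor(). Utils.processStreamByLine is Spark-internal, so the generic sketch below uses a plain thread instead; StderrDrainSketch and the `pgrep -P 1` command are illustrative only:

```scala
import java.io.{BufferedReader, InputStreamReader}
import java.nio.charset.StandardCharsets

object StderrDrainSketch extends App {
  val process = new ProcessBuilder("pgrep", "-P", "1").start()

  // Drain stderr on its own thread so the child can never block on a full pipe.
  val stderr = new StringBuilder
  val drainer = new Thread(() => {
    val reader = new BufferedReader(
      new InputStreamReader(process.getErrorStream, StandardCharsets.UTF_8))
    var line = reader.readLine()
    while (line != null) {
      stderr.append(line).append('\n')
      line = reader.readLine()
    }
  })
  drainer.start()

  // Read stdout on the current thread while the stderr thread drains in parallel.
  val stdout = scala.io.Source.fromInputStream(process.getInputStream).mkString

  val exitCode = process.waitFor()
  drainer.join()
  if (exitCode > 2) {  // mirroring the PR: pgrep exit codes 1 and 2 are tolerated
    throw new RuntimeException(s"pgrep exited with $exitCode, stderr: $stderr")
  }
  println(stdout)
}
```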
@@ -165,43 +168,37 @@

def addProcfsMetricsFromOneProcess(
    allMetrics: ProcfsMetrics,
-   pid: Int):
-   ProcfsMetrics = {
+   pid: Int): ProcfsMetrics = {

- // Hadoop ProcfsBasedProcessTree class used regex and pattern matching to retrive the memory
- // info. I tried that but found it not correct during tests, so I used normal string analysis
- // instead. The computation of RSS and Vmem are based on proc(5):
+ // The computation of RSS and Vmem are based on proc(5):
// http://man7.org/linux/man-pages/man5/proc.5.html
try {
val pidDir = new File(procfsDir, pid.toString)
- Utils.tryWithResource( new InputStreamReader(
+ Utils.tryWithResource(new InputStreamReader(
new FileInputStream(
new File(pidDir, procfsStatFile)), Charset.forName("UTF-8"))) { fReader =>
Utils.tryWithResource( new BufferedReader(fReader)) { in =>
val procInfo = in.readLine
val procInfoSplit = procInfo.split(" ")
- if (procInfoSplit != null) {
-   val vmem = procInfoSplit(22).toLong
-   val rssPages = procInfoSplit(23).toLong
-   if (procInfoSplit(1).toLowerCase(Locale.US).contains("java")) {
-     return allMetrics.copy(
-       jvmVmemTotal = allMetrics.jvmVmemTotal + vmem,
-       jvmRSSTotal = allMetrics.jvmRSSTotal + (rssPages*pageSize)
-     )
-   }
-   else if (procInfoSplit(1).toLowerCase(Locale.US).contains("python")) {
-     return allMetrics.copy(
-       pythonVmemTotal = allMetrics.pythonVmemTotal + vmem,
-       pythonRSSTotal = allMetrics.pythonRSSTotal + (rssPages*pageSize)
-     )
-   }
-   return allMetrics.copy(
-     otherVmemTotal = allMetrics.otherVmemTotal + vmem,
-     otherRSSTotal = allMetrics.otherRSSTotal + (rssPages*pageSize)
-   )
- }
- else {
-   return ProcfsMetrics(0, 0, 0, 0, 0, 0)
- }
+ val vmem = procInfoSplit(22).toLong
+ val rssMem = procInfoSplit(23).toLong * pageSize
+ if (procInfoSplit(1).toLowerCase(Locale.US).contains("java")) {
+   allMetrics.copy(
+     jvmVmemTotal = allMetrics.jvmVmemTotal + vmem,
+     jvmRSSTotal = allMetrics.jvmRSSTotal + (rssMem)
+   )
+ }
+ else if (procInfoSplit(1).toLowerCase(Locale.US).contains("python")) {
+   allMetrics.copy(
+     pythonVmemTotal = allMetrics.pythonVmemTotal + vmem,
+     pythonRSSTotal = allMetrics.pythonRSSTotal + (rssMem)
+   )
+ }
+ else {
+   allMetrics.copy(
+     otherVmemTotal = allMetrics.otherVmemTotal + vmem,
+     otherRSSTotal = allMetrics.otherRSSTotal + (rssMem)
+   )
+ }
}
}
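For reference, fields 23 and 24 of /proc/[pid]/stat in proc(5)'s 1-based numbering are vsize (virtual memory in bytes) and rss (resident pages), which is why the zero-based split hits indices 22 and 23 and why only rss is scaled by the page size. A standalone sketch of the same parse follows; it is Linux-only, requires Java 9+ for ProcessHandle, and assumes the common 4096-byte page size rather than querying it:

```scala
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

object ProcStatSketch extends App {
  // Page size is normally read from `getconf PAGESIZE`; 4096 is assumed here.
  val pageSize = 4096L
  val pid = ProcessHandle.current().pid()

  val stat = new String(
    Files.readAllBytes(Paths.get(s"/proc/$pid/stat")), StandardCharsets.UTF_8)
  // NB: like the PR's code, splitting on spaces assumes the comm field
  // (the executable name in parentheses) contains no spaces itself.
  val fields = stat.split(" ")
  val comm = fields(1)                          // executable name, in parentheses
  val vmemBytes = fields(22).toLong             // vsize: virtual memory, bytes
  val rssBytes = fields(23).toLong * pageSize   // rss: resident pages -> bytes
  println(s"$comm vmem=$vmemBytes rss=$rssBytes")
}
```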
core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala
@@ -146,9 +146,8 @@ private[spark] object ExecutorMetricType {
val definedMetricsAndOffset = mutable.LinkedHashMap.empty[String, Int]
metricGetters.foreach { m =>
- var metricInSet = 0
- while (metricInSet < m.names.length) {
-   definedMetricsAndOffset += (m.names(metricInSet) -> (metricInSet + numberOfMetrics))
-   metricInSet += 1
+ (0 until m.names.length).foreach { idx =>
+   definedMetricsAndOffset += (m.names(idx) -> (idx + numberOfMetrics))
}
numberOfMetrics += m.names.length
}
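The rewritten loop assigns each metric a global offset equal to its index within its own metric set plus the running total of all earlier sets. A tiny worked example of that layout, with two invented metric sets standing in for Spark's metricGetters:

```scala
import scala.collection.mutable

object OffsetLayoutSketch extends App {
  // Two hypothetical metric sets: the first exposes 1 name, the second 2.
  val metricSets: Seq[Seq[String]] = Seq(
    Seq("JVMHeapMemory"),
    Seq("ProcessTreeJVMVMemory", "ProcessTreeJVMRSSMemory")
  )

  var numberOfMetrics = 0
  val metricToOffset = mutable.LinkedHashMap.empty[String, Int]
  metricSets.foreach { names =>
    (0 until names.length).foreach { idx =>
      metricToOffset += (names(idx) -> (idx + numberOfMetrics))
    }
    numberOfMetrics += names.length
  }

  // JVMHeapMemory -> 0, ProcessTreeJVMVMemory -> 1, ProcessTreeJVMRSSMemory -> 2
  assert(metricToOffset("ProcessTreeJVMRSSMemory") == 2)
  assert(numberOfMetrics == 3)
}
```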
core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala
@@ -22,7 +22,7 @@ import org.apache.spark.SparkFunSuite

class ProcfsMetricsGetterSuite extends SparkFunSuite {

- val p = new ProcfsMetricsGetter(getTestResourcePath("ProcessTree"), 4096L)
+ val p = new ProcfsMetricsGetter(getTestResourcePath("ProcfsMetrics"), 4096L)

test("testGetProcessInfo") {
var r = ProcfsMetrics(0, 0, 0, 0, 0, 0)