ApplicationMaster.scala
@@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api._
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException
import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils
import org.apache.hadoop.yarn.util.{ConverterUtils, Records}

import org.apache.spark._
@@ -51,33 +52,16 @@ import org.apache.spark.util._
/**
* Common application master functionality for Spark on Yarn.
*/
private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends Logging {
private[spark] class ApplicationMaster(args: ApplicationMasterArguments, sparkConf: SparkConf,
Contributor

This doesn't follow Spark's convention for multi-line arguments.

This also looks a little odd now, because there are conflicting arguments. ApplicationMasterArguments is now only used in cluster mode, and everything else is expected to be provided in the other parameters. So while this is the simpler change, it's also a little ugly.

I don't really have a good suggestion right now, but it's something to think about.

Author

I changed the default constructor and added another constructor. Please take a look and let me know if anything can be done better.
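For reference, Spark's usual style for multi-line declarations would look roughly like this (just a sketch of the convention, not necessarily the final signature):

private[spark] class ApplicationMaster(
    args: ApplicationMasterArguments,
    sparkConf: SparkConf,
    yarnConf: YarnConfiguration)
  extends Logging {
  // ... body unchanged
}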

yarnConf: YarnConfiguration) extends Logging {

// TODO: Currently, task to container is computed once (TaskSetManager) - which need not be
// optimal as more containers are available. Might need to handle this better.

private val isClusterMode = args.userClass != null

private val sparkConf = new SparkConf()
if (args.propertiesFile != null) {
Utils.getPropertiesFromFile(args.propertiesFile).foreach { case (k, v) =>
sparkConf.set(k, v)
}
}

private val securityMgr = new SecurityManager(sparkConf)

// Set system properties for each config entry. This covers two use cases:
// - The default configuration stored by the SparkHadoopUtil class
// - The user application creating a new SparkConf in cluster mode
//
// Both cases create a new SparkConf object which reads these configs from system properties.
sparkConf.getAll.foreach { case (k, v) =>
sys.props(k) = v
}

private val yarnConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf))

private val ugi = {
val original = UserGroupInformation.getCurrentUser()

@@ -619,7 +603,8 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends
try {
val preserveFiles = sparkConf.get(PRESERVE_STAGING_FILES)
if (!preserveFiles) {
stagingDirPath = new Path(System.getenv("SPARK_YARN_STAGING_DIR"))
stagingDirPath = new Path(System.getProperty("SPARK_YARN_STAGING_DIR",
System.getenv("SPARK_YARN_STAGING_DIR")))
logInfo("Deleting staging directory " + stagingDirPath)
val fs = stagingDirPath.getFileSystem(yarnConf)
fs.delete(stagingDirPath, true)
@@ -666,7 +651,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends

/** Add the Yarn IP filter that is required for properly securing the UI. */
private def addAmIpFilter(driver: Option[RpcEndpointRef]) = {
val proxyBase = System.getenv(ApplicationConstants.APPLICATION_WEB_PROXY_BASE_ENV)
val proxyBase = getProxyBase
val amFilter = "org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter"
val params = client.getAmIpFilterParams(yarnConf, proxyBase)
driver match {
Expand All @@ -679,6 +664,14 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends
}
}

private def getProxyBase: String = {
var proxyBase = System.getenv(ApplicationConstants.APPLICATION_WEB_PROXY_BASE_ENV)
if (proxyBase == null) {
proxyBase = ProxyUriUtils.getPath(getAttemptId().getApplicationId)
}
proxyBase
}

/**
* Start the user class, which contains the spark driver, in a separate Thread.
* If the main routine exits cleanly or exits with System.exit(N) for any N
@@ -822,7 +815,23 @@ object ApplicationMaster extends Logging {
def main(args: Array[String]): Unit = {
SignalUtils.registerLogger(log)
val amArgs = new ApplicationMasterArguments(args)
master = new ApplicationMaster(amArgs)
val sparkConf = new SparkConf()
if (amArgs.propertiesFile != null) {
Utils.getPropertiesFromFile(amArgs.propertiesFile).foreach { case (k, v) =>
sparkConf.set(k, v)
}
}
// Set system properties for each config entry. This covers two use cases:
// - The default configuration stored by the SparkHadoopUtil class
// - The user application creating a new SparkConf in cluster mode
//
// Both cases create a new SparkConf object which reads these configs from system properties.
sparkConf.getAll.foreach { case (k, v) =>
sys.props(k) = v
}

val yarnConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf))
master = new ApplicationMaster(amArgs, sparkConf, yarnConf)
System.exit(master.run())
}

Client.scala
@@ -34,7 +34,7 @@ import com.google.common.io.Files
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.io.DataOutputBuffer
import org.apache.hadoop.io.{DataOutputBuffer, Text}
import org.apache.hadoop.mapreduce.MRJobConfig
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.apache.hadoop.util.StringUtils
@@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication}
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier
import org.apache.hadoop.yarn.util.Records

import org.apache.spark.{SecurityManager, SparkConf, SparkException}
@@ -69,6 +70,10 @@ private[spark] class Client(

private val isClusterMode = sparkConf.get("spark.submit.deployMode", "client") == "cluster"

private val isClientUnmanagedAMEnabled =
sparkConf.getBoolean("spark.yarn.un-managed-am", false) && !isClusterMode
Contributor

This should be a config constant. Also unmanagedAM is more in line with other config names.

Author

Updated the config name and also added config constants.
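Roughly along these lines, assuming the constant follows the usual ConfigBuilder pattern in the YARN config object (the key name and doc text below are placeholders, not the merged code):

private[spark] val YARN_UNMANAGED_AM = ConfigBuilder("spark.yarn.unmanagedAM.enabled")
  .doc("In client mode, whether to launch the Application Master service as part of the " +
    "client process instead of in a YARN container.")
  .booleanConf
  .createWithDefault(false)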

private var amServiceStarted = false
Contributor

Do you need this extra flag? Could you just check if appMaster != null?


// AM related configurations
private val amMemory = if (isClusterMode) {
sparkConf.get(DRIVER_MEMORY).toInt
@@ -282,7 +287,10 @@ private[spark] class Client(
"does not support it", e)
}
}

if (isClientUnmanagedAMEnabled) {
// Set Unmanaged AM to true in Application Submission Context
appContext.setUnmanagedAM(true)
Contributor

appContext.setUnmanagedAM(isClientUnmanagedAMEnabled)

Which also makes the comment unnecessary.

Author

updated

}
appContext
}

@@ -656,7 +664,9 @@ private[spark] class Client(
// Clear the cache-related entries from the configuration to avoid them polluting the
// UI's environment page. This works for client mode; for cluster mode, this is handled
// by the AM.
CACHE_CONFIGS.foreach(sparkConf.remove)
if (!isClientUnmanagedAMEnabled) {
Contributor

Why is this needed in the new mode?

Author

It clears the classpath entries and leads to this error in the executors:

Error: Could not find or load main class org.apache.spark.executor.CoarseGrainedExecutorBackend

Contributor

I think this is happening because you're starting the AM after these are removed from the conf. Should probably juggle things around or change how these are provided to the AM, since these configs are super noisy and shouldn't really show up in the UI.

CACHE_CONFIGS.foreach(sparkConf.remove)
}

localResources
}
@@ -784,6 +794,9 @@ private[spark] class Client(
val env = new HashMap[String, String]()
populateClasspath(args, hadoopConf, sparkConf, env, sparkConf.get(DRIVER_CLASS_PATH))
env("SPARK_YARN_STAGING_DIR") = stagingDirPath.toString
if (isClientUnmanagedAMEnabled) {
System.setProperty("SPARK_YARN_STAGING_DIR", stagingDirPath.toString)
Contributor

Can this be propagated some other way? Using system properties is kinda hacky, and makes it dangerous to run another Spark app later in the same JVM.

Author

I changed it to derive the staging directory from the Spark conf and the application id.
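A hypothetical sketch of what that could look like on the AM side, assuming the existing STAGING_DIR (spark.yarn.stagingDir) entry, the .sparkStaging/<appId> layout used by Client, and an appId taken from the application attempt:

// Rebuild the staging path from configuration and the application id
// instead of reading a system property.
val stagingBaseDir = sparkConf.get(STAGING_DIR)
  .map(new Path(_))
  .getOrElse(FileSystem.get(yarnConf).getHomeDirectory())
val stagingDirPath = new Path(stagingBaseDir, ".sparkStaging/" + appId.toString)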

}
env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName()
if (loginFromKeytab) {
val credentialsFile = "credentials-" + UUID.randomUUID().toString
@@ -1104,14 +1117,39 @@ private[spark] class Client(
if (returnOnRunning && state == YarnApplicationState.RUNNING) {
return (state, report.getFinalApplicationStatus)
}

if (state == YarnApplicationState.ACCEPTED && isClientUnmanagedAMEnabled
&& !amServiceStarted && report.getAMRMToken != null) {
Contributor

indent one more level

Author

updated

amServiceStarted = true
startApplicationMasterService(report)
}
lastState = state
}

// Never reached, but keeps compiler happy
throw new SparkException("While loop is depleted! This should never happen...")
}

private def startApplicationMasterService(report: ApplicationReport) = {
Contributor

: Unit =

But given you should be explicitly stopping the AM, this should probably return the AM itself.

// Add AMRMToken to establish connection between RM and AM
val token = report.getAMRMToken
val amRMToken: org.apache.hadoop.security.token.Token[AMRMTokenIdentifier] =
Contributor

Why do you need to make this copy? Isn't the Token above enough?

Author

report.getAMRMToken returns an org.apache.hadoop.yarn.api.records.Token instance, but currentUGI.addToken expects an org.apache.hadoop.security.token.Token instance.

new org.apache.hadoop.security.token.Token[AMRMTokenIdentifier](token
Contributor

Keep related calls on the same line (e.g. token.getIdentifier(), new Text(blah)).

Author

Made the change; please let me know if anything can be done better here.
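Something along these lines would keep the related calls together (purely a formatting sketch of the same conversion):

val token = report.getAMRMToken
// Convert the records.Token from the report into a security.token.Token that UGI accepts.
val amRMToken = new org.apache.hadoop.security.token.Token[AMRMTokenIdentifier](
  token.getIdentifier().array(), token.getPassword().array(),
  new Text(token.getKind()), new Text(token.getService()))
UserGroupInformation.getCurrentUser().addToken(amRMToken)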

.getIdentifier().array(), token.getPassword().array, new Text(
token.getKind()), new Text(token.getService()))
val currentUGI = UserGroupInformation.getCurrentUser
currentUGI.addToken(amRMToken)

System.setProperty(
Contributor

Same question about using system properties.

Author

I changed it to set this in sparkConf and use the same value in ApplicationMaster when getting the containerId.

ApplicationConstants.Environment.CONTAINER_ID.name(),
ContainerId.newContainerId(report.getCurrentApplicationAttemptId, 1).toString)
Contributor

Won't this name be the same as the first executor created by the app?

I'd rather special-case getContainerId to return some baked-in string when the env variable is not set.

Author

Made the change to pass the appAttemptId from Client.scala.

val amArgs = new ApplicationMasterArguments(Array("--arg",
Contributor

This is pretty weird; I'd make this an explicit constructor argument for the AM instead. But if I understand correctly, this is the address the AM will use to connect back to the driver, right?

It seems like there's an opportunity for better code here, since now they'd both be running in the same process. Like in the cluster mode case, where the AM uses the same RpcEnv instance as the driver (see runDriver()).

Author

I added another constructor that omits ApplicationMasterArguments and takes an RpcEnv, so the AM uses the same instance.

sparkConf.get("spark.driver.host") + ":" + sparkConf.get("spark.driver.port")))
// Start Application Service in a separate thread and continue with application monitoring
new Thread() {
Contributor

Don't you want to keep a reference to this thread and join it at some point, to make sure it really goes away? Should it be a daemon thread instead?

Author

Changed it to a daemon thread.
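Roughly like this (a sketch of the daemon-thread variant described; the thread name is a placeholder, and the PR may also keep a reference so the thread can be joined or stopped):

// Run the unmanaged AM on a daemon thread so it does not keep the client JVM alive.
val amThread = new Thread("unmanaged-am") {
  override def run(): Unit = new ApplicationMaster(amArgs, sparkConf, hadoopConf).run()
}
amThread.setDaemon(true)
amThread.start()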

override def run(): Unit = new ApplicationMaster(amArgs, sparkConf, hadoopConf).run()
}.start()
}

private def formatReportDetails(report: ApplicationReport): String = {
val details = Seq[(String, String)](
("client token", getClientToken(report)),
YarnSparkHadoopUtil.scala
@@ -187,7 +187,13 @@ object YarnSparkHadoopUtil {
}

def getContainerId: ContainerId = {
val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name())
val containerIdString =
Contributor

indentation

Author

corrected the indentation

if (System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) != null) {
Contributor

better to use sparkConf.getenv.

Author

updated to use sparkConf.getenv
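Presumably something like the following; passing the SparkConf in and falling back to a system property set by the client are assumptions about the author's change, not the merged code:

def getContainerId(conf: SparkConf): ContainerId = {
  // Prefer the env variable set by YARN; fall back to the value the client set
  // when running an unmanaged AM in the same JVM.
  val containerIdString =
    Option(conf.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()))
      .getOrElse(System.getProperty(ApplicationConstants.Environment.CONTAINER_ID.name()))
  ConverterUtils.toContainerId(containerIdString)
}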

System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name())
} else {
System.getProperty(
ApplicationConstants.Environment.CONTAINER_ID.name())
}
ConverterUtils.toContainerId(containerIdString)
}
