-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-22404][YARN] Provide an option to use unmanaged AM in yarn-client mode #19616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
e51f99e
764c302
640013b
cba0c6d
19b6c3a
ce94235
0921f7a
837d25f
65aeba9
93b016f
dc31940
23ad9de
1c02b7d
2429e19
6854fc4
3b377af
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,7 @@ import com.google.common.io.Files | |
| import org.apache.hadoop.conf.Configuration | ||
| import org.apache.hadoop.fs._ | ||
| import org.apache.hadoop.fs.permission.FsPermission | ||
| import org.apache.hadoop.io.DataOutputBuffer | ||
| import org.apache.hadoop.io.{DataOutputBuffer, Text} | ||
| import org.apache.hadoop.mapreduce.MRJobConfig | ||
| import org.apache.hadoop.security.{Credentials, UserGroupInformation} | ||
| import org.apache.hadoop.util.StringUtils | ||
|
|
@@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.api.records._ | |
| import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication} | ||
| import org.apache.hadoop.yarn.conf.YarnConfiguration | ||
| import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException | ||
| import org.apache.hadoop.yarn.security.AMRMTokenIdentifier | ||
| import org.apache.hadoop.yarn.util.Records | ||
|
|
||
| import org.apache.spark.{SecurityManager, SparkConf, SparkException} | ||
|
|
@@ -69,6 +70,10 @@ private[spark] class Client( | |
|
|
||
| private val isClusterMode = sparkConf.get("spark.submit.deployMode", "client") == "cluster" | ||
|
|
||
| private val isClientUnmanagedAMEnabled = | ||
| sparkConf.getBoolean("spark.yarn.un-managed-am", false) && !isClusterMode | ||
|
||
| private var amServiceStarted = false | ||
|
||
|
|
||
| // AM related configurations | ||
| private val amMemory = if (isClusterMode) { | ||
| sparkConf.get(DRIVER_MEMORY).toInt | ||
|
|
@@ -282,7 +287,10 @@ private[spark] class Client( | |
| "does not support it", e) | ||
| } | ||
| } | ||
|
|
||
| if (isClientUnmanagedAMEnabled) { | ||
| // Set Unmanaged AM to true in Application Submission Context | ||
| appContext.setUnmanagedAM(true) | ||
|
||
| } | ||
| appContext | ||
| } | ||
|
|
||
|
|
@@ -656,7 +664,9 @@ private[spark] class Client( | |
| // Clear the cache-related entries from the configuration to avoid them polluting the | ||
| // UI's environment page. This works for client mode; for cluster mode, this is handled | ||
| // by the AM. | ||
| CACHE_CONFIGS.foreach(sparkConf.remove) | ||
| if (!isClientUnmanagedAMEnabled) { | ||
|
||
| CACHE_CONFIGS.foreach(sparkConf.remove) | ||
| } | ||
|
|
||
| localResources | ||
| } | ||
|
|
@@ -784,6 +794,9 @@ private[spark] class Client( | |
| val env = new HashMap[String, String]() | ||
| populateClasspath(args, hadoopConf, sparkConf, env, sparkConf.get(DRIVER_CLASS_PATH)) | ||
| env("SPARK_YARN_STAGING_DIR") = stagingDirPath.toString | ||
| if (isClientUnmanagedAMEnabled) { | ||
| System.setProperty("SPARK_YARN_STAGING_DIR", stagingDirPath.toString) | ||
|
||
| } | ||
| env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName() | ||
| if (loginFromKeytab) { | ||
| val credentialsFile = "credentials-" + UUID.randomUUID().toString | ||
|
|
@@ -1104,14 +1117,39 @@ private[spark] class Client( | |
| if (returnOnRunning && state == YarnApplicationState.RUNNING) { | ||
| return (state, report.getFinalApplicationStatus) | ||
| } | ||
|
|
||
| if (state == YarnApplicationState.ACCEPTED && isClientUnmanagedAMEnabled | ||
| && !amServiceStarted && report.getAMRMToken != null) { | ||
|
||
| amServiceStarted = true | ||
| startApplicationMasterService(report) | ||
| } | ||
| lastState = state | ||
| } | ||
|
|
||
| // Never reached, but keeps compiler happy | ||
| throw new SparkException("While loop is depleted! This should never happen...") | ||
| } | ||
|
|
||
| private def startApplicationMasterService(report: ApplicationReport) = { | ||
|
||
| // Add AMRMToken to establish connection between RM and AM | ||
| val token = report.getAMRMToken | ||
| val amRMToken: org.apache.hadoop.security.token.Token[AMRMTokenIdentifier] = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do you need to make this copy? Isn't the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. report.getAMRMToken gives org.apache.hadoop.yarn.api.records.Token type instance, but currentUGI.addToken expects org.apache.hadoop.security.token.Token type instance. |
||
| new org.apache.hadoop.security.token.Token[AMRMTokenIdentifier](token | ||
|
||
| .getIdentifier().array(), token.getPassword().array, new Text( | ||
| token.getKind()), new Text(token.getService())) | ||
| val currentUGI = UserGroupInformation.getCurrentUser | ||
| currentUGI.addToken(amRMToken) | ||
|
|
||
| System.setProperty( | ||
|
||
| ApplicationConstants.Environment.CONTAINER_ID.name(), | ||
| ContainerId.newContainerId(report.getCurrentApplicationAttemptId, 1).toString) | ||
|
||
| val amArgs = new ApplicationMasterArguments(Array("--arg", | ||
|
||
| sparkConf.get("spark.driver.host") + ":" + sparkConf.get("spark.driver.port"))) | ||
| // Start Application Service in a separate thread and continue with application monitoring | ||
| new Thread() { | ||
|
||
| override def run(): Unit = new ApplicationMaster(amArgs, sparkConf, hadoopConf).run() | ||
| }.start() | ||
| } | ||
|
|
||
| private def formatReportDetails(report: ApplicationReport): String = { | ||
| val details = Seq[(String, String)]( | ||
| ("client token", getClientToken(report)), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -187,7 +187,13 @@ object YarnSparkHadoopUtil { | |
| } | ||
|
|
||
| def getContainerId: ContainerId = { | ||
| val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) | ||
| val containerIdString = | ||
|
||
| if (System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) != null) { | ||
|
||
| System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) | ||
| } else { | ||
| System.getProperty( | ||
| ApplicationConstants.Environment.CONTAINER_ID.name()) | ||
| } | ||
| ConverterUtils.toContainerId(containerIdString) | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't follow Spark's convention for multi-line arguments.
This also looks a little odd now, because there are conflicting arguments.
ApplicationMasterArguments is now only used in cluster mode, and everything else is expected to be provided in the other parameters. So while this is the simpler change, it's also a little ugly. I don't really have a good suggestion right now, but it's something to think about.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made changes to the default constructor and added another constructor. Please check and let me know anything can be done better.