1818package org .apache .spark .deploy
1919
2020import java .security .PrivilegedExceptionAction
21+ import java .util .{Collection , TimerTask , Timer }
22+ import java .io .{File , IOException }
23+ import java .net .URI
2124
25+ import org .apache .hadoop .fs .{FileSystem , Path }
26+ import org .apache .hadoop .security .token .{TokenIdentifier , Token }
27+ import org .apache .hadoop .fs .permission .FsPermission
2228import org .apache .hadoop .conf .Configuration
2329import org .apache .hadoop .mapred .JobConf
2430import org .apache .hadoop .security .Credentials
2531import org .apache .hadoop .security .UserGroupInformation
2632
27- import org .apache .spark .{ Logging , SparkContext , SparkConf , SparkException }
33+ import org .apache .spark ._
2834import org .apache .spark .annotation .DeveloperApi
2935
3036import scala .collection .JavaConversions ._
@@ -38,6 +44,8 @@ class SparkHadoopUtil extends Logging {
// A single SparkConf snapshot backs both the Hadoop Configuration and the
// security-related settings read by the methods of this class, so the two
// can never disagree.
val sparkConf: SparkConf = new SparkConf()
val conf: Configuration = newConfiguration(sparkConf)
UserGroupInformation.setConfiguration(conf)
48+
4149 /**
4250 * Runs the given function with a Hadoop UserGroupInformation as a thread local variable
4351 * (distributed to child threads), used for authenticating HDFS and YARN calls.
@@ -117,6 +125,170 @@ class SparkHadoopUtil extends Logging {
117125
def getSecretKeyFromUserCredentials(key: String): Array[Byte] = {
  // This implementation ignores `key` and always yields null.
  null
}
119127
128+ /**
129+ * Return whether Hadoop security is enabled or not.
130+ *
131+ * @return Whether Hadoop security is enabled or not
132+ */
def isSecurityEnabled(): Boolean =
  // Delegates straight to the static check on Hadoop's UserGroupInformation.
  UserGroupInformation.isSecurityEnabled
136+
137+ /**
138+ * Do user authentication when Hadoop security is turned on. Used by the driver.
139+ *
140+ * @param sc Spark context
141+ */
def doUserAuthentication(sc: SparkContext): Unit = {
  if (getAuthenticationType == "keytab") {
    // Keytab-based Kerberos login, required by long-running services such as
    // Shark or Spark Streaming; the login is repeated periodically by a timer.
    scheduleKerberosRenewTask(sc)
  } else {
    // Any other type: the user is assumed to be authenticated already (for
    // example through kinit) before Spark was launched. Obtain a delegation
    // token, write it to a file and register that file with the context so
    // that every worker node downloads it.
    val tokenFileUri = initDelegationToken()
    sc.addFile(tokenFileUri.toString)
  }
}
159+
160+ /**
161+ * Get the user whom the task belongs to.
162+ *
163+ * @param userName Name of the user whom the task belongs to
164+ * @return The user whom the task belongs to
165+ */
def getTaskUser(userName: String): UserGroupInformation = {
  val taskUser = UserGroupInformation.createRemoteUser(userName)
  // Switch the freshly created remote user to the Kerberos authentication method.
  taskUser.setAuthenticationMethod(UserGroupInformation.AuthenticationMethod.KERBEROS)
  // Attach every delegation token read from the token file; the Java collection
  // is iterated through the JavaConversions implicits imported by this file.
  for (token <- getDelegationTokens()) {
    taskUser.addToken(token)
  }
  taskUser
}
179+
180+ /**
181+ * Get the type of Hadoop security authentication.
182+ *
183+ * @return Type of Hadoop security authentication
184+ */
private def getAuthenticationType: String = {
  // SparkConf.get(key) without a default throws NoSuchElementException when the
  // key is unset, which made the caller's fallback branch unreachable in that
  // case. Only the value "keytab" is treated specially by doUserAuthentication,
  // so any other default selects the "already authenticated" path; "kerberos"
  // is used here purely as a readable non-keytab placeholder.
  sparkConf.get("spark.hadoop.security.authentication", "kerberos")
}
189+
/**
 * Schedule a timer task for automatically renewing the Kerberos credential.
 *
 * @param sc Spark context
 */
private def scheduleKerberosRenewTask(sc: SparkContext): Unit = {
  // Daemon timer: the renewal thread must not keep the JVM alive once the
  // application itself has finished.
  val renewTimer = new Timer("spark-kerberos-renewal", true)
  val renewTask = new TimerTask {
    override def run(): Unit = {
      try {
        kerberosLoginFromKeytab()
        // Obtain a fresh delegation token, store it in a file and register the
        // file so that it gets downloaded by every worker node.
        sc.addFile(initDelegationToken().toString)
      } catch {
        case ioe: IOException =>
          logError("Failed to login from Kerberos keytab", ioe)
        // An exception escaping run() terminates the Timer thread and thereby
        // silently cancels every later renewal, so all other non-fatal
        // failures are logged instead of propagated.
        case scala.util.control.NonFatal(e) =>
          logError("Failed to renew Kerberos credential", e)
      }
    }
  }

  // Default renewal period: six hours, in milliseconds.
  val defaultIntervalMs = 6L * 60 * 60 * 1000
  val interval = sparkConf.getLong(
    "spark.hadoop.security.kerberos.renewInterval", defaultIntervalMs)
  // Delay 0: the first login runs immediately, on the timer thread.
  renewTimer.schedule(renewTask, 0L, interval)
  logInfo("Scheduled timer task for renewing Kerberos credential")
}
217+
218+ /**
219+ * Log a user in from a keytab file. Loads user credential from a keytab
220+ * file and logs the user in.
221+ */
private def kerberosLoginFromKeytab(): Unit = {
  val user = System.getProperty("user.name")
  val home = System.getProperty("user.home")

  // Expands the _USER and _HOME placeholders. String.replace substitutes
  // literally, whereas replaceAll would interpret `\` and `$` inside the user
  // name or home directory (e.g. a Windows path) as regex replacement syntax.
  def expand(template: String): String =
    template.replace("_USER", user).replace("_HOME", home)

  // Without explicit configuration the keytab is <home>/<user>.keytab and the
  // principal is the OS user name.
  val defaultKeytab = home + Path.SEPARATOR + user + ".keytab"
  val keytab = expand(
    sparkConf.get("spark.hadoop.security.kerberos.keytab", defaultKeytab))
  val principal = expand(
    sparkConf.get("spark.hadoop.security.kerberos.principal", user))

  // Fail with a clear message when the keytab file is missing.
  if (!new File(keytab).exists()) {
    throw new IOException("Keytab file %s not found".format(keytab))
  }

  loginUserFromKeytab(principal, keytab)
}
240+
/**
 * Initialize a Hadoop delegation token and store it into a file. The caller
 * is responsible for adding the file to the SparkContext so executors can get it.
 *
 * @return URI of the token file
 */
private def initDelegationToken(): URI = {
  val localFS = FileSystem.getLocal(conf)
  // The token file lives under the user's home directory on the local file system.
  val tokenFile = new Path(localFS.getHomeDirectory,
    sparkConf.get("spark.hadoop.security.token.name", "spark.token"))
  // A file written by an earlier call was left read-only (see the permission
  // set below), so remove it before writing a fresh one.
  if (localFS.exists(tokenFile)) {
    localFS.delete(tokenFile, false)
  }

  // Request a new token with the current user's short name as the renewer.
  val currentUser = UserGroupInformation.getCurrentUser
  val fs = FileSystem.get(conf)
  val renewer = currentUser.getShortUserName
  val token = fs.getDelegationToken(renewer).asInstanceOf[Token[_ <: TokenIdentifier]]
  // getDelegationToken may hand back null; report that clearly instead of
  // failing with a NullPointerException on token.getService.
  if (token == null) {
    throw new IOException(
      "Failed to obtain a Hadoop delegation token for user %s".format(renewer))
  }

  val cred = new Credentials()
  cred.addToken(token.getService, token)
  cred.writeTokenStorageFile(tokenFile, conf)
  // Restrict the token file to owner read-only (octal 400). The mode is parsed
  // from a string because octal integer literals such as 0400 are deprecated
  // in Scala 2.10 and rejected by later compilers.
  // NOTE(review): the file exists with default permissions between the write
  // above and this call -- confirm whether that window is acceptable.
  val ownerReadOnly = Integer.parseInt("400", 8).toShort
  localFS.setPermission(tokenFile, FsPermission.createImmutable(ownerReadOnly))

  logInfo("Stored Hadoop delegation token for user %s to file %s".format(
    renewer, tokenFile.toUri.toString))
  tokenFile.toUri
}
272+
273+ /**
274+ * Get delegation tokens from the token file added through SparkContext.addFile().
275+ *
276+ * @return Collection of delegation tokens
277+ */
private def getDelegationTokens(): Collection[Token[_ <: TokenIdentifier]] = {
  // Locate the token file that was distributed through SparkContext.addFile().
  val source = new File(SparkFiles.get(
    sparkConf.get("spark.hadoop.security.token.name", "spark.token")))
  if (!source.exists()) {
    throw new IOException(
      "Token file %s does not exist".format(source.getAbsolutePath))
  }

  // Let java.io.File build the file: URI rather than concatenating "file://"
  // with the absolute path by hand, which yields a malformed URI for paths
  // that are not plain Unix-style (e.g. Windows drive paths).
  val sourcePath = new Path(source.toURI)
  // Read the credentials back from the token file.
  Credentials.readTokenStorageFile(sourcePath, conf).getAllTokens
}
291+
def loginUserFromKeytab(principalName: String, keytabFilename: String): Unit = {
  // Thin wrapper over Hadoop's static keytab login.
  UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
}
0 commit comments