
Commit 68e7330

Delete Jar command

1 parent cada5be commit 68e7330

File tree: 14 files changed, +228 −65 lines

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 27 additions & 0 deletions
@@ -283,6 +283,12 @@ class SparkContext(config: SparkConf) extends Logging {
   private[spark] val addedFiles = new ConcurrentHashMap[String, Long]().asScala
   private[spark] val addedJars = new ConcurrentHashMap[String, Long]().asScala
 
+  // Maintains the mapping from a jar's file name to its path. If the scheme is `file`, then
+  // addedJars will replace the scheme with `spark` as shown below; for more details see
+  // addLocalJarFile and checkRemoteJarFile in the addJar API.
+  // e.g. add jar /opt/somepath/some.jar => spark://11.242.157.133:36723/jars/some.jar
+  private[spark] val jarsToPath = new ConcurrentHashMap[String, String]().asScala
+
   // Keeps track of all persisted RDDs
   private[spark] val persistentRdds = {
     val map: ConcurrentMap[Int, RDD[_]] = new MapMaker().weakValues().makeMap[Int, RDD[_]]()
@@ -1905,6 +1911,7 @@ class SparkContext(config: SparkConf) extends Logging {
       }
     }
     if (key != null) {
+      jarsToPath.putIfAbsent(new Path(path).getName, path)
       val timestamp = System.currentTimeMillis
       if (addedJars.putIfAbsent(key, timestamp).isEmpty) {
         logInfo(s"Added JAR $path at $key with timestamp $timestamp")
@@ -1917,11 +1924,31 @@ class SparkContext(config: SparkConf) extends Logging {
     }
   }
 
+  /**
+   * Removes the jar from addedJars, so that the next batch of task sets will get the updated jars.
+   */
+  def deleteJar(path: String): Unit = {
+    val uri = new URI(path)
+    val key = uri.getScheme match {
+      // This code is in line with addJar: the key is generated based on the file scheme;
+      // refer to the checkRemoteJarFile and addLocalJarFile implementations.
+      case null | "file" =>
+        val fName = new File(uri.getRawPath).getName
+        env.rpcEnv.fileServer.deleteJar(fName)
+        s"${env.rpcEnv.address.toSparkURL}/jars/${Utils.encodeFileNameToURIRawPath(fName)}"
+      case _ => uri.toString
+    }
+    addedJars.remove(key)
+    jarsToPath.remove(new Path(path).getName)
+  }
+
   /**
    * Returns a list of jar files that are added to resources.
    */
   def listJars(): Seq[String] = addedJars.keySet.toSeq
 
+  private[spark] def getPath(jarName: String): Option[String] = jarsToPath.get(jarName)
+
   /**
    * When stopping SparkContext inside Spark components, it's easy to cause dead-lock since Spark
    * may wait for some internal threads to finish. It's better to use this method to stop
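
Taken together, addJar now records the original path under the jar's bare file name, and deleteJar undoes both entries. A minimal round-trip sketch, assuming an active SparkContext `sc` and an illustrative local jar (getPath is private[spark], so the asserts only compile inside the org.apache.spark package):

  sc.addJar("/opt/somepath/some.jar")
  // addedJars keys on the rewritten file-server URL
  // (e.g. spark://11.242.157.133:36723/jars/some.jar), while jarsToPath
  // keeps the original path under the bare name "some.jar".
  assert(sc.getPath("some.jar") == Some("/opt/somepath/some.jar"))

  sc.deleteJar("/opt/somepath/some.jar")
  // Both maps are cleaned up; listJars() no longer reports the jar.
  assert(sc.getPath("some.jar").isEmpty)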

core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala

Lines changed: 5 additions & 0 deletions
@@ -175,6 +175,11 @@ private[spark] trait RpcEnvFileServer {
    */
   def addJar(file: File): String
 
+  /**
+   * Removes a jar from the RpcEnv.
+   */
+  def deleteJar(jarName: String): Unit
+
   /**
    * Adds a local directory to be served via this file server.
    *

core/src/main/scala/org/apache/spark/rpc/netty/NettyStreamManager.scala

Lines changed: 4 additions & 0 deletions
@@ -88,4 +88,8 @@ private[netty] class NettyStreamManager(rpcEnv: NettyRpcEnv)
     s"${rpcEnv.address.toSparkURL}$fixedBaseUri"
   }
 
+  override def deleteJar(jarName: String): Unit = {
+    jars.remove(jarName)
+  }
+
 }

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 1 addition & 1 deletion
@@ -222,7 +222,7 @@ statement
         multipartIdentifier partitionSpec?                             #loadData
     | TRUNCATE TABLE multipartIdentifier partitionSpec?                #truncateTable
     | MSCK REPAIR TABLE multipartIdentifier                            #repairTable
-    | op=(ADD | LIST) identifier (STRING | .*?)                        #manageResource
+    | op=(ADD | LIST | DELETE) identifier (STRING | .*?)               #manageResource
     | SET ROLE .*?                                                     #failNativeCommand
     | SET .*?                                                          #setConfiguration
     | RESET                                                            #resetConfiguration
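
With DELETE added to the #manageResource alternative, the command can be exercised end to end from SQL. A hedged usage sketch, assuming an active SparkSession `spark` and an illustrative jar path:

  spark.sql("ADD JAR /opt/somepath/some.jar")
  spark.sql("LIST JARS").show(truncate = false)
  // The new alternative parses DELETE JAR <path> into a DeleteJarCommand
  // (see the SparkSqlParser change below).
  spark.sql("DELETE JAR /opt/somepath/some.jar")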

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 6 additions & 0 deletions
@@ -307,6 +307,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * {{{
    *   ADD (FILE[S] <filepath ...> | JAR[S] <jarpath ...>)
    *   LIST (FILE[S] [filepath ...] | JAR[S] [jarpath ...])
+   *   DELETE (FILE[S] <filepath ...> | JAR[S] <jarpath ...>)
    * }}}
    *
    * Note that filepath/jarpath can be given as follows;
@@ -339,6 +340,11 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
           }
         case other => operationNotAllowed(s"LIST with resource type '$other'", ctx)
       }
+      case SqlBaseParser.DELETE =>
+        ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
+          case "jar" => DeleteJarCommand(mayebePaths)
+          case other => operationNotAllowed(s"DELETE with resource type '$other'", ctx)
+        }
       case _ => operationNotAllowed(s"Other types of operation on resources", ctx)
     }
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala

Lines changed: 18 additions & 1 deletion
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.command
 
-import java.io.File
+import java.io.{File, FileNotFoundException}
 import java.net.URI
 
 import org.apache.hadoop.fs.Path
@@ -99,3 +99,20 @@ case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends Runnab
     }
   }
 }
+
+/**
+ * Deletes a jar from the current session, so it can be removed from the classpath.
+ */
+case class DeleteJarCommand(path: String) extends RunnableCommand {
+
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    val jarName = new Path(path).getName
+    sparkSession.sparkContext.getPath(jarName) match {
+      case Some(jarPath) =>
+        sparkSession.sessionState.resourceLoader.deleteJar(jarPath)
+        Seq.empty[Row]
+      case None => throw new FileNotFoundException(s"$jarName does not exist")
+    }
+  }
+}
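
Because DeleteJarCommand resolves the bare file name through SparkContext.getPath, either the original path or just the jar name works, and deleting a jar that was never added fails fast. A sketch, assuming an active SparkSession `spark` and illustrative paths:

  import java.io.FileNotFoundException

  spark.sql("ADD JAR /opt/somepath/some.jar")
  // Either form resolves to the registered path via getPath("some.jar").
  spark.sql("DELETE JAR some.jar")

  // A jar that was never added surfaces as FileNotFoundException.
  try spark.sql("DELETE JAR missing.jar")
  catch { case e: FileNotFoundException => println(e.getMessage) }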

sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala

Lines changed: 17 additions & 1 deletion
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.internal
 
 import java.io.File
+import java.net.URL
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
@@ -164,6 +165,12 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade
    */
   def addJar(path: String): Unit = {
     session.sparkContext.addJar(path)
+    val jarURL: URL = getJarURL(path)
+    session.sharedState.addJar(jarURL)
+    Thread.currentThread().setContextClassLoader(session.sharedState.jarClassLoader)
+  }
+
+  private def getJarURL(path: String) = {
     val uri = new Path(path).toUri
     val jarURL = if (uri.getScheme == null) {
       // `path` is a local file path without a URL scheme
@@ -172,7 +179,16 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade
       // `path` is a URL with a scheme
       uri.toURL
     }
-    session.sharedState.jarClassLoader.addURL(jarURL)
+    jarURL
+  }
+
+  /**
+   * Deletes a jar from [[SparkContext]] and the classloader.
+   */
+  def deleteJar(path: String): Unit = {
+    session.sparkContext.deleteJar(path)
+    val jarURL: URL = getJarURL(path)
+    session.sharedState.deleteJar(jarURL)
     Thread.currentThread().setContextClassLoader(session.sharedState.jarClassLoader)
   }
 }
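
The extracted getJarURL helper gives addJar and deleteJar a single normalization point: scheme-less paths are treated as local files, anything else as a full URL. A standalone sketch of that logic (an assumption based on the surrounding context, since the hunk elides the local-file branch):

  import java.io.File
  import java.net.URL
  import org.apache.hadoop.fs.Path

  // Illustrative mirror of getJarURL; not the committed implementation.
  def toJarURL(path: String): URL = {
    val uri = new Path(path).toUri
    if (uri.getScheme == null) {
      new File(path).toURI.toURL  // local file path without a URL scheme
    } else {
      uri.toURL                   // already a URL with a scheme
    }
  }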

sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala

Lines changed: 16 additions & 3 deletions
@@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.streaming.StreamExecution
 import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab}
 import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.status.ElementTrackingStore
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{MutableURLClassLoader, Utils}
 
 
 /**
@@ -183,11 +183,24 @@ private[sql] class SharedState(
     new GlobalTempViewManager(globalTempDB)
   }
 
+  private val parentClassLoader = Utils.getContextOrSparkClassLoader
+
   /**
    * A classloader used to load all user-added jar.
    */
-  val jarClassLoader = new NonClosableMutableURLClassLoader(
-    org.apache.spark.util.Utils.getContextOrSparkClassLoader)
+  private var closeableJarClassLoader = new MutableURLClassLoader(Array.empty, parentClassLoader)
+
+  def jarClassLoader: MutableURLClassLoader = closeableJarClassLoader
+
+  def addJar(jarURL: URL): Unit = synchronized {
+    jarClassLoader.addURL(jarURL)
+  }
+
+  def deleteJar(jarURL: URL): Unit = synchronized {
+    val newJars = closeableJarClassLoader.getURLs.filter(!_.equals(jarURL))
+    closeableJarClassLoader.close()
+    closeableJarClassLoader = null
+    closeableJarClassLoader = new MutableURLClassLoader(newJars, parentClassLoader)
+  }
 
 }
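
Since a URLClassLoader cannot drop a single URL in place, deleteJar swaps the whole loader: filter the deleted URL out of getURLs, close the old loader, and rebuild over the survivors. A minimal standalone sketch of that filter-and-rebuild pattern using the JDK's URLClassLoader (names here are illustrative, not Spark API):

  import java.net.{URL, URLClassLoader}

  // Illustrative stand-in for the MutableURLClassLoader swap above.
  def rebuildWithout(old: URLClassLoader, removed: URL, parent: ClassLoader): URLClassLoader = {
    val survivors = old.getURLs.filterNot(_.equals(removed))
    old.close()  // release the old loader's open jar handles
    new URLClassLoader(survivors, parent)
  }

Callers still holding the old loader must pick up the new one; this is why SessionResourceLoader resets the thread context classloader after every add or delete.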

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala

Lines changed: 1 addition & 1 deletion
@@ -363,7 +363,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
     // scalastyle:off println
     if (proc.isInstanceOf[Driver] || proc.isInstanceOf[SetProcessor] ||
       proc.isInstanceOf[AddResourceProcessor] || proc.isInstanceOf[ListResourceProcessor] ||
-      proc.isInstanceOf[ResetProcessor] ) {
+      proc.isInstanceOf[ResetProcessor] || proc.isInstanceOf[DeleteResourceProcessor]) {
       val driver = new SparkSQLDriver
 
       driver.init()

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala

Lines changed: 5 additions & 0 deletions
@@ -117,4 +117,9 @@ class HiveSessionResourceLoader(
     client.addJar(path)
     super.addJar(path)
   }
+
+  override def deleteJar(path: String): Unit = {
+    client.deleteJar(path)
+    super.deleteJar(path)
+  }
 }
