32 changes: 29 additions & 3 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -20,7 +20,7 @@ package org.apache.spark
import java.io._
import java.lang.reflect.Constructor
import java.net.URI
import java.util.{Arrays, Properties, UUID}
import java.util.{Arrays, Properties, ServiceLoader, UUID}
import java.util.concurrent.ConcurrentMap
import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReference}

@@ -2443,8 +2443,34 @@ object SparkContext extends Logging {
"in the form mesos://zk://host:port. Current Master URL will stop working in Spark 2.0.")
createTaskScheduler(sc, "mesos://" + zkUrl, deployMode)

case _ =>
throw new SparkException("Could not parse Master URL: '" + master + "'")
case masterUrl =>
val cm = getClusterManager(masterUrl) match {
case Some(clusterMgr) => clusterMgr
case None => throw new SparkException("Could not parse Master URL: '" + master + "'")
}
try {
val scheduler = cm.createTaskScheduler(sc)
val backend = cm.createSchedulerBackend(sc, scheduler)
cm.initialize(scheduler, backend)
(backend, scheduler)
} catch {
case e: Exception => {
Contributor: catch NonFatal here, i.e. case NonFatal(e) =>
throw new SparkException("External scheduler cannot be instantiated", e)
}
}
}
}

private def getClusterManager(url: String): Option[ExternalClusterManager] = {
val loader = Utils.getContextOrSparkClassLoader
Contributor: maybe

    val loader = Utils.getContextOrSparkClassLoader
    val serviceLoaders = ServiceLoader.load(classOf[ExternalClusterManager], loader).asScala.filter(_.canCreate(url))
    if (serviceLoaders.size > 1) {
      throw new SparkException("Multiple Cluster Managers ($multipleMgrs) registered for the URL $url")
    }
    serviceLoaders.headOption
val serviceLoader = ServiceLoader.load(classOf[ExternalClusterManager], loader)

serviceLoader.asScala.filter(_.canCreate(url)).toList match {
// exactly one registered manager
case head :: Nil => Some(head)
case Nil => None
case multipleMgrs => sys.error(s"Multiple Cluster Managers registered " +
Contributor: Can you include the list of matching cluster managers in the message?

Author: Done.
s"for the url $url")
}
}
}
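
A compilable sketch of the lookup and error-handling pattern above, folding in both review suggestions (catch only NonFatal, and list the matching managers in the error message). The stand-in trait, object and messages are assumptions for illustration, not the merged Spark code.

import java.util.ServiceLoader
import scala.collection.JavaConverters._
import scala.util.control.NonFatal

// Hypothetical stand-in for ExternalClusterManager so the sketch is self-contained.
trait ClusterManagerPlugin {
  def canCreate(masterURL: String): Boolean
}

object ClusterManagerLookup {
  // Mirrors getClusterManager: discover registered implementations via ServiceLoader
  // and fail with the full candidate list if more than one claims the URL.
  def forUrl(url: String, loader: ClassLoader): Option[ClusterManagerPlugin] = {
    val candidates = ServiceLoader.load(classOf[ClusterManagerPlugin], loader)
      .asScala.filter(_.canCreate(url)).toList
    candidates match {
      case single :: Nil => Some(single)
      case Nil => None
      case multiple =>
        throw new IllegalStateException(
          s"Multiple cluster managers (${multiple.mkString(", ")}) registered for the URL $url")
    }
  }

  // Mirrors the fallthrough case above: wrap instantiation failures, catching only
  // NonFatal as suggested in the review so fatal errors still propagate.
  def instantiate[T](create: () => T): T =
    try create() catch {
      case NonFatal(e) =>
        throw new RuntimeException("External scheduler cannot be instantiated", e)
    }
}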
@@ -64,7 +64,7 @@ private[spark] class CoarseGrainedExecutorBackend(
}
case Failure(e) => {
logError(s"Cannot register with driver: $driverUrl", e)
System.exit(1)
exitExecutor()
}
}(ThreadUtils.sameThread)
}
@@ -82,12 +82,12 @@

case RegisterExecutorFailed(message) =>
logError("Slave registration failed: " + message)
System.exit(1)
exitExecutor()

case LaunchTask(data) =>
if (executor == null) {
logError("Received LaunchTask command but executor was null")
System.exit(1)
exitExecutor()
} else {
val taskDesc = ser.deserialize[TaskDescription](data.value)
logInfo("Got assigned task " + taskDesc.taskId)
@@ -98,7 +98,7 @@
case KillTask(taskId, _, interruptThread) =>
if (executor == null) {
logError("Received KillTask command but executor was null")
System.exit(1)
exitExecutor()
} else {
executor.killTask(taskId, interruptThread)
}
@@ -122,7 +122,7 @@
logInfo(s"Driver from $remoteAddress disconnected during shutdown")
} else if (driver.exists(_.address == remoteAddress)) {
logError(s"Driver $remoteAddress disassociated! Shutting down.")
System.exit(1)
exitExecutor()
} else {
logWarning(s"An unknown ($remoteAddress) driver disconnected.")
}
@@ -135,6 +135,8 @@
case None => logWarning(s"Drop $msg because has not yet connected to driver")
}
}

def exitExecutor(): Unit = System.exit(1)
Contributor: can you add some documentation here saying why this is a public API - otherwise somebody might come in and remove this next time this code gets used.
}

private[spark] object CoarseGrainedExecutorBackend extends Logging {
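Regarding the comment above about documenting exitExecutor: a small sketch (not the PR's code) of the kind of documentation and override point being asked for. The method exists so that a backend whose executor runs inside a larger host process can shut the executor down without calling System.exit on the host JVM; the names below are invented for illustration.

// Hypothetical stand-in; the real CoarseGrainedExecutorBackend is not reproduced here.
trait ExecutorBackendLike {
  /**
   * Called on unrecoverable executor errors. Kept as an overridable member (instead
   * of calling System.exit inline) so that deployments which embed the executor in
   * another running process can stop the executor without killing that process.
   */
  def exitExecutor(): Unit = System.exit(1)
}

// Example override for an executor embedded in a host JVM that must keep running.
class InProcessBackend(stopExecutorOnly: () => Unit) extends ExecutorBackendLike {
  override def exitExecutor(): Unit = stopExecutorOnly()
}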
9 changes: 8 additions & 1 deletion core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -149,7 +149,14 @@ private[spark] class Executor(
tr.kill(interruptThread)
}
}

def killAllTasks (interruptThread: Boolean) : Unit = {
Contributor: add a space between the methods.

Contributor: I could not see this method being called from anywhere. If you don't plan to use it, please remove this.

Author:
  1. Done.
  2. We are targeting a use case wherein executors are launched inside another running process, and when an executor goes down it does not take the parent process down. In such cases, executors should kill their running tasks when they go down. Given that runningTasks is a private val, we need a method that can be called from the executor backend.

Contributor: same here -- say why this is a public thing and how it is used. Also remove the space after killAllTasks.

// kill all the running tasks
for (taskRunner <- runningTasks.values().asScala) {
if (taskRunner != null) {
taskRunner.kill(interruptThread)
}
}
}
Contributor: add a space between the methods.

Author: Done.

def stop(): Unit = {
env.metricsSystem.report()
heartbeater.shutdown()
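A sketch of the use case the author describes above for killAllTasks: an executor hosted inside another process interrupts its running tasks on the way down instead of relying on JVM exit. Only killAllTasks(interruptThread) and stop() come from the diff; the surrounding names are assumptions for illustration.

// Hypothetical minimal view of the Executor API that an embedded backend would use.
trait TaskHost {
  def killAllTasks(interruptThread: Boolean): Unit
  def stop(): Unit
}

// Shutdown path for an executor running inside a parent process: kill any
// still-running tasks explicitly, since there is no JVM exit to clean them up.
class EmbeddedExecutorShutdown(executor: TaskHost) {
  def shutdown(): Unit = {
    executor.killAllTasks(interruptThread = true)
    executor.stop()
  }
}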
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.scheduler

import org.apache.spark.SparkContext
import org.apache.spark.annotation.DeveloperApi

/**
* :: DeveloperApi ::
* A cluster manager interface to plugin external scheduler.
*
Contributor: remove extra newline.

Author: Done.

*/
@DeveloperApi
private[spark] trait ExternalClusterManager {
Contributor: are you sure you want this to be private[spark]?


/**
* Check if this cluster manager instance can create scheduler components
* for a certain master URL.
* @param masterURL the master URL
* @return True if the cluster manager can create scheduler backend/task scheduler for this URL
*/
def canCreate(masterURL : String): Boolean

/**
* Create a task scheduler instance for the given SparkContext
* @param sc SparkContext
* @return TaskScheduler that will be responsible for task handling
*/
def createTaskScheduler (sc: SparkContext): TaskScheduler
Contributor: can you also include the masterURL as a param?

Author: I have added the master url. In hindsight, I think it makes sense.


/**
* Create a scheduler backend for the given SparkContext and scheduler. This is
* called after task scheduler is created using [[ExternalClusterManager.createTaskScheduler()]].
* @param sc SparkContext
* @param scheduler TaskScheduler that will be used with the scheduler backend.
* @return SchedulerBackend that works with a TaskScheduler
*/
def createSchedulerBackend (sc: SparkContext, scheduler: TaskScheduler): SchedulerBackend
Contributor: can you also include the masterURL as a param?

Author: Done.


/**
* Initialize task scheduler and backend scheduler. This is called after the
* scheduler components are created
* @param scheduler TaskScheduler that will be responsible for task handling
* @param backend SchedulerBackend that works with a TaskScheduler
*/
def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit
}
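
For context, a sketch of what a pluggable implementation of this trait could look like, written against the signatures shown in this revision (the author later adds a masterURL parameter to the create* methods). The class name and URL scheme are invented; ServiceLoader finds the class because its fully qualified name is listed in the META-INF/services resource added below.

package org.apache.spark.scheduler  // same package as the test's dummy manager below

import org.apache.spark.SparkContext

// Illustrative only: claims master URLs of the form "example://..." and would wire up
// real scheduler components in place of the UnsupportedOperationExceptions.
class ExampleClusterManager extends ExternalClusterManager {

  override def canCreate(masterURL: String): Boolean = masterURL.startsWith("example://")

  override def createTaskScheduler(sc: SparkContext): TaskScheduler =
    throw new UnsupportedOperationException("plug in a real TaskScheduler here")

  override def createSchedulerBackend(sc: SparkContext, scheduler: TaskScheduler): SchedulerBackend =
    throw new UnsupportedOperationException("plug in a real SchedulerBackend here")

  override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = ()
}

With that registration in place, new SparkConf().setMaster("example://host:7077") would route scheduler creation through this manager, exactly as the test below does with its "myclusterManager" URL.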
@@ -0,0 +1 @@
org.apache.spark.scheduler.CheckExternalClusterManager
Contributor: why is this file needed?

Author: To instantiate the dummy cluster manager in the test.

@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.scheduler

import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.SchedulingMode.SchedulingMode
import org.apache.spark.storage.BlockManagerId

class ExternalClusterManagerSuite extends SparkFunSuite with LocalSparkContext
{
test("launch of backend and scheduler") {
val conf = new SparkConf().setMaster("myclusterManager").
setAppName("testcm").set("spark.driver.allowMultipleContexts", "true")
sc = new SparkContext(conf)
// check if the scheduler components are created
assert(sc.schedulerBackend.isInstanceOf[FakeSchedulerBackend])
Contributor: Sorry I missed this in the last review comments yesterday. I thought that FakeSchedulerBackend was in this same file and you could rename it, but now I see that it's from some other place.

While reading, it feels odd to have Fake* and then Dummy* test classes. I am not sure what's followed in the Spark codebase. A couple of options:

  • rename the Dummy* classes to Fake*, and move all the Fake* classes to a common test utils file for the module.
  • instead of re-using FakeSchedulerBackend from another place, create a FakeSchedulerBackend here.

Author: I too missed it completely. I think it wasn't a great idea in the first place to reuse FakeSchedulerBackend from some other class, from a maintenance perspective. I am going ahead with your option 2.

assert(sc.taskScheduler.isInstanceOf[FakeScheduler])
}
}

class CheckExternalClusterManager extends ExternalClusterManager {
Contributor: Rename to TestExternalClusterManager or DummyExternalClusterManager.

Author: Done.


def canCreate(masterURL: String): Boolean = masterURL == "myclusterManager"

def createTaskScheduler(sc: SparkContext): TaskScheduler = new FakeScheduler

def createSchedulerBackend(sc: SparkContext, scheduler: TaskScheduler): SchedulerBackend =
new FakeSchedulerBackend()

def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = {}

}

class FakeScheduler extends TaskScheduler {
Contributor: To keep this consistent with the external CM declared above, you could rename this to TestTaskScheduler or DummyTaskScheduler.

override def rootPool: Pool = null
override def schedulingMode: SchedulingMode = SchedulingMode.NONE
override def start(): Unit = {}
override def stop(): Unit = {}
override def submitTasks(taskSet: TaskSet): Unit = {}
override def cancelTasks(stageId: Int, interruptThread: Boolean) {}
Contributor: add a Unit here for the return type, to be consistent.

override def setDAGScheduler(dagScheduler: DAGScheduler): Unit = {}
override def defaultParallelism(): Int = 2
override def executorLost(executorId: String, reason: ExecutorLossReason): Unit = {}
override def applicationAttemptId(): Option[String] = None
def executorHeartbeatReceived(
execId: String,
accumUpdates: Array[(Long, Seq[AccumulableInfo])],
blockManagerId: BlockManagerId): Boolean = true
}
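
On "option 2" above (a scheduler backend stub local to this suite rather than reusing FakeSchedulerBackend from elsewhere), a minimal sketch. It assumes the abstract members of SchedulerBackend at this point are start, stop, reviveOffers and defaultParallelism, which may not match the trait exactly.

// Hypothetical local stub, parallel in spirit to FakeScheduler above.
private class LocalFakeSchedulerBackend extends SchedulerBackend {
  override def start(): Unit = {}
  override def stop(): Unit = {}
  override def reviveOffers(): Unit = {}
  override def defaultParallelism(): Int = 1
}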
1 change: 1 addition & 0 deletions dev/.rat-excludes
@@ -98,3 +98,4 @@ LZ4BlockInputStream.java
spark-deps-.*
.*csv
.*tsv
org.apache.spark.scheduler.ExternalClusterManager
Contributor: why is this needed?

Author: Rat throws an error for files that do not have the Apache license header, hence excluding this file.

Contributor: The version of ExternalClusterManager in the PR has a license header in it. Can you remove this and try?

Author: I believe there is some confusion. There is a file named org.apache.spark.scheduler.ExternalClusterManager in a test folder that is used by the ServiceLoader to launch the dummy external cluster manager. That file does not (and cannot) have a header, and hence it needs to be excluded.