apache · RongGu · Mar 16, 2014 · Mar 17, 2014 · Mar 21, 2014 · Mar 21, 2014
diff --git a/core/pom.xml b/core/pom.xml
@@ -205,6 +205,29 @@
       <artifactId>commons-io</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.tachyonproject</groupId>
+      <artifactId>tachyon</artifactId>
+      <version>0.4.0</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.jboss.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.curator</groupId>
+          <artifactId>curator-recipes</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.curator</groupId>
+          <artifactId>curator-test</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>

diff --git a/core/src/main/java/org/apache/spark/api/java/StorageLevels.java b/core/src/main/java/org/apache/spark/api/java/StorageLevels.java
@@ -23,17 +23,19 @@
  * Expose some commonly useful storage level constants.
  */
 public class StorageLevels {
-  public static final StorageLevel NONE = create(false, false, false, 1);
-  public static final StorageLevel DISK_ONLY = create(true, false, false, 1);
-  public static final StorageLevel DISK_ONLY_2 = create(true, false, false, 2);
-  public static final StorageLevel MEMORY_ONLY = create(false, true, true, 1);
-  public static final StorageLevel MEMORY_ONLY_2 = create(false, true, true, 2);
-  public static final StorageLevel MEMORY_ONLY_SER = create(false, true, false, 1);
-  public static final StorageLevel MEMORY_ONLY_SER_2 = create(false, true, false, 2);
-  public static final StorageLevel MEMORY_AND_DISK = create(true, true, true, 1);
-  public static final StorageLevel MEMORY_AND_DISK_2 = create(true, true, true, 2);
-  public static final StorageLevel MEMORY_AND_DISK_SER = create(true, true, false, 1);
-  public static final StorageLevel MEMORY_AND_DISK_SER_2 = create(true, true, false, 2);
+  public static final StorageLevel NONE = create(false, false, false, false, 1);
+  public static final StorageLevel DISK_ONLY = create(true, false, false, false, 1);
+  public static final StorageLevel DISK_ONLY_2 = create(true, false, false, false, 2);
+  public static final StorageLevel MEMORY_ONLY = create(false, true, false, true, 1);
+  public static final StorageLevel MEMORY_ONLY_2 = create(false, true, false, true, 2);
+  public static final StorageLevel MEMORY_ONLY_SER = create(false, true, false, false, 1);
+  public static final StorageLevel MEMORY_ONLY_SER_2 = create(false, true, false, false, 2);
+  public static final StorageLevel MEMORY_AND_DISK = create(true, true, false, true, 1);
+  public static final StorageLevel MEMORY_AND_DISK_2 = create(true, true, false, true, 2);
+  public static final StorageLevel MEMORY_AND_DISK_SER = create(true, true, false, false, 1);
+  public static final StorageLevel MEMORY_AND_DISK_SER_2 = create(true, true, false, false, 2);
+
+  public static final StorageLevel TACHYON = create(false, false, true, false, 1);
 
   /**
    * Create a new StorageLevel object.
@@ -42,7 +44,27 @@ public class StorageLevels {
    * @param deserialized saved as deserialized objects, if true
    * @param replication replication factor
    */
-  public static StorageLevel create(boolean useDisk, boolean useMemory, boolean deserialized, int replication) {
-    return StorageLevel.apply(useDisk, useMemory, deserialized, replication);
+  public static StorageLevel create(
+    boolean useDisk,
+    boolean useMemory,
+    boolean useTachyon,
+    boolean deserialized,
+    int replication) {
+    return StorageLevel.apply(useDisk, useMemory, useTachyon, deserialized, replication);
   }
+
+  /**
+   * Create a new StorageLevel object that does not use Tachyon. Kept so that callers of the
+   * original four-argument signature keep compiling.
+   *
+   * @deprecated use {@link #create(boolean, boolean, boolean, boolean, int)} instead
+   */
+  @Deprecated
+  public static StorageLevel create(
+    boolean useDisk,
+    boolean useMemory,
+    boolean deserialized,
+    int replication) {
+    return create(useDisk, useMemory, false, deserialized, replication);
+  }
 }
diff --git a/core/src/main/java/org/apache/spark/network/netty/TachyonFilePathResolver.java b/core/src/main/java/org/apache/spark/network/netty/TachyonFilePathResolver.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty;
+
+import org.apache.spark.storage.BlockId;
+import org.apache.spark.storage.TachyonFileSegment;
+
+public interface TachyonFilePathResolver {
+  /** Get the file segment in which the given block resides. */
+  TachyonFileSegment getBlockLocation(BlockId blockId);
+}
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -139,7 +139,8 @@ class SparkContext(
     conf.get("spark.driver.host"),
     conf.get("spark.driver.port").toInt,
     isDriver = true,
-    isLocal = isLocal)
+    isLocal = isLocal,
+    "<driver>" + appName)
   SparkEnv.set(env)
 
   // Used to store a URL for each static file/jar together with the file's local timestamp

diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -40,6 +40,7 @@ import org.apache.spark.util.{AkkaUtils, Utils}
  */
 class SparkEnv private[spark] (
     val executorId: String,
+    val appId: String,
     val actorSystem: ActorSystem,
     val serializerManager: SerializerManager,
     val serializer: Serializer,
@@ -121,7 +122,8 @@ object SparkEnv extends Logging {
       hostname: String,
       port: Int,
       isDriver: Boolean,
-      isLocal: Boolean): SparkEnv = {
+      isLocal: Boolean,
+      appId: String = null): SparkEnv = {
 
     val securityManager = new SecurityManager(conf)
     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port, conf = conf,
@@ -169,7 +171,7 @@ object SparkEnv extends Logging {
       "BlockManagerMaster",
       new BlockManagerMasterActor(isLocal, conf)), conf)
     val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, 
-      serializer, conf, securityManager)
+      serializer, conf, securityManager, appId)
 
     val connectionManager = blockManager.connectionManager
 
@@ -219,6 +221,7 @@ object SparkEnv extends Logging {
 
     new SparkEnv(
       executorId,
+      appId,
       actorSystem,
       serializerManager,
       serializer,

diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -92,6 +92,7 @@ private[spark] class ExecutorRunner(
   def substituteVariables(argument: String): String = argument match {
     case "{{WORKER_URL}}" => workerUrl
     case "{{EXECUTOR_ID}}" => execId.toString
+    case "{{APP_ID}}" => appId.toString
     case "{{HOSTNAME}}" => host
     case "{{CORES}}" => cores.toString
     case other => other

diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -31,6 +31,7 @@ import org.apache.spark.util.{AkkaUtils, Utils}
 private[spark] class CoarseGrainedExecutorBackend(
     driverUrl: String,
     executorId: String,
+    appId: String,
     hostPort: String,
     cores: Int)
   extends Actor
@@ -53,7 +54,8 @@ private[spark] class CoarseGrainedExecutorBackend(
     case RegisteredExecutor(sparkProperties) =>
       logInfo("Successfully registered with driver")
       // Make this host instead of hostPort ?
-      executor = new Executor(executorId, Utils.parseHostPort(hostPort)._1, sparkProperties)
+      executor = new Executor(executorId, Utils.parseHostPort(hostPort)._1, sparkProperties, 
+        false, appId)
 
     case RegisterExecutorFailed(message) =>
       logError("Slave registration failed: " + message)
@@ -92,7 +94,7 @@ private[spark] class CoarseGrainedExecutorBackend(
 }
 
 private[spark] object CoarseGrainedExecutorBackend {
-  def run(driverUrl: String, executorId: String, hostname: String, cores: Int,
+  def run(driverUrl: String, executorId: String, appId: String, hostname: String, cores: Int,
           workerUrl: Option[String]) {
     // Debug code
     Utils.checkHost(hostname)
@@ -105,7 +107,8 @@ private[spark] object CoarseGrainedExecutorBackend {
     // set it
     val sparkHostPort = hostname + ":" + boundPort
     actorSystem.actorOf(
-      Props(classOf[CoarseGrainedExecutorBackend], driverUrl, executorId, sparkHostPort, cores),
+      Props(classOf[CoarseGrainedExecutorBackend], driverUrl, executorId, appId,
+        sparkHostPort, cores),
       name = "Executor")
     workerUrl.foreach{ url =>
       actorSystem.actorOf(Props(classOf[WorkerWatcher], url), name = "WorkerWatcher")
@@ -118,13 +121,14 @@ private[spark] object CoarseGrainedExecutorBackend {
-      case x if x < 4 =>
+      // Five arguments are required now that appId is passed; <workerUrl> is optional
+      case x if x < 5 =>
         System.err.println(
           // Worker url is used in spark standalone mode to enforce fate-sharing with worker
-          "Usage: CoarseGrainedExecutorBackend <driverUrl> <executorId> <hostname> " +
+          "Usage: CoarseGrainedExecutorBackend <driverUrl> <executorId> <appId> <hostname> " +
           "<cores> [<workerUrl>]")
         System.exit(1)
-      case 4 =>
-        run(args(0), args(1), args(2), args(3).toInt, None)
+      case 5 =>
+        run(args(0), args(1), args(2), args(3), args(4).toInt, None)
-      case x if x > 4 =>
-        run(args(0), args(1), args(2), args(3).toInt, Some(args(4)))
+      case x if x > 5 =>
+        run(args(0), args(1), args(2), args(3), args(4).toInt, Some(args(5)))
     }
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -38,7 +38,8 @@ private[spark] class Executor(
     executorId: String,
     slaveHostname: String,
     properties: Seq[(String, String)],
-    isLocal: Boolean = false)
+    isLocal: Boolean = false,
+    appId: String = null)
   extends Logging
 {
   // Application dependencies (added through SparkContext) that we've fetched so far on this node.
@@ -103,7 +104,7 @@ private[spark] class Executor(
   private val env = {
     if (!isLocal) {
       val _env = SparkEnv.create(conf, executorId, slaveHostname, 0,
-        isDriver = false, isLocal = false)
+        isDriver = false, isLocal = false, appId)
       SparkEnv.set(_env)
       _env.metricsSystem.registerSource(executorSource)
       _env

diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala
@@ -41,6 +41,12 @@ object ExecutorExitCode {
   /** DiskStore failed to create a local temporary directory after many attempts. */
   val DISK_STORE_FAILED_TO_CREATE_DIR = 53
 
+  /** TachyonStore failed to initialize. */
+  val TACHYON_STORE_FAILED_TO_INITIALIZE = 54
+
+  /** TachyonStore failed to create a local temporary directory after many attempts. */
+  val TACHYON_STORE_FAILED_TO_CREATE_DIR = 55
+
   def explainExitCode(exitCode: Int): String = {
     exitCode match {
       case UNCAUGHT_EXCEPTION => "Uncaught exception"

diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -45,7 +45,8 @@ private[spark] class SparkDeploySchedulerBackend(
     val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
       conf.get("spark.driver.host"),  conf.get("spark.driver.port"),
       CoarseGrainedSchedulerBackend.ACTOR_NAME)
-    val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}", "{{WORKER_URL}}")
+    val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{APP_ID}}", "{{HOSTNAME}}", 
+      "{{CORES}}", "{{WORKER_URL}}")
     val command = Command(
       "org.apache.spark.executor.CoarseGrainedExecutorBackend", args, sc.executorEnvs)
     val sparkHome = sc.getSparkHome()