
Commit b2318eb

Merge branch 'master' of git://git.apache.org/spark into fix-assembly-jarname

2 parents: 5fc1259 + c9ae79f
25 files changed (+395, -219 lines)


core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 6 additions & 6 deletions

@@ -779,20 +779,20 @@ class SparkContext(config: SparkConf) extends Logging {
   /**
    * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values
    * with `+=`. Only the driver can access the accumuable's `value`.
-   * @tparam T accumulator type
-   * @tparam R type that can be added to the accumulator
+   * @tparam R accumulator result type
+   * @tparam T type that can be added to the accumulator
    */
-  def accumulable[T, R](initialValue: T)(implicit param: AccumulableParam[T, R]) =
+  def accumulable[R, T](initialValue: R)(implicit param: AccumulableParam[R, T]) =
     new Accumulable(initialValue, param)

   /**
    * Create an [[org.apache.spark.Accumulable]] shared variable, with a name for display in the
    * Spark UI. Tasks can add values to the accumuable using the `+=` operator. Only the driver can
    * access the accumuable's `value`.
-   * @tparam T accumulator type
-   * @tparam R type that can be added to the accumulator
+   * @tparam R accumulator result type
+   * @tparam T type that can be added to the accumulator
    */
-  def accumulable[T, R](initialValue: T, name: String)(implicit param: AccumulableParam[T, R]) =
+  def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) =
     new Accumulable(initialValue, param, Some(name))

   /**
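The swap only reorders the type parameters to match `AccumulableParam[R, T]`, where `R` is the accumulated result read on the driver and `T` is the element type tasks add. A minimal usage sketch, assuming a live `SparkContext` named `sc`; the param object and value types below are illustrative, not part of the commit:

    import org.apache.spark.AccumulableParam

    // R = Set[Int] (what the driver reads), T = Int (what tasks add with +=).
    object SetParam extends AccumulableParam[Set[Int], Int] {
      def addAccumulator(acc: Set[Int], elem: Int): Set[Int] = acc + elem
      def addInPlace(acc1: Set[Int], acc2: Set[Int]): Set[Int] = acc1 ++ acc2
      def zero(initialValue: Set[Int]): Set[Int] = Set.empty[Int]
    }

    val seen = sc.accumulable(Set.empty[Int])(SetParam)    // accumulable[R, T]
    sc.parallelize(1 to 100).foreach(x => seen += x % 10)  // tasks add T values
    println(seen.value)                                    // only the driver reads the R value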

core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala

Lines changed: 15 additions & 10 deletions

@@ -163,18 +163,23 @@ private[broadcast] object HttpBroadcast extends Logging {

   private def write(id: Long, value: Any) {
     val file = getFile(id)
-    val out: OutputStream = {
-      if (compress) {
-        compressionCodec.compressedOutputStream(new FileOutputStream(file))
-      } else {
-        new BufferedOutputStream(new FileOutputStream(file), bufferSize)
+    val fileOutputStream = new FileOutputStream(file)
+    try {
+      val out: OutputStream = {
+        if (compress) {
+          compressionCodec.compressedOutputStream(fileOutputStream)
+        } else {
+          new BufferedOutputStream(fileOutputStream, bufferSize)
+        }
       }
+      val ser = SparkEnv.get.serializer.newInstance()
+      val serOut = ser.serializeStream(out)
+      serOut.writeObject(value)
+      serOut.close()
+      files += file
+    } finally {
+      fileOutputStream.close()
     }
-    val ser = SparkEnv.get.serializer.newInstance()
-    val serOut = ser.serializeStream(out)
-    serOut.writeObject(value)
-    serOut.close()
-    files += file
   }

   private def read[T: ClassTag](id: Long): T = {
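This change, and the two in FileSystemPersistenceEngine below, apply the same idea: open the underlying stream once, do the work in a `try`, and close the stream in a `finally`. A generic loan-pattern helper is one way to factor that out; the sketch below is illustrative only and not code from the commit:

    import java.io.Closeable

    // Open a resource, run `body` against it, and always close it,
    // even when `body` throws.
    def withResource[C <: Closeable, A](open: => C)(body: C => A): A = {
      val resource = open
      try {
        body(resource)
      } finally {
        resource.close()
      }
    }

    // e.g. withResource(new FileOutputStream(file)) { out => out.write(serialized) }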

core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala

Lines changed: 10 additions & 4 deletions

@@ -83,15 +83,21 @@ private[spark] class FileSystemPersistenceEngine(
     val serialized = serializer.toBinary(value)

     val out = new FileOutputStream(file)
-    out.write(serialized)
-    out.close()
+    try {
+      out.write(serialized)
+    } finally {
+      out.close()
+    }
   }

   def deserializeFromFile[T](file: File)(implicit m: Manifest[T]): T = {
     val fileData = new Array[Byte](file.length().asInstanceOf[Int])
     val dis = new DataInputStream(new FileInputStream(file))
-    dis.readFully(fileData)
-    dis.close()
+    try {
+      dis.readFully(fileData)
+    } finally {
+      dis.close()
+    }

     val clazz = m.runtimeClass.asInstanceOf[Class[T]]
     val serializer = serialization.serializerFor(clazz)

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 0 additions & 1 deletion

@@ -29,7 +29,6 @@ import scala.language.postfixOps

 import akka.actor._
 import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
-import org.apache.commons.io.FileUtils

 import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException}
 import org.apache.spark.deploy.{ExecutorDescription, ExecutorState}

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala

Lines changed: 7 additions & 0 deletions

@@ -372,6 +372,13 @@ private[spark] class MesosSchedulerBackend(
     recordSlaveLost(d, slaveId, ExecutorExited(status))
   }

+  override def killTask(taskId: Long, executorId: String, interruptThread: Boolean): Unit = {
+    driver.killTask(
+      TaskID.newBuilder()
+        .setValue(taskId.toString).build()
+    )
+  }
+
   // TODO: query Mesos for number of cores
   override def defaultParallelism() = sc.conf.getInt("spark.default.parallelism", 8)

core/src/main/scala/org/apache/spark/storage/DiskStore.scala

Lines changed: 15 additions & 1 deletion

@@ -73,7 +73,21 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc
     val startTime = System.currentTimeMillis
     val file = diskManager.getFile(blockId)
     val outputStream = new FileOutputStream(file)
-    blockManager.dataSerializeStream(blockId, outputStream, values)
+    try {
+      try {
+        blockManager.dataSerializeStream(blockId, outputStream, values)
+      } finally {
+        // Close outputStream here because it should be closed before file is deleted.
+        outputStream.close()
+      }
+    } catch {
+      case e: Throwable =>
+        if (file.exists()) {
+          file.delete()
+        }
+        throw e
+    }
+
     val length = file.length

     val timeTaken = System.currentTimeMillis - startTime
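Here the inner `finally` guarantees the stream is closed, and the surrounding `catch` removes the partially written block file before rethrowing. A hedged standalone sketch of that control flow (the helper name is illustrative, not from the commit):

    import java.io.{File, FileOutputStream, OutputStream}

    // Write through a fresh stream; always close it, and delete the
    // half-written file if the write itself failed.
    def writeOrCleanUp(file: File)(write: OutputStream => Unit): Unit = {
      val out = new FileOutputStream(file)
      try {
        try {
          write(out)
        } finally {
          out.close()  // close first: the file must not be open when it is deleted
        }
      } catch {
        case e: Throwable =>
          if (file.exists()) {
            file.delete()
          }
          throw e
      }
    }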

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 10 additions & 10 deletions

@@ -35,8 +35,6 @@ import scala.util.control.{ControlThrowable, NonFatal}

 import com.google.common.io.Files
 import com.google.common.util.concurrent.ThreadFactoryBuilder
-import org.apache.commons.io.FileUtils
-import org.apache.commons.io.filefilter.TrueFileFilter
 import org.apache.commons.lang3.SystemUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.log4j.PropertyConfigurator

@@ -710,18 +708,20 @@ private[spark] object Utils extends Logging {
    * Determines if a directory contains any files newer than cutoff seconds.
    *
    * @param dir must be the path to a directory, or IllegalArgumentException is thrown
-   * @param cutoff measured in seconds. Returns true if there are any files in dir newer than this.
+   * @param cutoff measured in seconds. Returns true if there are any files or directories in the
+   *               given directory whose last modified time is later than this many seconds ago
    */
   def doesDirectoryContainAnyNewFiles(dir: File, cutoff: Long): Boolean = {
-    val currentTimeMillis = System.currentTimeMillis
     if (!dir.isDirectory) {
-      throw new IllegalArgumentException (dir + " is not a directory!")
-    } else {
-      val files = FileUtils.listFilesAndDirs(dir, TrueFileFilter.TRUE, TrueFileFilter.TRUE)
-      val cutoffTimeInMillis = (currentTimeMillis - (cutoff * 1000))
-      val newFiles = files.filter { _.lastModified > cutoffTimeInMillis }
-      newFiles.nonEmpty
+      throw new IllegalArgumentException("$dir is not a directory!")
     }
+    val filesAndDirs = dir.listFiles()
+    val cutoffTimeInMillis = System.currentTimeMillis - (cutoff * 1000)
+
+    filesAndDirs.exists(_.lastModified() > cutoffTimeInMillis) ||
+    filesAndDirs.filter(_.isDirectory).exists(
+      subdir => doesDirectoryContainAnyNewFiles(subdir, cutoff)
+    )
   }

   /**
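The rewrite drops the commons-io dependency and recurses over subdirectories by hand, returning true as soon as any entry under `dir` was modified within the last `cutoff` seconds. A test-style usage sketch, assuming it runs somewhere under the `org.apache.spark` package (the `private[spark]` Utils object is not visible outside it); the file names are illustrative:

    import java.io.File
    import com.google.common.io.Files
    import org.apache.spark.util.Utils

    // A directory that just received a file should report new files
    // for a generous cutoff such as 60 seconds.
    val dir: File = Files.createTempDir()
    new File(dir, "fresh.txt").createNewFile()

    assert(Utils.doesDirectoryContainAnyNewFiles(dir, 60))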

dev/merge_spark_pr.py

Lines changed: 1 addition & 2 deletions

@@ -73,11 +73,10 @@ def fail(msg):


 def run_cmd(cmd):
+    print cmd
     if isinstance(cmd, list):
-        print " ".join(cmd)
         return subprocess.check_output(cmd)
     else:
-        print cmd
         return subprocess.check_output(cmd.split(" "))


docs/building-spark.md

Lines changed: 15 additions & 0 deletions

@@ -171,6 +171,21 @@ can be set to control the SBT build. For example:

     sbt/sbt -Pyarn -Phadoop-2.3 assembly

+# Testing with SBT
+
+Some of the tests require Spark to be packaged first, so always run `sbt/sbt assembly` the first time. The following is an example of a correct (build, test) sequence:
+
+    sbt/sbt -Pyarn -Phadoop-2.3 -Phive assembly
+    sbt/sbt -Pyarn -Phadoop-2.3 -Phive test
+
+To run only a specific test suite as follows:
+
+    sbt/sbt -Pyarn -Phadoop-2.3 -Phive "test-only org.apache.spark.repl.ReplSuite"
+
+To run test suites of a specific sub project as follows:
+
+    sbt/sbt -Pyarn -Phadoop-2.3 -Phive core/test
+
 # Speeding up Compilation with Zinc

 [Zinc](https://github.com/typesafehub/zinc) is a long-running server version of SBT's incremental

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ object ScalaReflection {
3333

3434
/** Converts Scala objects to catalyst rows / types */
3535
def convertToCatalyst(a: Any): Any = a match {
36-
case o: Option[_] => o.orNull
36+
case o: Option[_] => o.map(convertToCatalyst).orNull
3737
case s: Seq[_] => s.map(convertToCatalyst)
3838
case m: Map[_, _] => m.map { case (k, v) => convertToCatalyst(k) -> convertToCatalyst(v) }
3939
case p: Product => new GenericRow(p.productIterator.map(convertToCatalyst).toArray)
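The one-line change matters when the `Option` wraps a value that itself needs conversion, such as a `Product`. A hedged before/after illustration, assuming `ScalaReflection` and `GenericRow` are accessible from the calling package; this snippet is not part of the commit:

    import org.apache.spark.sql.catalyst.ScalaReflection
    import org.apache.spark.sql.catalyst.expressions.GenericRow

    // Before: Some((1, "a")) was unwrapped to the raw tuple (1, "a").
    // After: the inner Product is recursively converted to a catalyst row.
    val converted = ScalaReflection.convertToCatalyst(Some((1, "a")))
    assert(converted.isInstanceOf[GenericRow])

    // None still becomes null via orNull.
    assert(ScalaReflection.convertToCatalyst(None) == null)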
