78 changes: 58 additions & 20 deletions core/src/main/scala/org/apache/spark/storage/DiskStore.scala
@@ -27,7 +27,7 @@ import java.util.concurrent.ConcurrentHashMap
import scala.collection.mutable.ListBuffer

import com.google.common.io.{ByteStreams, Closeables, Files}
-import io.netty.channel.FileRegion
+import io.netty.channel.{DefaultFileRegion, FileRegion}
import io.netty.util.AbstractReferenceCounted

import org.apache.spark.{SecurityManager, SparkConf}
@@ -108,25 +108,7 @@ private[spark] class DiskStore(
new EncryptedBlockData(file, blockSize, conf, key)

case _ =>
-val channel = new FileInputStream(file).getChannel()
-if (blockSize < minMemoryMapBytes) {
-  // For small files, directly read rather than memory map.
-  Utils.tryWithSafeFinally {
-    val buf = ByteBuffer.allocate(blockSize.toInt)
-    JavaUtils.readFully(channel, buf)
-    buf.flip()
-    new ByteBufferBlockData(new ChunkedByteBuffer(buf), true)
-  } {
-    channel.close()
-  }
-} else {
-  Utils.tryWithSafeFinally {
-    new ByteBufferBlockData(
-      new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)), true)
-  } {
-    channel.close()
-  }
-}
+new DiskBlockData(conf, file, blockSize)
}
}

@@ -165,6 +147,62 @@ private[spark] class DiskStore(

}

private class DiskBlockData(
conf: SparkConf,
[Contributor] We can pass in minMemoryMapBytes directly.

file: File,
blockSize: Long) extends BlockData {

private val minMemoryMapBytes = conf.getSizeAsBytes("spark.storage.memoryMapThreshold", "2m")

override def toInputStream(): InputStream = new FileInputStream(file)

/**
* Returns a Netty-friendly wrapper for the block's data.
*
* Please see `ManagedBuffer.convertToNetty()` for more details.
*/
override def toNetty(): AnyRef = new DefaultFileRegion(file, 0, size)

override def toChunkedByteBuffer(allocator: (Int) => ByteBuffer): ChunkedByteBuffer = {
Utils.tryWithResource(open()) { channel =>
var remaining = blockSize
val chunks = new ListBuffer[ByteBuffer]()
while (remaining > 0) {
val chunkSize = math.min(remaining, Int.MaxValue)
val chunk = allocator(chunkSize.toInt)
remaining -= chunkSize
JavaUtils.readFully(channel, chunk)
chunk.flip()
chunks += chunk
}
new ChunkedByteBuffer(chunks.toArray)
}
}

override def toByteBuffer(): ByteBuffer = {
[Contributor] We will still hit the 2 GB limitation here; I'm wondering which end-to-end use cases are affected by it.

[Author @eyalfa, Aug 7, 2017] Indeed. I chose to postpone the failure from DiskStore.getBytes to this place, as I believe it introduces no regression while still allowing the more common streaming-like use case.

Furthermore, I think this plays well with the comment about the future deprecation of org.apache.spark.network.buffer.ManagedBuffer#nioByteBuffer, which seems to be the main reason BlockData exposes the toByteBuffer method.

[Author] @cloud-fan, it took me roughly 4 hours, but I looked at both the shuffle code path and BlockManager.getRemoteBytes: the former is robust to large blocks because it uses Netty's streaming capabilities, while the latter seems to be broken, since it does not use Netty's streaming capabilities and actually tries to copy the result buffer into a heap-based buffer. I think this deserves its own JIRA/PR.

I think these two places plus the external shuffle server cover most of the relevant use cases (aside from local caching, which I believe this PR makes 2 GB-proof).

require( blockSize < Int.MaxValue
[Contributor] No space after the opening parenthesis; the comma should be on this line.

, s"can't create a byte buffer of size $blockSize"
+ s" since it exceeds Int.MaxValue ${Int.MaxValue}.")
Utils.tryWithResource(open()) { channel =>
if (blockSize < minMemoryMapBytes) {
// For small files, directly read rather than memory map.
val buf = ByteBuffer.allocate(blockSize.toInt)
JavaUtils.readFully(channel, buf)
buf.flip()
buf
} else {
channel.map(MapMode.READ_ONLY, 0, file.length)
}
}
}

override def size: Long = blockSize

override def dispose(): Unit = {}

private def open() = new FileInputStream(file).getChannel
}
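
A minimal, self-contained sketch of the chunking arithmetic used in toChunkedByteBuffer above, with the maximum chunk size pulled out as a parameter (the review discussion further down suggests making it configurable so tests can exercise the logic with small inputs). The object ChunkingSketch, the helper chunkSizes, and the maxChunkSize parameter are illustrative only and not part of this patch.

import scala.collection.mutable.ListBuffer

object ChunkingSketch {
  // Split a block of blockSize bytes into chunk sizes of at most maxChunkSize bytes,
  // mirroring the while loop in DiskBlockData.toChunkedByteBuffer (which uses Int.MaxValue).
  def chunkSizes(blockSize: Long, maxChunkSize: Int): Seq[Int] = {
    require(blockSize >= 0 && maxChunkSize > 0)
    val chunks = new ListBuffer[Int]()
    var remaining = blockSize
    while (remaining > 0) {
      val chunkSize = math.min(remaining, maxChunkSize.toLong)
      chunks += chunkSize.toInt
      remaining -= chunkSize
    }
    chunks.toList
  }

  def main(args: Array[String]): Unit = {
    // A 3 GB block with the default Int.MaxValue limit splits into two chunks.
    println(chunkSizes(3L * 1024 * 1024 * 1024, Int.MaxValue)) // List(2147483647, 1073741825)
    // With a small limit the same arithmetic can be exercised cheaply in a test.
    println(chunkSizes(10L * 1024, 4096)) // List(4096, 4096, 2048)
  }
}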

private class EncryptedBlockData(
file: File,
blockSize: Long,
core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -1415,6 +1415,79 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE
super.fetchBlockSync(host, port, execId, blockId)
}
}

def testGetOrElseUpdateForLargeBlock(storageLevel: StorageLevel) {
[Contributor] Have you measured how long these tests take? I've seen this tried before in other changes related to the 2 GB limits, and this kind of test was always ridiculously slow.

You can avoid this kind of test by making the chunk size configurable, e.g. in this line you're adding above:

    val chunkSize = math.min(remaining, Int.MaxValue)

Then your test can run fast and not use a lot of memory. You just need to add extra checks that the data is being chunked properly, instead of relying on the JVM not throwing errors at you.

[Author] @vanzin, I've measured: test case times range from 7 to 25 seconds on my laptop. Point well taken 😎

[Contributor] 7-25 seconds is really a long time for a unit test...

[Author] @cloud-fan, I know; it gets even worse when using the === operator.

I'm currently exploring the second direction pointed out by @vanzin, introducing a test-only configuration key to configure the max page size.

[Author] @cloud-fan, @vanzin, taking the parameterized approach, I'd remove most of the tests from BlockManagerSuite, as they'd require propagating this parameter to too many subsystems. So I'm going to modify DiskStore and DiskStoreSuite to use such a parameter. I'm not sure about leaving a test case in BlockManagerSuite that tests DISK_ONLY persistence; what do you guys think?

[Contributor] Shall we do them together in a follow-up PR? I think the test case in DiskStoreSuite is enough.

[Author] Yes, currently working on:

  1. parameterizing DiskStore and DiskStoreSuite
  2. reverting the tests in BlockManagerSuite
  3. reverting the 6 GB change in sbt

[Contributor] It would probably be easier to propagate the chunk size as a SparkConf entry that is not documented. But up to you guys.

store = makeBlockManager(6L * 1024 * 1024 * 1024, "exec1")
def mkBlobs() = {
val rng = new java.util.Random(42)
val buff = new Array[Byte](1024 * 1024)
rng.nextBytes(buff)
Iterator.fill(2 * 1024 + 1) {
buff
}
}
val res1 = store.getOrElseUpdate(
RDDBlockId(42, 0),
storageLevel,
implicitly[ClassTag[Array[Byte]]],
mkBlobs _
)
withClue(res1) {
[Contributor] Does res1 have a reasonable string representation?

[Author] I think it would print an Either whose left side is a case class with these members: an iterator (which prints as an empty/non-empty iterator), an enum, and a number of bytes. The right side is an iterator, which again prints as an empty/non-empty iterator.

assert(res1.isLeft)
assert(res1.left.get.data.zipAll(mkBlobs(), null, null).forall {
case (a, b) =>
[Contributor] Just a === b?

[Author] You can't compare Arrays that way; you get identity equality, which is usually not what you want. Hence the .seq, which forces the array to be wrapped in a Seq.

[Contributor] === is a helper method in ScalaTest and should be able to compare arrays.

[Contributor] Even if === does not work, you have Arrays.equals, which is null-safe.

(A short illustration of these array-equality semantics follows the test helper below.)

a != null &&
b != null &&
a.asInstanceOf[Array[Byte]].seq == b.asInstanceOf[Array[Byte]].seq
})
}
val getResult = store.get(RDDBlockId(42, 0))
withClue(getResult) {
assert(getResult.isDefined)
assert(getResult.get.data.zipAll(mkBlobs(), null, null).forall {
case (a, b) =>
a != null &&
b != null &&
a.asInstanceOf[Array[Byte]].seq == b.asInstanceOf[Array[Byte]].seq
})
}
val getBlockRes = store.getBlockData(RDDBlockId(42, 0))
withClue(getBlockRes) {
try {
assert(getBlockRes.size() >= 2 * 1024 * 1024 * 1024)
Utils.tryWithResource(getBlockRes.createInputStream()) { inpStrm =>
val iter = store
.serializerManager
.dataDeserializeStream(RDDBlockId(42, 0)
, inpStrm)(implicitly[ClassTag[Array[Byte]]])
[Contributor] The comma goes on the previous line. inpStrm is kind of an ugly variable name; pick one of: is, in, inputStream.

assert(iter.zipAll(mkBlobs(), null, null).forall {
case (a, b) =>
a != null &&
b != null &&
a.asInstanceOf[Array[Byte]].seq == b.asInstanceOf[Array[Byte]].seq
})
}
} finally {
getBlockRes.release()
}
}
}
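
The exchange above about a === b comes down to how the JVM and Scala compare arrays. A short, standalone illustration of the semantics the reviewers are discussing (not part of the patch):

import java.util.Arrays

object ArrayEqualitySketch {
  def main(args: Array[String]): Unit = {
    val a = Array[Byte](1, 2, 3)
    val b = Array[Byte](1, 2, 3)

    // == on JVM arrays is reference equality, so equal-content arrays compare as false.
    println(a == b) // false

    // Wrapping the arrays (e.g. via .seq, as the test does) compares element by element.
    println(a.seq == b.seq) // true

    // java.util.Arrays.equals also compares contents and handles nulls.
    println(Arrays.equals(a, b)) // true
    println(Arrays.equals(null: Array[Byte], null: Array[Byte])) // true
  }
}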

test("getOrElseUpdate > 2gb, storage level = disk only") {
[Contributor] Shall we just write a test in DiskStoreSuite?

[Contributor] Oh, we already have one; then why do we have these tests?

[Author] These tests cover more than just the DISK_ONLY storage level; they were crafted when I had bigger ambitions of solving the entire 2 GB issue 😎, before seeing some ~100-file pull requests being abandoned or rejected. Aside from that, these tests also exercise the entire orchestration done by BlockManager when an RDD requests a cached partition; notice that they intentionally make two calls to the BlockManager in order to cover both code paths (cache hit and cache miss).

testGetOrElseUpdateForLargeBlock(StorageLevel.DISK_ONLY)
}

test("getOrElseUpdate > 2gb, storage level = memory deserialized") {
testGetOrElseUpdateForLargeBlock(StorageLevel.MEMORY_ONLY)
}

test("getOrElseUpdate > 2gb, storage level = off-heap") {
testGetOrElseUpdateForLargeBlock(StorageLevel.OFF_HEAP)
}

test("getOrElseUpdate > 2gb, storage level = memory serialized") {
testGetOrElseUpdateForLargeBlock(StorageLevel.MEMORY_ONLY_SER)
}
}

private object BlockManagerSuite {
37 changes: 31 additions & 6 deletions core/src/test/scala/org/apache/spark/storage/DiskStoreSuite.scala
@@ -50,18 +50,18 @@ class DiskStoreSuite extends SparkFunSuite {
val diskStoreMapped = new DiskStore(conf.clone().set(confKey, "0"), diskBlockManager,
securityManager)
diskStoreMapped.putBytes(blockId, byteBuffer)
-val mapped = diskStoreMapped.getBytes(blockId).asInstanceOf[ByteBufferBlockData].buffer
+val mapped = diskStoreMapped.getBytes(blockId).toByteBuffer()
assert(diskStoreMapped.remove(blockId))

val diskStoreNotMapped = new DiskStore(conf.clone().set(confKey, "1m"), diskBlockManager,
securityManager)
diskStoreNotMapped.putBytes(blockId, byteBuffer)
-val notMapped = diskStoreNotMapped.getBytes(blockId).asInstanceOf[ByteBufferBlockData].buffer
+val notMapped = diskStoreNotMapped.getBytes(blockId).toByteBuffer()

// Not possible to do isInstanceOf due to visibility of HeapByteBuffer
-assert(notMapped.getChunks().forall(_.getClass.getName.endsWith("HeapByteBuffer")),
+assert(notMapped.getClass.getName.endsWith("HeapByteBuffer"),
"Expected HeapByteBuffer for un-mapped read")
-assert(mapped.getChunks().forall(_.isInstanceOf[MappedByteBuffer]),
+assert(mapped.isInstanceOf[MappedByteBuffer],
"Expected MappedByteBuffer for mapped read")

def arrayFromByteBuffer(in: ByteBuffer): Array[Byte] = {
@@ -70,8 +70,8 @@ class DiskStoreSuite extends SparkFunSuite {
array
}

-assert(Arrays.equals(mapped.toArray, bytes))
-assert(Arrays.equals(notMapped.toArray, bytes))
+assert(Arrays.equals(new ChunkedByteBuffer(mapped).toArray, bytes))
+assert(Arrays.equals(new ChunkedByteBuffer(notMapped).toArray, bytes))
}

test("block size tracking") {
@@ -92,6 +92,31 @@
assert(diskStore.getSize(blockId) === 0L)
}

test("blocks larger than 2gb") {
val conf = new SparkConf()
val diskBlockManager = new DiskBlockManager(conf, deleteFilesOnStop = true)
val diskStore = new DiskStore(conf, diskBlockManager, new SecurityManager(conf))

[Contributor] Nit: remove this empty line.

val mb = 1024 * 1024
val gb = 1024L * mb

val blockId = BlockId("rdd_1_2")
diskStore.put(blockId) { chan =>
val arr = new Array[Byte](mb)
for {
_ <- 0 until 3072
} {
val buf = ByteBuffer.wrap(arr)
while (buf.hasRemaining()) {
chan.write(buf)
}
}
}

val blockData = diskStore.getBytes(blockId)
[Author] @kiszk, this is the test case I was referring to. I introduced it before (hopefully) fixing the bug in DiskStore.getBytes.

assert(blockData.size == 3 * gb)
}
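
If the chunk size were made configurable, as discussed in the review comments above, this test could also verify chunk boundaries cheaply. Even without that, one inexpensive extra check is that toByteBuffer rejects blocks over Int.MaxValue, since DiskBlockData guards it with require (which throws IllegalArgumentException). A sketch of such assertions, not part of the patch; it assumes the diskStore, blockId and gb values defined in the test above:

    val blockData = diskStore.getBytes(blockId)
    assert(blockData.size === 3 * gb)

    // toByteBuffer cannot represent more than Int.MaxValue bytes in a single buffer,
    // so the require(...) guard in DiskBlockData should reject this block.
    intercept[IllegalArgumentException] {
      blockData.toByteBuffer()
    }

    // toChunkedByteBuffer would split the block into Int.MaxValue-sized chunks, but
    // materializing 3 GB on the heap is expensive, which is exactly why the review
    // suggests a configurable chunk size for tests:
    // val chunked = blockData.toChunkedByteBuffer(ByteBuffer.allocate)
    // assert(chunked.getChunks().map(_.limit().toLong).sum === 3 * gb)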

test("block data encryption") {
val testDir = Utils.createTempDir()
val testData = new Array[Byte](128 * 1024)
2 changes: 1 addition & 1 deletion project/SparkBuild.scala
@@ -790,7 +790,7 @@ object TestSettings {
javaOptions in Test ++= System.getProperties.asScala.filter(_._1.startsWith("spark"))
.map { case (k,v) => s"-D$k=$v" }.toSeq,
javaOptions in Test += "-ea",
javaOptions in Test ++= "-Xmx3g -Xss4096k"
javaOptions in Test ++= "-Xmx6g -Xss4096k"
[Contributor] I'm a little worried about this change. Since the change to BlockManagerSuite is not very related to this PR, can we revert it and revisit it in a follow-up PR? Then we can unblock this PR.

[Author] @cloud-fan, let's wait a few hours and see what the other people CCed on this (the last ones to edit the build) have to say about it. If they are also worried, or do not comment, I'll revert it.

I must say I'm reluctant to revert these tests, as I personally believe that the lack of such tests contributed to Spark's 2 GB issues, including this one.

[Member] I am +1 for separating it if that can be done. Let's get the changes we are sure of into the code base first.

.split(" ").toSeq,
javaOptions += "-Xmx3g",
// Exclude tags defined in a system property