[SPARK-3151] [Block Manager] DiskStore.getBytes fails for files larger than 2GB #18855
Changes from 10 commits
DiskStore.scala

@@ -27,7 +27,7 @@ import java.util.concurrent.ConcurrentHashMap
 import scala.collection.mutable.ListBuffer

 import com.google.common.io.{ByteStreams, Closeables, Files}
-import io.netty.channel.FileRegion
+import io.netty.channel.{DefaultFileRegion, FileRegion}
 import io.netty.util.AbstractReferenceCounted

 import org.apache.spark.{SecurityManager, SparkConf}

@@ -108,25 +108,7 @@ private[spark] class DiskStore(
         new EncryptedBlockData(file, blockSize, conf, key)

       case _ =>
-        val channel = new FileInputStream(file).getChannel()
-        if (blockSize < minMemoryMapBytes) {
-          // For small files, directly read rather than memory map.
-          Utils.tryWithSafeFinally {
-            val buf = ByteBuffer.allocate(blockSize.toInt)
-            JavaUtils.readFully(channel, buf)
-            buf.flip()
-            new ByteBufferBlockData(new ChunkedByteBuffer(buf), true)
-          } {
-            channel.close()
-          }
-        } else {
-          Utils.tryWithSafeFinally {
-            new ByteBufferBlockData(
-              new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)), true)
-          } {
-            channel.close()
-          }
-        }
+        new DiskBlockData(conf, file, blockSize)
     }
   }

@@ -165,6 +147,62 @@ private[spark] class DiskStore(
 }

+private class DiskBlockData(
+    conf: SparkConf,
+    file: File,
+    blockSize: Long) extends BlockData {
+
+  private val minMemoryMapBytes = conf.getSizeAsBytes("spark.storage.memoryMapThreshold", "2m")
+
+  override def toInputStream(): InputStream = new FileInputStream(file)
+
+  /**
+   * Returns a Netty-friendly wrapper for the block's data.
+   *
+   * Please see `ManagedBuffer.convertToNetty()` for more details.
+   */
+  override def toNetty(): AnyRef = new DefaultFileRegion(file, 0, size)
+
+  override def toChunkedByteBuffer(allocator: (Int) => ByteBuffer): ChunkedByteBuffer = {
+    Utils.tryWithResource(open()) { channel =>
+      var remaining = blockSize
+      val chunks = new ListBuffer[ByteBuffer]()
+      while (remaining > 0) {
+        val chunkSize = math.min(remaining, Int.MaxValue)
+        val chunk = allocator(chunkSize.toInt)
+        remaining -= chunkSize
+        JavaUtils.readFully(channel, chunk)
+        chunk.flip()
+        chunks += chunk
+      }
+      new ChunkedByteBuffer(chunks.toArray)
+    }
+  }
+
+  override def toByteBuffer(): ByteBuffer = {
Contributor: we will still hit the 2g limitation here; I'm wondering which end-to-end use cases are affected by it.

Author: Indeed. Furthermore, I think this plays well with the comment about future deprecation of

Author: @cloud-fan
+    require(blockSize < Int.MaxValue,
+      s"can't create a byte buffer of size $blockSize" +
+        s" since it exceeds Int.MaxValue ${Int.MaxValue}.")
+    Utils.tryWithResource(open()) { channel =>
+      if (blockSize < minMemoryMapBytes) {
+        // For small files, directly read rather than memory map.
+        val buf = ByteBuffer.allocate(blockSize.toInt)
+        JavaUtils.readFully(channel, buf)
+        buf.flip()
+        buf
+      } else {
+        channel.map(MapMode.READ_ONLY, 0, file.length)
+      }
+    }
+  }
+
+  override def size: Long = blockSize
+
+  override def dispose(): Unit = {}
+
+  private def open() = new FileInputStream(file).getChannel
+}
+
 private class EncryptedBlockData(
     file: File,
     blockSize: Long,
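The diff above reads blocks larger than 2 GB by splitting them into several ByteBuffers of at most Int.MaxValue bytes each, so no single buffer ever hits the JVM's 2 GB limit. The standalone sketch below (not part of the patch; the object and method names are made up for illustration) reproduces just the chunking arithmetic from DiskBlockData.toChunkedByteBuffer:

```scala
import scala.collection.mutable.ListBuffer

// Illustrative only: mirrors the loop in DiskBlockData.toChunkedByteBuffer,
// but computes chunk sizes instead of allocating real buffers.
object ChunkingSketch {
  def chunkSizes(blockSize: Long): Seq[Int] = {
    var remaining = blockSize
    val sizes = ListBuffer[Int]()
    while (remaining > 0) {
      // Each chunk is capped at Int.MaxValue bytes, the most one ByteBuffer can hold.
      val chunkSize = math.min(remaining, Int.MaxValue)
      sizes += chunkSize.toInt
      remaining -= chunkSize
    }
    sizes.toList
  }

  def main(args: Array[String]): Unit = {
    val threeGb = 3L * 1024 * 1024 * 1024
    // A 3 GB block splits into two chunks: 2147483647 bytes and 1073741825 bytes.
    println(chunkSizes(threeGb))
  }
}
```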
BlockManagerSuite.scala

@@ -1415,6 +1415,79 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE
       super.fetchBlockSync(host, port, execId, blockId)
     }
   }

+  def testGetOrElseUpdateForLargeBlock(storageLevel: StorageLevel) {
+    store = makeBlockManager(6L * 1024 * 1024 * 1024, "exec1")
+    def mkBlobs() = {
+      val rng = new java.util.Random(42)
+      val buff = new Array[Byte](1024 * 1024)
+      rng.nextBytes(buff)
+      Iterator.fill(2 * 1024 + 1) {
+        buff
+      }
+    }
+    val res1 = store.getOrElseUpdate(
+      RDDBlockId(42, 0),
+      storageLevel,
+      implicitly[ClassTag[Array[Byte]]],
+      mkBlobs _
+    )
+    withClue(res1) {
+      assert(res1.isLeft)
+      assert(res1.left.get.data.zipAll(mkBlobs(), null, null).forall {
+        case (a, b) =>
+          a != null &&
+            b != null &&
+            a.asInstanceOf[Array[Byte]].seq == b.asInstanceOf[Array[Byte]].seq
+      })
+    }
+    val getResult = store.get(RDDBlockId(42, 0))
+    withClue(getResult) {
+      assert(getResult.isDefined)
+      assert(getResult.get.data.zipAll(mkBlobs(), null, null).forall {
+        case (a, b) =>
+          a != null &&
+            b != null &&
+            a.asInstanceOf[Array[Byte]].seq == b.asInstanceOf[Array[Byte]].seq
+      })
+    }
+    val getBlockRes = store.getBlockData(RDDBlockId(42, 0))
+    withClue(getBlockRes) {
+      try {
+        assert(getBlockRes.size() >= 2L * 1024 * 1024 * 1024)
+        Utils.tryWithResource(getBlockRes.createInputStream()) { inpStrm =>
+          val iter = store
+            .serializerManager
+            .dataDeserializeStream(RDDBlockId(42, 0), inpStrm)(implicitly[ClassTag[Array[Byte]]])
+          assert(iter.zipAll(mkBlobs(), null, null).forall {
+            case (a, b) =>
+              a != null &&
+                b != null &&
+                a.asInstanceOf[Array[Byte]].seq == b.asInstanceOf[Array[Byte]].seq
+          })
+        }
+      } finally {
+        getBlockRes.release()
+      }
+    }
+  }
+
+  test("getOrElseUpdate > 2gb, storage level = disk only") {
+    testGetOrElseUpdateForLargeBlock(StorageLevel.DISK_ONLY)
+  }
+
+  test("getOrElseUpdate > 2gb, storage level = memory deserialized") {
+    testGetOrElseUpdateForLargeBlock(StorageLevel.MEMORY_ONLY)
+  }
+
+  test("getOrElseUpdate > 2gb, storage level = off-heap") {
+    testGetOrElseUpdateForLargeBlock(StorageLevel.OFF_HEAP)
+  }
+
+  test("getOrElseUpdate > 2gb, storage level = memory serialized") {
+    testGetOrElseUpdateForLargeBlock(StorageLevel.MEMORY_ONLY_SER)
+  }
 }

 private object BlockManagerSuite {
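As a quick sanity check on the test above (standalone sketch, not part of the patch): mkBlobs yields 2 * 1024 + 1 = 2049 references to a 1 MiB array, so the stored block holds just over 2 GiB and therefore crosses the Int.MaxValue boundary this PR addresses, which is also why the block manager is sized at 6 GB.

```scala
// Standalone arithmetic check for the sizes used in testGetOrElseUpdateForLargeBlock.
object LargeBlockSizes {
  def main(args: Array[String]): Unit = {
    val blobCount = 2 * 1024 + 1              // 2049 blobs produced by mkBlobs()
    val blobSize = 1024 * 1024                // each blob is 1 MiB
    val total = blobCount.toLong * blobSize   // 2148532224 bytes
    println(total)
    println(total > Int.MaxValue)             // true: the block exceeds 2 GiB - 1 byte
  }
}
```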
DiskStoreSuite.scala

@@ -50,18 +50,18 @@ class DiskStoreSuite extends SparkFunSuite {
     val diskStoreMapped = new DiskStore(conf.clone().set(confKey, "0"), diskBlockManager,
       securityManager)
     diskStoreMapped.putBytes(blockId, byteBuffer)
-    val mapped = diskStoreMapped.getBytes(blockId).asInstanceOf[ByteBufferBlockData].buffer
+    val mapped = diskStoreMapped.getBytes(blockId).toByteBuffer()
     assert(diskStoreMapped.remove(blockId))

     val diskStoreNotMapped = new DiskStore(conf.clone().set(confKey, "1m"), diskBlockManager,
       securityManager)
     diskStoreNotMapped.putBytes(blockId, byteBuffer)
-    val notMapped = diskStoreNotMapped.getBytes(blockId).asInstanceOf[ByteBufferBlockData].buffer
+    val notMapped = diskStoreNotMapped.getBytes(blockId).toByteBuffer()

     // Not possible to do isInstanceOf due to visibility of HeapByteBuffer
-    assert(notMapped.getChunks().forall(_.getClass.getName.endsWith("HeapByteBuffer")),
+    assert(notMapped.getClass.getName.endsWith("HeapByteBuffer"),
       "Expected HeapByteBuffer for un-mapped read")
-    assert(mapped.getChunks().forall(_.isInstanceOf[MappedByteBuffer]),
+    assert(mapped.isInstanceOf[MappedByteBuffer],
       "Expected MappedByteBuffer for mapped read")

     def arrayFromByteBuffer(in: ByteBuffer): Array[Byte] = {

@@ -70,8 +70,8 @@ class DiskStoreSuite extends SparkFunSuite {
       array
     }

-    assert(Arrays.equals(mapped.toArray, bytes))
-    assert(Arrays.equals(notMapped.toArray, bytes))
+    assert(Arrays.equals(new ChunkedByteBuffer(mapped).toArray, bytes))
+    assert(Arrays.equals(new ChunkedByteBuffer(notMapped).toArray, bytes))
   }

   test("block size tracking") {

@@ -92,6 +92,31 @@ class DiskStoreSuite extends SparkFunSuite {
     assert(diskStore.getSize(blockId) === 0L)
   }

+  test("blocks larger than 2gb") {
+    val conf = new SparkConf()
+    val diskBlockManager = new DiskBlockManager(conf, deleteFilesOnStop = true)
+    val diskStore = new DiskStore(conf, diskBlockManager, new SecurityManager(conf))
+

Contributor: nit: remove this empty line

+    val mb = 1024 * 1024
+    val gb = 1024L * mb
+
+    val blockId = BlockId("rdd_1_2")
+    diskStore.put(blockId) { chan =>
+      val arr = new Array[Byte](mb)
+      for {
+        _ <- 0 until 3072
+      } {
+        val buf = ByteBuffer.wrap(arr)
+        while (buf.hasRemaining()) {
+          chan.write(buf)
+        }
+      }
+    }
+
+    val blockData = diskStore.getBytes(blockId)

Author: @kiszk, this is the test case I was referring to.

+    assert(blockData.size == 3 * gb)
+  }
+
   test("block data encryption") {
     val testDir = Utils.createTempDir()
     val testData = new Array[Byte](128 * 1024)
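The "blocks larger than 2gb" test writes its 3 GB payload through the WritableByteChannel handed out by diskStore.put in 1 MiB slices, so the write path never needs a buffer anywhere near Int.MaxValue either. Below is a standalone sketch of that pattern (illustrative names, writing 3 MiB to a plain temp file instead of going through DiskStore):

```scala
import java.io.File
import java.nio.ByteBuffer
import java.nio.channels.{FileChannel, WritableByteChannel}
import java.nio.file.StandardOpenOption

object SlicedWriteSketch {
  // Write `totalMib` MiB to the channel in bounded 1 MiB slices, looping because
  // a single write() call may consume only part of the buffer.
  def writeInSlices(chan: WritableByteChannel, totalMib: Int): Unit = {
    val slice = new Array[Byte](1024 * 1024)
    for (_ <- 0 until totalMib) {
      val buf = ByteBuffer.wrap(slice)
      while (buf.hasRemaining()) {
        chan.write(buf)
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val tmp = File.createTempFile("sliced-write", ".bin")
    tmp.deleteOnExit()
    val chan = FileChannel.open(tmp.toPath, StandardOpenOption.WRITE)
    try writeInSlices(chan, 3) finally chan.close()  // the test uses 3072 slices
    println(tmp.length())  // 3145728
  }
}
```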
project/SparkBuild.scala (object TestSettings)

@@ -790,7 +790,7 @@ object TestSettings {
     javaOptions in Test ++= System.getProperties.asScala.filter(_._1.startsWith("spark"))
       .map { case (k,v) => s"-D$k=$v" }.toSeq,
     javaOptions in Test += "-ea",
-    javaOptions in Test ++= "-Xmx3g -Xss4096k"
+    javaOptions in Test ++= "-Xmx6g -Xss4096k"
       .split(" ").toSeq,
     javaOptions += "-Xmx3g",
     // Exclude tags defined in a system property
Review comment on the minMemoryMapBytes line in DiskBlockData: we can pass in minMemoryMapBytes directly.
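A minimal sketch of what that suggestion could look like (hypothetical, not part of this diff): DiskStore already reads spark.storage.memoryMapThreshold, so it could hand the resulting value to DiskBlockData instead of the class re-reading it from SparkConf. Only the constructor and the one call site would change; the other method bodies stay as in the patch.

```scala
// Hypothetical refactor sketched from the review comment: the threshold is a plain
// constructor parameter, so DiskBlockData no longer needs a SparkConf at all.
private class DiskBlockData(
    minMemoryMapBytes: Long,
    file: File,
    blockSize: Long) extends BlockData {

  // toInputStream, toNetty, toChunkedByteBuffer, size and dispose as in the diff above.

  override def toByteBuffer(): ByteBuffer = {
    require(blockSize < Int.MaxValue,
      s"can't create a byte buffer of size $blockSize since it exceeds Int.MaxValue.")
    Utils.tryWithResource(open()) { channel =>
      if (blockSize < minMemoryMapBytes) {  // the passed-in threshold, not a conf lookup
        val buf = ByteBuffer.allocate(blockSize.toInt)
        JavaUtils.readFully(channel, buf)
        buf.flip()
        buf
      } else {
        channel.map(MapMode.READ_ONLY, 0, file.length)
      }
    }
  }

  private def open() = new FileInputStream(file).getChannel
}

// Call site in DiskStore.getBytes:
//   case _ => new DiskBlockData(minMemoryMapBytes, file, blockSize)
```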