Skip to content

Commit 6970bc8

Browse files
committed
Add extensive tests for StorageListener and the new code in StorageUtils
1 parent e080b9e commit 6970bc8

File tree

5 files changed

+294
-37
lines changed

5 files changed

+294
-37
lines changed

core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ class StorageStatusListener extends SparkListener {
3535

3636
/** Update storage status list to reflect updated block statuses */
3737
private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) {
38-
val filteredStatus = executorIdToStorageStatus.get(execId)
39-
filteredStatus.foreach { storageStatus =>
38+
executorIdToStorageStatus.get(execId).foreach { storageStatus =>
4039
updatedBlocks.foreach { case (blockId, updatedStatus) =>
4140
if (updatedStatus.storageLevel == StorageLevel.NONE) {
4241
storageStatus.removeBlock(blockId)

core/src/main/scala/org/apache/spark/storage/StorageUtils.scala

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,21 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) {
7777
*/
7878
def blocks: Map[BlockId, BlockStatus] = _nonRddBlocks ++ rddBlocks
7979

80-
/** Return the RDD blocks stored in this block manager. */
80+
/**
81+
* Return the RDD blocks stored in this block manager.
82+
*
83+
* Note that this is somewhat expensive, as it involves cloning the underlying maps and then
84+
* concatenating them together. Much faster alternatives exist for common operations such as
85+
* getting the memory, disk, and off-heap memory sizes occupied by a given RDD.
86+
*/
8187
def rddBlocks: Map[BlockId, BlockStatus] = _rddBlocks.flatMap { case (_, blocks) => blocks }
8288

8389
/** Return the blocks that belong to the given RDD stored in this block manager. */
8490
def rddBlocksById(rddId: Int): Map[BlockId, BlockStatus] = {
8591
_rddBlocks.get(rddId).getOrElse(Map.empty)
8692
}
8793

88-
/** Add the given block to this storage status. */
94+
/** Add the given block to this storage status. If it already exists, overwrite it. */
8995
def addBlock(blockId: BlockId, blockStatus: BlockStatus): Unit = {
9096
blockId match {
9197
case RDDBlockId(rddId, _) =>
@@ -162,15 +168,13 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) {
162168
* Return the number of blocks stored in this block manager in O(RDDs) time.
163169
* Note that this is much faster than `this.blocks.size`, which is O(blocks) time.
164170
*/
165-
def numBlocks: Int = {
166-
_nonRddBlocks.size + _rddBlocks.values.map(_.size).reduceOption(_ + _).getOrElse(0)
167-
}
171+
def numBlocks: Int = _nonRddBlocks.size + numRddBlocks
168172

169173
/**
170174
* Return the number of RDD blocks stored in this block manager in O(RDDs) time.
171175
* Note that this is much faster than `this.rddBlocks.size`, which is O(RDD blocks) time.
172176
*/
173-
def numRddBlocks: Int = _rddBlocks.keys.map(numRddBlocksById).reduceOption(_ + _).getOrElse(0)
177+
def numRddBlocks: Int = _rddBlocks.values.map(_.size).reduceOption(_ + _).getOrElse(0)
174178

175179
/**
176180
* Return the number of blocks that belong to the given RDD in O(1) time.
@@ -182,32 +186,32 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) {
182186
/** Return the memory used by this block manager. */
183187
def memUsed: Long = blocks.values.map(_.memSize).reduceOption(_ + _).getOrElse(0L)
184188

185-
/** Return the memory used by the given RDD in this block manager. */
186-
def memUsedByRDD(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_._1).getOrElse(0L)
187-
188189
/** Return the memory remaining in this block manager. */
189190
def memRemaining: Long = maxMem - memUsed
190191

191192
/** Return the disk space used by this block manager. */
192193
def diskUsed: Long = blocks.values.map(_.diskSize).reduceOption(_ + _).getOrElse(0L)
193194

194-
/** Return the disk space used by the given RDD in this block manager. */
195-
def diskUsedByRDD(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_._2).getOrElse(0L)
196-
197195
/** Return the off-heap space used by this block manager. */
198196
def offHeapUsed: Long = blocks.values.map(_.tachyonSize).reduceOption(_ + _).getOrElse(0L)
199197

200-
/** Return the off-heap space used by the given RDD in this block manager. */
198+
/** Return the memory used by the given RDD in this block manager in O(1) time. */
199+
def memUsedByRDD(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_._1).getOrElse(0L)
200+
201+
/** Return the disk space used by the given RDD in this block manager in O(1) time. */
202+
def diskUsedByRDD(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_._2).getOrElse(0L)
203+
204+
/** Return the off-heap space used by the given RDD in this block manager in O(1) time. */
201205
def offHeapUsedByRdd(rddId: Int): Long = _rddStorageInfo.get(rddId).map(_._3).getOrElse(0L)
202206

203207
/** Return the storage level, if any, used by the given RDD in this block manager. */
204208
def rddStorageLevel(rddId: Int): Option[StorageLevel] = _rddStorageInfo.get(rddId).map(_._4)
205209

206210
/**
207-
* Helper function to update the given RDD's storage information based on the
208-
* (possibly negative) changes in memory, disk, and off-heap memory usages.
211+
* Helper function to update the given RDD's storage information based on the (possibly
212+
* negative) changes in memory, disk, and off-heap memory usages. This is exposed for testing.
209213
*/
210-
private def updateRddStorageInfo(
214+
private[spark] def updateRddStorageInfo(
211215
rddId: Int,
212216
changeInMem: Long,
213217
changeInDisk: Long,

core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ private[ui] class StorageTab(parent: SparkUI) extends WebUITab(parent, "storage"
4141
*/
4242
@DeveloperApi
4343
class StorageListener(storageStatusListener: StorageStatusListener) extends SparkListener {
44-
private val _rddInfoMap = mutable.Map[Int, RDDInfo]()
44+
private[ui] val _rddInfoMap = mutable.Map[Int, RDDInfo]() // exposed for testing
4545

4646
def storageStatusList = storageStatusListener.storageStatusList
4747

core/src/test/scala/org/apache/spark/storage/StorageSuite.scala

Lines changed: 107 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,21 @@ import org.scalatest.FunSuite
2424
*/
2525
class StorageSuite extends FunSuite {
2626
private val memAndDisk = StorageLevel.MEMORY_AND_DISK
27+
private val memOnly = StorageLevel.MEMORY_ONLY
28+
private val diskOnly = StorageLevel.DISK_ONLY
2729

28-
// For testing add/update/removeBlock (for non-RDD blocks)
30+
// For testing add, update, and remove (for non-RDD blocks)
2931
private def storageStatus1: StorageStatus = {
3032
val status = new StorageStatus(BlockManagerId("big", "dog", 1, 1), 1000L)
3133
assert(status.blocks.isEmpty)
3234
assert(status.rddBlocks.isEmpty)
33-
assert(status.memUsed === 0)
35+
assert(status.memUsed === 0L)
3436
assert(status.memRemaining === 1000L)
35-
assert(status.diskUsed === 0)
36-
status.addBlock(TestBlockId("foo"), BlockStatus(memAndDisk, 10L, 20L, 0L))
37-
status.addBlock(TestBlockId("fee"), BlockStatus(memAndDisk, 10L, 20L, 0L))
38-
status.addBlock(TestBlockId("faa"), BlockStatus(memAndDisk, 10L, 20L, 0L))
37+
assert(status.diskUsed === 0L)
38+
assert(status.offHeapUsed === 0L)
39+
status.addBlock(TestBlockId("foo"), BlockStatus(memAndDisk, 10L, 20L, 1L))
40+
status.addBlock(TestBlockId("fee"), BlockStatus(memAndDisk, 10L, 20L, 1L))
41+
status.addBlock(TestBlockId("faa"), BlockStatus(memAndDisk, 10L, 20L, 1L))
3942
status
4043
}
4144

@@ -49,16 +52,18 @@ class StorageSuite extends FunSuite {
4952
assert(status.memUsed === 30L)
5053
assert(status.memRemaining === 970L)
5154
assert(status.diskUsed === 60L)
55+
assert(status.offHeapUsed === 3L)
5256
}
5357

5458
test("storage status update non-RDD blocks") {
5559
val status = storageStatus1
56-
status.updateBlock(TestBlockId("foo"), BlockStatus(memAndDisk, 50L, 100L, 0L))
60+
status.updateBlock(TestBlockId("foo"), BlockStatus(memAndDisk, 50L, 100L, 1L))
5761
status.updateBlock(TestBlockId("fee"), BlockStatus(memAndDisk, 100L, 20L, 0L))
5862
assert(status.blocks.size === 3)
5963
assert(status.memUsed === 160L)
6064
assert(status.memRemaining === 840L)
6165
assert(status.diskUsed === 140L)
66+
assert(status.offHeapUsed === 2L)
6267
}
6368

6469
test("storage status remove non-RDD blocks") {
@@ -70,17 +75,18 @@ class StorageSuite extends FunSuite {
7075
assert(status.memUsed === 10L)
7176
assert(status.memRemaining === 990L)
7277
assert(status.diskUsed === 20L)
78+
assert(status.offHeapUsed === 1L)
7379
}
7480

75-
// For testing add/update/remove/contains/getBlock and numBlocks
81+
// For testing add, update, remove, get, contains, etc. for both RDD and non-RDD blocks
7682
private def storageStatus2: StorageStatus = {
7783
val status = new StorageStatus(BlockManagerId("big", "dog", 1, 1), 1000L)
7884
assert(status.rddBlocks.isEmpty)
7985
status.addBlock(TestBlockId("dan"), BlockStatus(memAndDisk, 10L, 20L, 0L))
8086
status.addBlock(TestBlockId("man"), BlockStatus(memAndDisk, 10L, 20L, 0L))
81-
status.addBlock(RDDBlockId(0, 0), BlockStatus(memAndDisk, 10L, 20L, 0L))
82-
status.addBlock(RDDBlockId(1, 1), BlockStatus(memAndDisk, 100L, 200L, 0L))
83-
status.addBlock(RDDBlockId(2, 2), BlockStatus(memAndDisk, 10L, 20L, 0L))
87+
status.addBlock(RDDBlockId(0, 0), BlockStatus(memAndDisk, 10L, 20L, 1L))
88+
status.addBlock(RDDBlockId(1, 1), BlockStatus(memAndDisk, 100L, 200L, 1L))
89+
status.addBlock(RDDBlockId(2, 2), BlockStatus(memAndDisk, 10L, 20L, 1L))
8490
status.addBlock(RDDBlockId(2, 3), BlockStatus(memAndDisk, 10L, 20L, 0L))
8591
status.addBlock(RDDBlockId(2, 4), BlockStatus(memAndDisk, 10L, 40L, 0L))
8692
status
@@ -109,6 +115,19 @@ class StorageSuite extends FunSuite {
109115
assert(status.diskUsedByRDD(0) === 20L)
110116
assert(status.diskUsedByRDD(1) === 200L)
111117
assert(status.diskUsedByRDD(2) === 80L)
118+
assert(status.offHeapUsedByRdd(0) === 1L)
119+
assert(status.offHeapUsedByRdd(1) === 1L)
120+
assert(status.offHeapUsedByRdd(2) === 1L)
121+
assert(status.rddStorageLevel(0) === Some(memAndDisk))
122+
assert(status.rddStorageLevel(1) === Some(memAndDisk))
123+
assert(status.rddStorageLevel(2) === Some(memAndDisk))
124+
125+
// Verify default values for RDDs that don't exist
126+
assert(status.rddBlocksById(10).isEmpty)
127+
assert(status.memUsedByRDD(10) === 0L)
128+
assert(status.diskUsedByRDD(10) === 0L)
129+
assert(status.offHeapUsedByRdd(10) === 0L)
130+
assert(status.rddStorageLevel(10) === None)
112131
}
113132

114133
test("storage status update RDD blocks") {
@@ -127,6 +146,9 @@ class StorageSuite extends FunSuite {
127146
assert(status.diskUsedByRDD(0) === 0L)
128147
assert(status.diskUsedByRDD(1) === 200L)
129148
assert(status.diskUsedByRDD(2) === 1060L)
149+
assert(status.offHeapUsedByRdd(0) === 0L)
150+
assert(status.offHeapUsedByRdd(1) === 1L)
151+
assert(status.offHeapUsedByRdd(2) === 0L)
130152
}
131153

132154
test("storage status remove RDD blocks") {
@@ -150,6 +172,9 @@ class StorageSuite extends FunSuite {
150172
assert(status.diskUsedByRDD(0) === 20L)
151173
assert(status.diskUsedByRDD(1) === 0L)
152174
assert(status.diskUsedByRDD(2) === 20L)
175+
assert(status.offHeapUsedByRdd(0) === 1L)
176+
assert(status.offHeapUsedByRdd(1) === 0L)
177+
assert(status.offHeapUsedByRdd(2) === 0L)
153178
}
154179

155180
test("storage status containsBlock") {
@@ -182,23 +207,87 @@ class StorageSuite extends FunSuite {
182207
assert(status.blocks.get(RDDBlockId(100, 0)) === status.getBlock(RDDBlockId(100, 0)))
183208
}
184209

185-
test("storage status numBlocks") {
210+
test("storage status num[Rdd]Blocks") {
186211
val status = storageStatus2
187212
assert(status.blocks.size === status.numBlocks)
213+
assert(status.rddBlocks.size === status.numRddBlocks)
214+
status.addBlock(TestBlockId("Foo"), BlockStatus(memAndDisk, 0L, 0L, 100L))
188215
status.addBlock(RDDBlockId(4, 4), BlockStatus(memAndDisk, 0L, 0L, 100L))
189-
assert(status.blocks.size === status.numBlocks)
190216
status.addBlock(RDDBlockId(4, 8), BlockStatus(memAndDisk, 0L, 0L, 100L))
191217
assert(status.blocks.size === status.numBlocks)
192-
status.updateBlock(RDDBlockId(0, 0), BlockStatus(memAndDisk, 0L, 0L, 100L))
193-
assert(status.blocks.size === status.numBlocks)
194-
// update a block that doesn't exist
195-
status.updateBlock(RDDBlockId(100, 99), BlockStatus(memAndDisk, 0L, 0L, 100L))
218+
assert(status.rddBlocks.size === status.numRddBlocks)
219+
assert(status.rddBlocksById(4).size === status.numRddBlocksById(4))
220+
assert(status.rddBlocksById(10).size === status.numRddBlocksById(10))
221+
status.updateBlock(TestBlockId("Foo"), BlockStatus(memAndDisk, 0L, 10L, 400L))
222+
status.updateBlock(RDDBlockId(4, 0), BlockStatus(memAndDisk, 0L, 0L, 100L))
223+
status.updateBlock(RDDBlockId(4, 8), BlockStatus(memAndDisk, 0L, 0L, 100L))
224+
status.updateBlock(RDDBlockId(10, 10), BlockStatus(memAndDisk, 0L, 0L, 100L))
196225
assert(status.blocks.size === status.numBlocks)
197-
status.removeBlock(RDDBlockId(0, 0))
226+
assert(status.rddBlocks.size === status.numRddBlocks)
227+
assert(status.rddBlocksById(4).size === status.numRddBlocksById(4))
228+
assert(status.rddBlocksById(10).size === status.numRddBlocksById(10))
229+
assert(status.rddBlocksById(100).size === status.numRddBlocksById(100))
230+
status.removeBlock(RDDBlockId(4, 0))
231+
status.removeBlock(RDDBlockId(10, 10))
198232
assert(status.blocks.size === status.numBlocks)
233+
assert(status.rddBlocks.size === status.numRddBlocks)
234+
assert(status.rddBlocksById(4).size === status.numRddBlocksById(4))
235+
assert(status.rddBlocksById(10).size === status.numRddBlocksById(10))
199236
// remove a block that doesn't exist
200237
status.removeBlock(RDDBlockId(1000, 999))
201238
assert(status.blocks.size === status.numBlocks)
239+
assert(status.rddBlocks.size === status.numRddBlocks)
240+
assert(status.rddBlocksById(4).size === status.numRddBlocksById(4))
241+
assert(status.rddBlocksById(10).size === status.numRddBlocksById(10))
242+
assert(status.rddBlocksById(1000).size === status.numRddBlocksById(1000))
243+
}
244+
245+
test("storage status updateRddStorageInfo") {
246+
val status = storageStatus2
247+
// Positive delta
248+
status.updateRddStorageInfo(0, 1000L, 1000L, 1000L, memOnly)
249+
status.updateRddStorageInfo(1, 2000L, 2000L, 2000L, diskOnly)
250+
status.updateRddStorageInfo(2, 3000L, 3000L, 3000L, memAndDisk)
251+
assert(status.memUsedByRDD(0) === 1010L)
252+
assert(status.memUsedByRDD(1) === 2100L)
253+
assert(status.memUsedByRDD(2) === 3030L)
254+
assert(status.diskUsedByRDD(0) === 1020L)
255+
assert(status.diskUsedByRDD(1) === 2200L)
256+
assert(status.diskUsedByRDD(2) === 3080L)
257+
assert(status.offHeapUsedByRdd(0) === 1001L)
258+
assert(status.offHeapUsedByRdd(1) === 2001L)
259+
assert(status.offHeapUsedByRdd(2) === 3001L)
260+
assert(status.rddStorageLevel(0) === Some(memOnly))
261+
assert(status.rddStorageLevel(1) === Some(diskOnly))
262+
assert(status.rddStorageLevel(2) === Some(memAndDisk))
263+
264+
// Negative delta
265+
status.updateRddStorageInfo(0, -100L, -100L, -100L, memOnly)
266+
status.updateRddStorageInfo(1, -200L, -200L, -200L, diskOnly)
267+
status.updateRddStorageInfo(2, -300L, -300L, -300L, memAndDisk)
268+
assert(status.memUsedByRDD(0) === 910L)
269+
assert(status.memUsedByRDD(1) === 1900L)
270+
assert(status.memUsedByRDD(2) === 2730L)
271+
assert(status.diskUsedByRDD(0) === 920L)
272+
assert(status.diskUsedByRDD(1) === 2000L)
273+
assert(status.diskUsedByRDD(2) === 2780L)
274+
assert(status.offHeapUsedByRdd(0) === 901L)
275+
assert(status.offHeapUsedByRdd(1) === 1801L)
276+
assert(status.offHeapUsedByRdd(2) === 2701L)
277+
278+
// Negative delta so large that the RDDs are no longer persisted
279+
status.updateRddStorageInfo(0, -10000L, -10000L, -10000L, memOnly)
280+
status.updateRddStorageInfo(1, -20000L, -20000L, -20000L, diskOnly)
281+
status.updateRddStorageInfo(2, -30000L, -30000L, -30000L, memAndDisk)
282+
assert(status.memUsedByRDD(0) === 0L)
283+
assert(status.memUsedByRDD(1) === 0L)
284+
assert(status.memUsedByRDD(2) === 0L)
285+
assert(status.diskUsedByRDD(0) === 0L)
286+
assert(status.diskUsedByRDD(1) === 0L)
287+
assert(status.diskUsedByRDD(2) === 0L)
288+
assert(status.offHeapUsedByRdd(0) === 0L)
289+
assert(status.offHeapUsedByRdd(1) === 0L)
290+
assert(status.offHeapUsedByRdd(2) === 0L)
202291
}
203292

204293
// For testing StorageUtils.updateRddInfo and StorageUtils.getRddBlockLocations

0 commit comments

Comments
 (0)