@@ -60,7 +60,7 @@ import org.apache.spark.util.{CompletionIterator, TaskCompletionListener, Utils}
6060 * Note that zero-sized blocks are already excluded, which happened in
6161 * [[org.apache.spark.MapOutputTracker.convertMapStatuses ]].
6262 * @param mapOutputTracker [[MapOutputTracker ]] for falling back to fetching the original blocks if
63- * we fail to fetch shuffle chunks when push based shuffle is enabled.
63+ * we fail to fetch shuffle chunks when push based shuffle is enabled.
6464 * @param streamWrapper A function to wrap the returned input stream.
6565 * @param maxBytesInFlight max size (in bytes) of remote blocks to fetch at any given point.
6666 * @param maxReqsInFlight max number of remote requests to fetch blocks at any given point.
@@ -371,9 +371,9 @@ final class ShuffleBlockFetcherIterator(
371371 // blocks. Remote blocks are further split into FetchRequests of size at most maxBytesInFlight
372372 // in order to limit the amount of data in flight
373373 val collectedRemoteRequests = new ArrayBuffer [FetchRequest ]
374- val hostLocalBlocksCurrentIteration = mutable.LinkedHashSet [(BlockId , Int )]()
375374 var localBlockBytes = 0L
376375 var hostLocalBlockBytes = 0L
376+ var numHostLocalBlocks = 0
377377 var pushMergedLocalBlockBytes = 0L
378378 val prevNumBlocksToFetch = numBlocksToFetch
379379
@@ -404,7 +404,7 @@ final class ShuffleBlockFetcherIterator(
404404 val blocksForAddress =
405405 mergedBlockInfos.map(info => (info.blockId, info.size, info.mapIndex))
406406 hostLocalBlocksByExecutor += address -> blocksForAddress
407- hostLocalBlocksCurrentIteration ++ = blocksForAddress.map(info => (info._1, info._3))
407+ numHostLocalBlocks + = blocksForAddress.size
408408 hostLocalBlockBytes += mergedBlockInfos.map(_.size).sum
409409 } else {
410410 val (_, timeCost) = Utils .timeTakenMs[Unit ] {
@@ -419,21 +419,22 @@ final class ShuffleBlockFetcherIterator(
419419 pushMergedLocalBlockBytes
420420 val blocksToFetchCurrentIteration = numBlocksToFetch - prevNumBlocksToFetch
421421 assert(blocksToFetchCurrentIteration == localBlocks.size +
422- hostLocalBlocksCurrentIteration.size + numRemoteBlocks + pushMergedLocalBlocks.size,
423- s " The number of non-empty blocks $blocksToFetchCurrentIteration doesn't equal to " +
424- s " the number of local blocks ${localBlocks.size} + " +
425- s " the number of host-local blocks ${hostLocalBlocksCurrentIteration.size } " +
422+ numHostLocalBlocks + numRemoteBlocks + pushMergedLocalBlocks.size,
423+ s " The number of non-empty blocks $blocksToFetchCurrentIteration doesn't equal to the sum " +
424+ s " of the number of local blocks ${localBlocks.size} + " +
425+ s " the number of host-local blocks ${numHostLocalBlocks } " +
426426 s " the number of push-merged-local blocks ${pushMergedLocalBlocks.size} " +
427427 s " + the number of remote blocks ${numRemoteBlocks} " )
428428 logInfo(s " Getting $blocksToFetchCurrentIteration " +
429429 s " ( ${Utils .bytesToString(totalBytes)}) non-empty blocks including " +
430430 s " ${localBlocks.size} ( ${Utils .bytesToString(localBlockBytes)}) local and " +
431- s " ${hostLocalBlocksCurrentIteration.size } ( ${Utils .bytesToString(hostLocalBlockBytes)}) " +
431+ s " ${numHostLocalBlocks } ( ${Utils .bytesToString(hostLocalBlockBytes)}) " +
432432 s " host-local and ${pushMergedLocalBlocks.size} " +
433433 s " ( ${Utils .bytesToString(pushMergedLocalBlockBytes)}) " +
434- s " local push-merged and $numRemoteBlocks ( ${Utils .bytesToString(remoteBlockBytes)}) " +
434+ s " push-merged-local and $numRemoteBlocks ( ${Utils .bytesToString(remoteBlockBytes)}) " +
435435 s " remote blocks " )
436- this .hostLocalBlocks ++= hostLocalBlocksCurrentIteration
436+ this .hostLocalBlocks ++= hostLocalBlocksByExecutor.values
437+ .flatMap { infos => infos.map(info => (info._1, info._3)) }
437438 collectedRemoteRequests
438439 }
439440
@@ -883,9 +884,9 @@ final class ShuffleBlockFetcherIterator(
883884 // We get this result in 3 cases:
884885 // 1. Failure to fetch the data of a remote shuffle chunk. In this case, the
885886 // blockId is a ShuffleBlockChunkId.
886- // 2. Failure to read the local push-merged meta. In this case, the blockId is
887+ // 2. Failure to read the push-merged-local meta. In this case, the blockId is
887888 // ShuffleBlockId.
888- // 3. Failure to get the local push-merged directories from the ESS. In this case, the
889+ // 3. Failure to get the push-merged-local directories from the ESS. In this case, the
889890 // blockId is ShuffleBlockId.
890891 if (pushBasedFetchHelper.isRemotePushMergedBlockAddress(address)) {
891892 numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1
@@ -900,8 +901,8 @@ final class ShuffleBlockFetcherIterator(
900901 // a SuccessFetchResult or a FailureFetchResult.
901902 result = null
902903
903- case PushMergedLocalMetaFetchResult (shuffleId, reduceId, bitmaps, localDirs, _ ) =>
904- // Fetch local push-merged shuffle block data as multiple shuffle chunks
904+ case PushMergedLocalMetaFetchResult (shuffleId, reduceId, bitmaps, localDirs) =>
905+ // Fetch push-merged-local shuffle block data as multiple shuffle chunks
905906 val shuffleBlockId = ShuffleBlockId (shuffleId, SHUFFLE_PUSH_MAP_ID , reduceId)
906907 try {
907908 val bufs : Seq [ManagedBuffer ] = blockManager.getLocalMergedBlockData(shuffleBlockId,
@@ -922,17 +923,17 @@ final class ShuffleBlockFetcherIterator(
922923 }
923924 } catch {
924925 case e : Exception =>
925- // If we see an exception with reading local push-merged data , we fallback to
926- // fetch the original blocks. We do not report block fetch failure
926+ // If we see an exception while reading the push-merged-local index file, we fall back
927+ // to fetching the original blocks. We do not report block fetch failure
927928 // and will continue with the remaining local block read.
928- logWarning(s " Error occurred while fetching local push-merged data , " +
929+ logWarning(s " Error occurred while reading push-merged-local index , " +
929930 s " prepare to fetch the original blocks " , e)
930931 pushBasedFetchHelper.initiateFallbackFetchForPushMergedBlock(
931932 shuffleBlockId, pushBasedFetchHelper.localShuffleMergerBlockMgrId)
932933 }
933934 result = null
934935
935- case PushMergedRemoteMetaFetchResult (shuffleId, reduceId, blockSize, bitmaps, address, _ ) =>
936+ case PushMergedRemoteMetaFetchResult (shuffleId, reduceId, blockSize, bitmaps, address) =>
936937 // The original meta request is processed so we decrease numBlocksToFetch and
937938 // numBlocksInFlightPerAddress by 1. We will collect new shuffle chunks request and the
938939 // count of this is added to numBlocksToFetch in collectFetchReqsFromMergedBlocks.
@@ -946,7 +947,7 @@ final class ShuffleBlockFetcherIterator(
946947 // Set result to null to force another iteration.
947948 result = null
948949
949- case PushMergedRemoteMetaFailedFetchResult (shuffleId, reduceId, address, _ ) =>
950+ case PushMergedRemoteMetaFailedFetchResult (shuffleId, reduceId, address) =>
950951 // The original meta request failed so we decrease numBlocksInFlightPerAddress by 1.
951952 numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1
952953 // If we fail to fetch the meta of a push-merged block, we fall back to fetching the
@@ -1071,8 +1072,8 @@ final class ShuffleBlockFetcherIterator(
10711072 results.put(result)
10721073 }
10731074
1074- private [storage] def incrementNumBlocksToFetch ( moreBlocksToFetch : Int ): Unit = {
1075- numBlocksToFetch += moreBlocksToFetch
1075+ private [storage] def decreaseNumBlocksToFetch ( blocksFetched : Int ): Unit = {
1076+ numBlocksToFetch -= blocksFetched
10761077 }
10771078
10781079 /**
@@ -1091,7 +1092,7 @@ final class ShuffleBlockFetcherIterator(
10911092 originalLocalBlocks, originalHostLocalBlocksByExecutor, originalMergedLocalBlocks)
10921093 // Add the remote requests into our queue in a random order
10931094 fetchRequests ++= Utils .randomize(originalRemoteReqs)
1094- logInfo(s " Started ${originalRemoteReqs.size} fallback remote requests for push-merged " )
1095+ logInfo(s " Created ${originalRemoteReqs.size} fallback remote requests for push-merged " )
10951096 // fetch all the fallback blocks that are local.
10961097 fetchLocalBlocks(originalLocalBlocks)
10971098 // Merged local blocks should be empty during fallback
@@ -1246,12 +1247,6 @@ object ShuffleBlockFetcherIterator {
12461247 }
12471248 }
12481249
1249- /**
1250- * Dummy shuffle block id to fill into [[PushMergedRemoteMetaFetchResult ]] and
1251- * [[PushMergedRemoteMetaFailedFetchResult ]], to match the [[FetchResult ]] trait.
1252- */
1253- private val DUMMY_SHUFFLE_BLOCK_ID = ShuffleBlockId (- 1 , - 1 , - 1 )
1254-
12551250 /**
12561251 * This function is used to merge blocks when doBatchFetch is true. Blocks which have the
12571252 * same `mapId` can be merged into one block batch. The block batch is specified by a range
@@ -1436,8 +1431,7 @@ object ShuffleBlockFetcherIterator {
14361431 reduceId : Int ,
14371432 blockSize : Long ,
14381433 bitmaps : Array [RoaringBitmap ],
1439- address : BlockManagerId ,
1440- blockId : BlockId = DUMMY_SHUFFLE_BLOCK_ID ) extends FetchResult
1434+ address : BlockManagerId ) extends FetchResult
14411435
14421436 /**
14431437 * Result of a failure while fetching the meta information for a remote push-merged block.
@@ -1449,11 +1443,10 @@ object ShuffleBlockFetcherIterator {
14491443 private [storage] case class PushMergedRemoteMetaFailedFetchResult (
14501444 shuffleId : Int ,
14511445 reduceId : Int ,
1452- address : BlockManagerId ,
1453- blockId : BlockId = DUMMY_SHUFFLE_BLOCK_ID ) extends FetchResult
1446+ address : BlockManagerId ) extends FetchResult
14541447
14551448 /**
1456- * Result of a successful fetch of meta information for a local push-merged block.
1449+ * Result of a successful fetch of meta information for a push-merged-local block.
14571450 *
14581451 * @param shuffleId shuffle id.
14591452 * @param reduceId reduce id.
@@ -1464,6 +1457,5 @@ object ShuffleBlockFetcherIterator {
14641457 shuffleId : Int ,
14651458 reduceId : Int ,
14661459 bitmaps : Array [RoaringBitmap ],
1467- localDirs : Array [String ],
1468- blockId : BlockId = DUMMY_SHUFFLE_BLOCK_ID ) extends FetchResult
1460+ localDirs : Array [String ]) extends FetchResult
14691461}
0 commit comments