-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-9853][Core] Optimize shuffle fetch of contiguous partition IDs #19788
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e947bcb
53affd4
e437a26
12163f3
7aa805b
9cb1f0f
2799886
80c8da9
b410f9c
85a4f49
c7db28d
ff4bece
80bb53c
e18eb59
69cde07
978e10d
2a1a97c
d8691ee
f30f057
6c684bc
a9ab62e
87c9af6
7fa0fb9
ab4aa5f
e5e7c38
6ee294a
583666b
c133776
fc0fe77
7009a5e
4a31e9c
08d2ca1
5efa1af
3412bcb
63d9eb1
a0cd415
3da48d8
c650e65
752f90c
b91c3e9
9ffe59d
9354ed0
0f49142
535fe5f
087422f
2191f8d
2ca4092
6496abf
e91dbd5
6cb7110
05e4465
eb752aa
694ec2d
45d7096
4850d33
28355b1
7a65cc4
a9ffdee
96b0082
ace48fd
86aa1fb
fcf434b
a360bbf
2f28a7e
5bd6d73
aa6134b
c6ebe0e
be54f49
1c91d20
f810e28
8a1815f
076894f
b33b4dd
91aeff9
ea1795a
dd980dd
e9d8620
5933bf8
57fab14
c75e016
8398120
2424be0
bd9f70e
5e4430a
401bddb
3d4fc7e
039ae85
92c0ab6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -161,22 +161,69 @@ public void registerExecutor( | |
| executors.put(fullId, executorInfo); | ||
| } | ||
|
|
||
| // For testing | ||
| public ManagedBuffer getBlockData( | ||
| String appId, | ||
| String execId, | ||
| int shuffleId, | ||
| int mapId, | ||
| int reduceId) { | ||
| return getBlockData(appId, execId, shuffleId, mapId, reduceId, 1); | ||
| } | ||
|
|
||
| /** | ||
| * Obtains a FileSegmentManagedBuffer from (shuffleId, mapId, reduceId). We make assumptions | ||
| * about how the hash and sort based shuffles store their data. | ||
| * Obtains a FileSegmentManagedBuffer from (shuffleId, mapId, reduceId, numReducers). We make | ||
| * assumptions about how the hash and sort based shuffles store their data. | ||
| */ | ||
| public ManagedBuffer getBlockData( | ||
| String appId, | ||
| String execId, | ||
| int shuffleId, | ||
| int mapId, | ||
| int reduceId) { | ||
| int reduceId, | ||
| int numReducers) { | ||
| ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId)); | ||
| if (executor == null) { | ||
| throw new RuntimeException( | ||
| String.format("Executor is not registered (appId=%s, execId=%s)", appId, execId)); | ||
| } | ||
| return getSortBasedShuffleBlockData(executor, shuffleId, mapId, reduceId); | ||
| return getSortBasedShuffleBlockData(executor, shuffleId, mapId, reduceId, numReducers); | ||
| } | ||
|
|
||
| public static boolean isShuffleBlock(String[] blockIdParts) { | ||
| return blockIdParts.length == 4 && blockIdParts[0].equals("shuffle"); | ||
| } | ||
|
|
||
| public static int[] getBlockIdParts(String blockId) { | ||
| String[] blockIdParts = blockId.split("_"); | ||
| if (!isShuffleBlock(blockIdParts)) { | ||
| throw new IllegalArgumentException("Unexpected shuffle block id format: " + blockId); | ||
| } | ||
| return new int[] { Integer.parseInt(blockIdParts[2]), Integer.parseInt(blockIdParts[3]) }; | ||
| } | ||
|
|
||
| // Currently, for all input blockIds, we can assume that block ids with the same mapper id | ||
| // are consecutive in the map output file. Logically they might not be consecutive because | ||
| // of zero-sized blocks, but those have already been filtered out on the client side. | ||
| public static ArrayList<ArrayList<int[]>> mergeContinuousShuffleBlockIds(String[] blockIds) { | ||
| ArrayList<int[]> shuffleBlockIds = new ArrayList<>(); | ||
| ArrayList<ArrayList<int[]>> arrayShuffleBlockIds = new ArrayList<>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we only need to return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Initially, I want to keep it the same as
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, seems like numBlocks is not enough, which includes possible zero size blocks. |
||
|
|
||
| for (String blockId: blockIds) { | ||
| int[] blockIdParts = getBlockIdParts(blockId); | ||
| if (shuffleBlockIds.size() == 0) { | ||
| shuffleBlockIds.add(blockIdParts); | ||
| } else { | ||
| if (blockIdParts[0] != shuffleBlockIds.get(0)[0]) { | ||
| arrayShuffleBlockIds.add(shuffleBlockIds); | ||
| shuffleBlockIds = new ArrayList<>(); | ||
| } | ||
| shuffleBlockIds.add(blockIdParts); | ||
| } | ||
| } | ||
| arrayShuffleBlockIds.add(shuffleBlockIds); | ||
|
|
||
| return arrayShuffleBlockIds; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -280,13 +327,14 @@ public boolean accept(File dir, String name) { | |
| * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId. | ||
| */ | ||
| private ManagedBuffer getSortBasedShuffleBlockData( | ||
| ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId) { | ||
| ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId, int numReducers) { | ||
| File indexFile = getFile(executor.localDirs, executor.subDirsPerLocalDir, | ||
| "shuffle_" + shuffleId + "_" + mapId + "_0.index"); | ||
|
|
||
| try { | ||
| ShuffleIndexInformation shuffleIndexInformation = shuffleIndexCache.get(indexFile); | ||
| ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex(reduceId); | ||
| ShuffleIndexRecord shuffleIndexRecord = | ||
| shuffleIndexInformation.getIndex(reduceId, numReducers); | ||
| return new FileSegmentManagedBuffer( | ||
| conf, | ||
| getFile(executor.localDirs, executor.subDirsPerLocalDir, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import java.io.IOException; | ||
| import java.nio.ByteBuffer; | ||
| import java.util.Arrays; | ||
| import java.util.ArrayList; | ||
|
|
||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
@@ -50,6 +51,10 @@ public class OneForOneBlockFetcher { | |
| private final TransportClient client; | ||
| private final OpenBlocks openMessage; | ||
| private final String[] blockIds; | ||
| // In adaptive execution, one returned chunk might contain data for several consecutive blockIds; | ||
| // blockIdIndices records the mapping between each chunk and its block ids: chunk i covers the | ||
| // half-open range blockIds[blockIdIndices[i]] through blockIds[blockIdIndices[i + 1] - 1]. | ||
| private int[] blockIdIndices = null; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a comment to explain the relationship between blocks and chunks. |
||
| private final BlockFetchingListener listener; | ||
| private final ChunkReceivedCallback chunkCallback; | ||
| private final TransportConf transportConf; | ||
|
|
@@ -64,7 +69,7 @@ public OneForOneBlockFetcher( | |
| String[] blockIds, | ||
| BlockFetchingListener listener, | ||
| TransportConf transportConf) { | ||
| this(client, appId, execId, blockIds, listener, transportConf, null); | ||
| this(client, appId, execId, blockIds, listener, transportConf, null, false); | ||
| } | ||
|
|
||
| public OneForOneBlockFetcher( | ||
|
|
@@ -74,9 +79,10 @@ public OneForOneBlockFetcher( | |
| String[] blockIds, | ||
| BlockFetchingListener listener, | ||
| TransportConf transportConf, | ||
| DownloadFileManager downloadFileManager) { | ||
| DownloadFileManager downloadFileManager, | ||
| boolean fetchContinuousShuffleBlocksInBatch) { | ||
| this.client = client; | ||
| this.openMessage = new OpenBlocks(appId, execId, blockIds); | ||
| this.openMessage = new OpenBlocks(appId, execId, blockIds, fetchContinuousShuffleBlocksInBatch); | ||
| this.blockIds = blockIds; | ||
| this.listener = listener; | ||
| this.chunkCallback = new ChunkCallback(); | ||
|
|
@@ -89,13 +95,15 @@ private class ChunkCallback implements ChunkReceivedCallback { | |
| @Override | ||
| public void onSuccess(int chunkIndex, ManagedBuffer buffer) { | ||
| // On receipt of a chunk, pass it upwards as a block. | ||
| listener.onBlockFetchSuccess(blockIds[chunkIndex], buffer); | ||
| listener.onBlockFetchSuccess(Arrays.copyOfRange(blockIds, blockIdIndices[chunkIndex], | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a way to avoid copy? e.g. if we change the callback interface to not take |
||
| blockIdIndices[chunkIndex + 1]), buffer); | ||
| } | ||
|
|
||
| @Override | ||
| public void onFailure(int chunkIndex, Throwable e) { | ||
| // On receipt of a failure, fail every block from chunkIndex onwards. | ||
| String[] remainingBlockIds = Arrays.copyOfRange(blockIds, chunkIndex, blockIds.length); | ||
| String[] remainingBlockIds = Arrays.copyOfRange(blockIds, blockIdIndices[chunkIndex], | ||
| blockIds.length); | ||
| failRemainingBlocks(remainingBlockIds, e); | ||
| } | ||
| } | ||
|
|
@@ -117,6 +125,25 @@ public void onSuccess(ByteBuffer response) { | |
| streamHandle = (StreamHandle) BlockTransferMessage.Decoder.fromByteBuffer(response); | ||
| logger.trace("Successfully opened blocks {}, preparing to fetch chunks.", streamHandle); | ||
|
|
||
| // initialize blockIdIndices | ||
| if (streamHandle.numChunks == blockIds.length) { | ||
| blockIdIndices = new int[streamHandle.numChunks + 1]; | ||
| for (int i = 0; i < blockIdIndices.length; i++) { | ||
| blockIdIndices[i] = i; | ||
| } | ||
| } else { | ||
| // server fetches continuous shuffle blocks in batch | ||
| ArrayList<ArrayList<int[]>> arrayShuffleBlockIds = | ||
| ExternalShuffleBlockResolver.mergeContinuousShuffleBlockIds(blockIds); | ||
| assert(streamHandle.numChunks == arrayShuffleBlockIds.size()); | ||
| blockIdIndices = new int[arrayShuffleBlockIds.size() + 1]; | ||
| blockIdIndices[0] = 0; | ||
| for (int i = 1; i < blockIdIndices.length; i++) { | ||
| blockIdIndices[i] = blockIdIndices[i - 1] + arrayShuffleBlockIds.get(i - 1).size(); | ||
| } | ||
| } | ||
| assert blockIdIndices[blockIdIndices.length - 1] == blockIds.length; | ||
|
|
||
| // Immediately request all chunks -- we expect that the total size of the request is | ||
| // reasonable due to higher level chunking in [[ShuffleBlockFetcherIterator]]. | ||
| for (int i = 0; i < streamHandle.numChunks; i++) { | ||
|
|
@@ -143,12 +170,10 @@ public void onFailure(Throwable e) { | |
|
|
||
| /** Invokes the "onBlockFetchFailure" callback for every listed block id. */ | ||
| private void failRemainingBlocks(String[] failedBlockIds, Throwable e) { | ||
| for (String blockId : failedBlockIds) { | ||
| try { | ||
| listener.onBlockFetchFailure(blockId, e); | ||
| } catch (Exception e2) { | ||
| logger.error("Error in block fetch failure callback", e2); | ||
| } | ||
| try { | ||
| listener.onBlockFetchFailure(failedBlockIds, e); | ||
| } catch (Exception e2) { | ||
| logger.error("Error in block fetch failure callback", e2); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -173,7 +198,8 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { | |
|
|
||
| @Override | ||
| public void onComplete(String streamId) throws IOException { | ||
| listener.onBlockFetchSuccess(blockIds[chunkIndex], channel.closeAndRead()); | ||
| listener.onBlockFetchSuccess(Arrays.copyOfRange(blockIds, blockIdIndices[chunkIndex], | ||
| blockIdIndices[chunkIndex + 1]), channel.closeAndRead()); | ||
| if (!downloadFileManager.registerTempFileToClean(targetFile)) { | ||
| targetFile.delete(); | ||
| } | ||
|
|
@@ -183,7 +209,8 @@ public void onComplete(String streamId) throws IOException { | |
| public void onFailure(String streamId, Throwable cause) throws IOException { | ||
| channel.close(); | ||
| // On receipt of a failure, fail every block from chunkIndex onwards. | ||
| String[] remainingBlockIds = Arrays.copyOfRange(blockIds, chunkIndex, blockIds.length); | ||
| String[] remainingBlockIds = | ||
| Arrays.copyOfRange(blockIds, blockIdIndices[chunkIndex], blockIds.length); | ||
| failRemainingBlocks(remainingBlockIds, cause); | ||
| targetFile.delete(); | ||
| } | ||
|
|
||
This comment was marked as resolved.
Sorry, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.
This comment was marked as resolved.
Sorry, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.