-
Notifications
You must be signed in to change notification settings - Fork 593
HDDS-9130. [hsync] Combine WriteData and PutBlock requests into one #5980
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0a4ef7d
62565d0
d7412f1
feaa93f
a4cfde2
3fdd4d4
62d60c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,6 +55,8 @@ | |
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.common.base.Preconditions; | ||
|
|
||
| import static org.apache.hadoop.hdds.DatanodeVersion.COMBINED_PUTBLOCK_WRITECHUNK_RPC; | ||
| import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.putBlockAsync; | ||
| import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.writeChunkAsync; | ||
| import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; | ||
|
|
@@ -140,6 +142,7 @@ public class BlockOutputStream extends OutputStream { | |
| private int replicationIndex; | ||
| private Pipeline pipeline; | ||
| private final ContainerClientMetrics clientMetrics; | ||
| private boolean allowPutBlockPiggybacking; | ||
|
|
||
| /** | ||
| * Creates a new BlockOutputStream. | ||
|
|
@@ -211,6 +214,20 @@ public BlockOutputStream( | |
| this.clientMetrics = clientMetrics; | ||
| this.pipeline = pipeline; | ||
| this.streamBufferArgs = streamBufferArgs; | ||
| this.allowPutBlockPiggybacking = config.getEnablePutblockPiggybacking() && | ||
| allDataNodesSupportPiggybacking(); | ||
| } | ||
|
|
||
| private boolean allDataNodesSupportPiggybacking() { | ||
| // return true only if all DataNodes in the pipeline are on a version | ||
| // that supports PutBlock piggybacking. | ||
| for (DatanodeDetails dn : pipeline.getNodes()) { | ||
| if (dn.getCurrentVersion() < | ||
| COMBINED_PUTBLOCK_WRITECHUNK_RPC.toProtoValue()) { | ||
| return false; | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| void refreshCurrentBuffer() { | ||
|
|
@@ -499,22 +516,8 @@ ContainerCommandResponseProto> executePutBlock(boolean close, | |
| } | ||
| // if the ioException is not set, putBlock is successful | ||
| if (getIoException() == null && !force) { | ||
| BlockID responseBlockID = BlockID.getFromProtobuf( | ||
| e.getPutBlock().getCommittedBlockLength().getBlockID()); | ||
| Preconditions.checkState(blockID.get().getContainerBlockID() | ||
| .equals(responseBlockID.getContainerBlockID())); | ||
| // updates the bcsId of the block | ||
| blockID.set(responseBlockID); | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug( | ||
| "Adding index " + asyncReply.getLogIndex() + " flushLength " | ||
| + flushPos + " numBuffers " + byteBufferList.size() | ||
| + " blockID " + blockID + " bufferPool size" + bufferPool | ||
| .getSize() + " currentBufferIndex " + bufferPool | ||
| .getCurrentBufferIndex()); | ||
| } | ||
| // for standalone protocol, logIndex will always be 0. | ||
| updateCommitInfo(asyncReply, byteBufferList); | ||
| handleSuccessfulPutBlock(e.getPutBlock().getCommittedBlockLength(), | ||
| asyncReply, flushPos, byteBufferList); | ||
| } | ||
| return e; | ||
| }, responseExecutor).exceptionally(e -> { | ||
|
|
@@ -551,7 +554,7 @@ public void flush() throws IOException { | |
| } | ||
| } | ||
|
|
||
| private void writeChunk(ChunkBuffer buffer) | ||
| private void writeChunkCommon(ChunkBuffer buffer) | ||
| throws IOException { | ||
| // This data in the buffer will be pushed to datanode and a reference will | ||
| // be added to the bufferList. Once putBlock gets executed, this list will | ||
|
|
@@ -562,7 +565,18 @@ private void writeChunk(ChunkBuffer buffer) | |
| bufferList = new ArrayList<>(); | ||
| } | ||
| bufferList.add(buffer); | ||
| writeChunkToContainer(buffer.duplicate(0, buffer.position())); | ||
| } | ||
|
|
||
| private void writeChunk(ChunkBuffer buffer) | ||
| throws IOException { | ||
| writeChunkCommon(buffer); | ||
| writeChunkToContainer(buffer.duplicate(0, buffer.position()), false); | ||
| } | ||
|
|
||
| private void writeChunkAndPutBlock(ChunkBuffer buffer) | ||
| throws IOException { | ||
| writeChunkCommon(buffer); | ||
| writeChunkToContainer(buffer.duplicate(0, buffer.position()), true); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -594,14 +608,23 @@ private void handleFlushInternal(boolean close) | |
| if (totalDataFlushedLength < writtenDataLength) { | ||
| refreshCurrentBuffer(); | ||
| Preconditions.checkArgument(currentBuffer.position() > 0); | ||
| if (currentBuffer.hasRemaining()) { | ||
| writeChunk(currentBuffer); | ||
| } | ||
|
|
||
| // This can be a partially filled chunk. Since we are flushing the buffer | ||
| // here, we just limit this buffer to the current position. So that next | ||
| // write will happen in new buffer | ||
| updateFlushLength(); | ||
| executePutBlock(close, false); | ||
| if (currentBuffer.hasRemaining()) { | ||
| if (allowPutBlockPiggybacking) { | ||
| updateFlushLength(); | ||
| writeChunkAndPutBlock(currentBuffer); | ||
| } else { | ||
| writeChunk(currentBuffer); | ||
| updateFlushLength(); | ||
| executePutBlock(close, false); | ||
| } | ||
| } else { | ||
| updateFlushLength(); | ||
| executePutBlock(close, false); | ||
| } | ||
| } else if (close) { | ||
| // forcing an "empty" putBlock if stream is being closed without new | ||
| // data since latest flush - we need to send the "EOF" flag | ||
|
|
@@ -713,7 +736,7 @@ public boolean isClosed() { | |
| * @return | ||
| */ | ||
| CompletableFuture<ContainerCommandResponseProto> writeChunkToContainer( | ||
| ChunkBuffer chunk) throws IOException { | ||
| ChunkBuffer chunk, boolean putBlockPiggybacking) throws IOException { | ||
| int effectiveChunkSize = chunk.remaining(); | ||
| final long offset = chunkOffset.getAndAdd(effectiveChunkSize); | ||
| final ByteString data = chunk.toByteString( | ||
|
|
@@ -726,6 +749,8 @@ CompletableFuture<ContainerCommandResponseProto> writeChunkToContainer( | |
| .setChecksumData(checksumData.getProtoBufMessage()) | ||
| .build(); | ||
|
|
||
| long flushPos = totalDataFlushedLength; | ||
|
|
||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("Writing chunk {} length {} at offset {}", | ||
| chunkInfo.getChunkName(), effectiveChunkSize, offset); | ||
|
|
@@ -743,42 +768,93 @@ CompletableFuture<ContainerCommandResponseProto> writeChunkToContainer( | |
| + ", previous = " + previous); | ||
| } | ||
|
|
||
| final List<ChunkBuffer> byteBufferList; | ||
| CompletableFuture<ContainerProtos.ContainerCommandResponseProto> | ||
| validateFuture = null; | ||
| try { | ||
| XceiverClientReply asyncReply = writeChunkAsync(xceiverClient, chunkInfo, | ||
| blockID.get(), data, tokenString, replicationIndex); | ||
| CompletableFuture<ContainerProtos.ContainerCommandResponseProto> | ||
| respFuture = asyncReply.getResponse(); | ||
| CompletableFuture<ContainerProtos.ContainerCommandResponseProto> | ||
| validateFuture = respFuture.thenApplyAsync(e -> { | ||
| try { | ||
| validateResponse(e); | ||
| } catch (IOException sce) { | ||
| respFuture.completeExceptionally(sce); | ||
| } | ||
| return e; | ||
| }, responseExecutor).exceptionally(e -> { | ||
| String msg = "Failed to write chunk " + chunkInfo.getChunkName() + | ||
| " into block " + blockID; | ||
| LOG.debug("{}, exception: {}", msg, e.getLocalizedMessage()); | ||
| CompletionException ce = new CompletionException(msg, e); | ||
| setIoException(ce); | ||
| throw ce; | ||
| }); | ||
| BlockData blockData = null; | ||
|
|
||
| if (config.getIncrementalChunkList()) { | ||
| updateBlockDataForWriteChunk(chunk); | ||
| } else { | ||
| containerBlockData.addChunks(chunkInfo); | ||
| } | ||
| if (putBlockPiggybacking) { | ||
| Preconditions.checkNotNull(bufferList); | ||
| byteBufferList = bufferList; | ||
| bufferList = null; | ||
| Preconditions.checkNotNull(byteBufferList); | ||
|
|
||
| blockData = containerBlockData.build(); | ||
| LOG.debug("piggyback chunk list {}", blockData); | ||
|
|
||
| if (config.getIncrementalChunkList()) { | ||
| // remove any chunks in the containerBlockData list. | ||
| // since they are sent. | ||
| containerBlockData.clearChunks(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to remove the last chunk as well here, which is updated in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No -- |
||
| } | ||
| } else { | ||
| byteBufferList = null; | ||
| } | ||
|
|
||
| XceiverClientReply asyncReply = writeChunkAsync(xceiverClient, chunkInfo, | ||
| blockID.get(), data, tokenString, replicationIndex, blockData); | ||
| CompletableFuture<ContainerProtos.ContainerCommandResponseProto> | ||
| respFuture = asyncReply.getResponse(); | ||
| validateFuture = respFuture.thenApplyAsync(e -> { | ||
| try { | ||
| validateResponse(e); | ||
| } catch (IOException sce) { | ||
| respFuture.completeExceptionally(sce); | ||
| } | ||
| // if the ioException is not set, putBlock is successful | ||
| if (getIoException() == null && putBlockPiggybacking) { | ||
| handleSuccessfulPutBlock(e.getWriteChunk().getCommittedBlockLength(), | ||
| asyncReply, flushPos, byteBufferList); | ||
| } | ||
| return e; | ||
| }, responseExecutor).exceptionally(e -> { | ||
| String msg = "Failed to write chunk " + chunkInfo.getChunkName() + | ||
| " into block " + blockID; | ||
| LOG.debug("{}, exception: {}", msg, e.getLocalizedMessage()); | ||
| CompletionException ce = new CompletionException(msg, e); | ||
| setIoException(ce); | ||
| throw ce; | ||
| }); | ||
| clientMetrics.recordWriteChunk(pipeline, chunkInfo.getLen()); | ||
| return validateFuture; | ||
|
|
||
| } catch (IOException | ExecutionException e) { | ||
| throw new IOException(EXCEPTION_MSG + e.toString(), e); | ||
| } catch (InterruptedException ex) { | ||
| Thread.currentThread().interrupt(); | ||
| handleInterruptedException(ex, false); | ||
| } | ||
| return null; | ||
| if (putBlockPiggybacking) { | ||
| putFlushFuture(flushPos, validateFuture); | ||
| } | ||
| return validateFuture; | ||
| } | ||
|
|
||
| private void handleSuccessfulPutBlock( | ||
| ContainerProtos.GetCommittedBlockLengthResponseProto e, | ||
| XceiverClientReply asyncReply, long flushPos, | ||
| List<ChunkBuffer> byteBufferList) { | ||
| BlockID responseBlockID = BlockID.getFromProtobuf( | ||
| e.getBlockID()); | ||
| Preconditions.checkState(blockID.get().getContainerBlockID() | ||
| .equals(responseBlockID.getContainerBlockID())); | ||
| // updates the bcsId of the block | ||
| blockID.set(responseBlockID); | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug( | ||
| "Adding index " + asyncReply.getLogIndex() + " flushLength " | ||
| + flushPos + " numBuffers " + byteBufferList.size() | ||
| + " blockID " + blockID + " bufferPool size" + bufferPool | ||
| .getSize() + " currentBufferIndex " + bufferPool | ||
| .getCurrentBufferIndex()); | ||
| } | ||
| // for standalone protocol, logIndex will always be 0. | ||
| updateCommitInfo(asyncReply, byteBufferList); | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is a PutBlock required here for a small chunk?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the typical case (no hflush), where a PutBlock is sent to update metadata after four 1-MB chunks are sent via WriteChunk requests.