-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HDFS-16896 clear ignoredNodes list when we clear deadnode list on ref… #5322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
5a59467
dfe30d0
b3ff5f2
3368917
efb263c
69f6e0e
ac4c0ab
64239e2
c073c9f
40b77b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -197,6 +197,15 @@ private void clearLocalDeadNodes() { | |
| deadNodes.clear(); | ||
| } | ||
|
|
||
| /** | ||
| * Clear list of ignored nodes used for hedged reads. | ||
| */ | ||
| private void clearIgnoredNodes(Collection<DatanodeInfo> ignoredNodes) { | ||
mccormickt12 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (ignoredNodes != null) { | ||
| ignoredNodes.clear(); | ||
| } | ||
| } | ||
|
|
||
| protected DFSClient getDFSClient() { | ||
| return dfsClient; | ||
| } | ||
|
|
@@ -224,7 +233,7 @@ boolean deadNodesContain(DatanodeInfo nodeInfo) { | |
| } | ||
|
|
||
| /** | ||
| * Grab the open-file info from namenode | ||
| * Grab the open-file info from namenode. | ||
mccormickt12 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * @param refreshLocatedBlocks whether to re-fetch locatedblocks | ||
| */ | ||
| void openInfo(boolean refreshLocatedBlocks) throws IOException { | ||
|
|
@@ -940,7 +949,8 @@ private DNAddrPair chooseDataNode(LocatedBlock block, | |
| * @return Returns chosen DNAddrPair; Can be null if refetchIfRequired is | ||
| * false. | ||
| */ | ||
| private DNAddrPair chooseDataNode(LocatedBlock block, | ||
| @VisibleForTesting | ||
| DNAddrPair chooseDataNode(LocatedBlock block, | ||
| Collection<DatanodeInfo> ignoredNodes, boolean refetchIfRequired) | ||
| throws IOException { | ||
| while (true) { | ||
|
|
@@ -955,6 +965,10 @@ private DNAddrPair chooseDataNode(LocatedBlock block, | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * RefetchLocations should only be called when there are no active requests | ||
| * to datanodes. In the hedged read case this means futures should be empty | ||
| */ | ||
mccormickt12 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| private LocatedBlock refetchLocations(LocatedBlock block, | ||
| Collection<DatanodeInfo> ignoredNodes) throws IOException { | ||
| String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), | ||
|
|
@@ -1000,6 +1014,7 @@ private LocatedBlock refetchLocations(LocatedBlock block, | |
| "Interrupted while choosing DataNode for read."); | ||
| } | ||
| clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] | ||
| clearIgnoredNodes(ignoredNodes); | ||
| openInfo(true); | ||
| block = refreshLocatedBlock(block); | ||
| failures++; | ||
|
|
@@ -1337,8 +1352,12 @@ private void hedgedFetchBlockByteRange(LocatedBlock block, long start, | |
| } catch (InterruptedException ie) { | ||
| // Ignore and retry | ||
| } | ||
| if (refetch) { | ||
| refetchLocations(block, ignored); | ||
| // If refetch is true, then all nodes are in deadNodes or ignoredNodes. | ||
| // We should loop through all futures and remove them, so we do not | ||
| // have concurrent requests to the same node. | ||
| // Once all futures are cleared, we can clear the ignoredNodes and retry. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ignored and dead nodes are cleared, correct?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, the thing i am trying to emphasize is the |
||
| if (refetch && futures.isEmpty()) { | ||
| block = refetchLocations(block, ignored); | ||
| } | ||
| // We got here if exception. Ignore this node on next go around IFF | ||
| // we found a chosenNode to hedge read against. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -603,7 +603,9 @@ public Void answer(InvocationOnMock invocation) throws Throwable { | |
| input.read(0, buffer, 0, 1024); | ||
| Assert.fail("Reading the block should have thrown BlockMissingException"); | ||
| } catch (BlockMissingException e) { | ||
| assertEquals(3, input.getHedgedReadOpsLoopNumForTesting()); | ||
| // The result of 9 is due to 2 blocks by 4 iterations plus one because | ||
| // hedgedReadOpsLoopNumForTesting is incremented at start of the loop. | ||
| assertEquals(9, input.getHedgedReadOpsLoopNumForTesting()); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are tripling the IO per hedged request?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are actually 4x'ing, the comment was meant to help clarify. |
||
| assertTrue(metrics.getHedgedReadOps() == 0); | ||
| } finally { | ||
| Mockito.reset(injector); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit. Param documentation.