-
Notifications
You must be signed in to change notification settings - Fork 588
HDDS-4708. Optimization: update RetryCount less frequently (update once per ~100) #1805
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,6 +81,8 @@ public class DeletedBlockLogImpl | |
| private final Lock lock; | ||
| // Maps txId to set of DNs which are successful in committing the transaction | ||
| private Map<Long, Set<UUID>> transactionToDNsCommitMap; | ||
| // Maps txId to its in-memory retry count. | |
| private Map<Long, Integer> transactionRetryCountMap; | ||
|
|
||
| private final AtomicLong largestTxnId; | ||
| // largest transactionId is stored at largestTxnIdHolderKey | ||
|
|
@@ -102,7 +104,7 @@ public DeletedBlockLogImpl(ConfigurationSource conf, | |
|
|
||
| // maps transaction to dns which have committed it. | ||
| transactionToDNsCommitMap = new ConcurrentHashMap<>(); | ||
|
|
||
| transactionRetryCountMap = new ConcurrentHashMap<>(); | ||
| this.largestTxnId = new AtomicLong(this.getLargestRecordedTXID()); | ||
| } | ||
|
|
||
|
|
@@ -187,19 +189,30 @@ public void incrementCount(List<Long> txIDs) throws IOException { | |
| } | ||
| continue; | ||
| } | ||
| DeletedBlocksTransaction.Builder builder = block.toBuilder(); | ||
| int currentCount = block.getCount(); | ||
|
|
||
| int currentCount = | ||
| transactionRetryCountMap.getOrDefault(txID, block.getCount()); | ||
| if (currentCount > -1) { | ||
| builder.setCount(++currentCount); | ||
| } | ||
| // if the retry time exceeds the maxRetry value | ||
| // then set the retry value to -1, stop retrying, admins can | ||
| // analyze those blocks and purge them manually by SCMCli. | ||
| if (currentCount > maxRetry) { | ||
| builder.setCount(-1); | ||
| int nextCount = currentCount + 1; | ||
| DeletedBlocksTransaction.Builder builder = block.toBuilder(); | ||
| if (nextCount > maxRetry) { | ||
| // If the retry count exceeds the maxRetry value, | |
| // set the count to -1 to stop retrying; admins can then | |
| // analyze those blocks and purge them manually via SCMCli. | |
| builder.setCount(-1); | ||
| scmMetadataStore.getDeletedBlocksTXTable().put(txID, | ||
| builder.build()); | ||
| transactionRetryCountMap.remove(txID); | ||
| } else if (nextCount % 100 == 0) { | ||
| // write retry count after every 100 retries into DB. | ||
| builder.setCount(nextCount); | ||
| scmMetadataStore.getDeletedBlocksTXTable().put(txID, | ||
| builder.build()); | ||
| transactionRetryCountMap.put(txID, nextCount); | ||
| } else { | ||
| transactionRetryCountMap.put(txID, nextCount); | ||
| } | ||
| } | ||
| scmMetadataStore.getDeletedBlocksTXTable().put(txID, | ||
| builder.build()); | ||
| } catch (IOException ex) { | ||
| LOG.warn("Cannot increase count for txID " + txID, ex); | ||
| // We do not throw error here, since we don't want to abort the loop. | ||
|
|
@@ -274,6 +287,7 @@ public void commitTransactions( | |
| .collect(Collectors.toList()); | ||
| if (dnsWithCommittedTxn.containsAll(containerDns)) { | ||
| transactionToDNsCommitMap.remove(txID); | ||
| transactionRetryCountMap.remove(txID); | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("Purging txId={} from block deletion log", txID); | ||
| } | ||
|
|
@@ -429,6 +443,10 @@ public DatanodeDeletedBlockTransactions getTransactions( | |
| } | ||
| } | ||
| purgeTransactions(txnsToBePurged); | ||
| for (DeletedBlocksTransaction trx : txnsToBePurged) { | ||
| transactionRetryCountMap.remove(trx.getTxID()); | ||
|
||
| transactionToDNsCommitMap.remove(trx.getTxID()); | ||
| } | ||
| } | ||
| return transactions; | ||
| } finally { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.