diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index e4cc9827b98d..23400b1a06b4 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -452,10 +452,6 @@ public final class ScmConfigKeys { public static final boolean OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE_DEFAULT = true; - public static final String OZONE_SCM_BLOCK_DELETION_MAX_RETRY = - "ozone.scm.block.deletion.max.retry"; - public static final int OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT = 4096; - public static final String OZONE_SCM_BLOCK_DELETION_PER_DN_DISTRIBUTION_FACTOR = "ozone.scm.block.deletion.per.dn.distribution.factor"; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 67ee84512f1d..7677ed58707f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -412,26 +411,6 @@ StartContainerBalancerResponseProto startContainerBalancer( */ void transferLeadership(String newLeaderId) throws IOException; - /** - * Return the failed transactions of the Deleted blocks. A transaction is - * considered to be failed if it has been sent more than MAX_RETRY limit - * and its count is reset to -1. - * - * @param count Maximum num of returned transactions, if {@literal < 0}. return all. - * @param startTxId The least transaction id to start with. - * @return a list of failed deleted block transactions. - * @throws IOException - */ - List getFailedDeletedBlockTxn(int count, - long startTxId) throws IOException; - - /** - * Reset the failed deleted block retry count. - * @param txIDs transactionId list to be reset - * @throws IOException - */ - int resetDeletedBlockRetryCount(List txIDs) throws IOException; - /** * Get usage information of datanode by address or uuid. * diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 1f0f8cd8b066..56411453fc8e 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -347,6 +347,7 @@ Pipeline createReplicationPipeline(HddsProtos.ReplicationType type, * @return a list of failed deleted block transactions. * @throws IOException */ + @Deprecated List getFailedDeletedBlockTxn(int count, long startTxId) throws IOException; @@ -357,6 +358,7 @@ List getFailedDeletedBlockTxn(int count, * @return num of successful reset * @throws IOException */ + @Deprecated int resetDeletedBlockRetryCount(List txIDs) throws IOException; /** diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index bf0215c72e81..f040cf93ab4d 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -768,16 +768,6 @@ The port number of the Ozone SCM block client service. - - ozone.scm.block.deletion.max.retry - 4096 - OZONE, SCM - - SCM wraps up many blocks in a deletion transaction and sends that to data - node for physical deletion periodically. This property determines how many - times SCM is going to retry sending a deletion operation to the data node. - - ozone.scm.block.deletion.per.dn.distribution.factor 8 diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 7072d6090baa..2a85e6e40071 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -27,6 +27,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -78,8 +79,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetExistContainerWithPipelinesInBatchRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetFailedDeletedBlocksTxnRequestProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetFailedDeletedBlocksTxnResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetMetricsRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetMetricsResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetPipelineRequestProto; @@ -102,7 +101,6 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ReplicationManagerReportResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ReplicationManagerStatusRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ReplicationManagerStatusResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ResetDeletedBlockRetryCountRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SCMCloseContainerRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SCMCloseContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SCMDeleteContainerRequestProto; @@ -789,31 +787,18 @@ public void transferLeadership(String nodeId) builder -> builder.setTransferScmLeadershipRequest(reqBuilder.build())); } + @Deprecated @Override public List getFailedDeletedBlockTxn(int count, long startTxId) throws IOException { - GetFailedDeletedBlocksTxnRequestProto request = - GetFailedDeletedBlocksTxnRequestProto.newBuilder() - .setCount(count) - .setStartTxId(startTxId) - .build(); - GetFailedDeletedBlocksTxnResponseProto resp = submitRequest( - Type.GetFailedDeletedBlocksTransaction, - builder -> builder.setGetFailedDeletedBlocksTxnRequest(request)). - getGetFailedDeletedBlocksTxnResponse(); - return resp.getDeletedBlocksTransactionsList(); + return Collections.emptyList(); } + @Deprecated @Override public int resetDeletedBlockRetryCount(List txIDs) throws IOException { - ResetDeletedBlockRetryCountRequestProto request = - ResetDeletedBlockRetryCountRequestProto.newBuilder() - .addAllTransactionId(txIDs) - .build(); - return submitRequest(Type.ResetDeletedBlockRetryCount, - builder -> builder.setResetDeletedBlockRetryCountRequest(request)). - getResetDeletedBlockRetryCountResponse().getResetCount(); + return 0; } /** diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index cadff023a061..3dfdea4c7324 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -77,9 +77,9 @@ message ScmContainerLocationRequest { optional GetContainerCountRequestProto getContainerCountRequest = 38; optional GetContainerReplicasRequestProto getContainerReplicasRequest = 39; optional ReplicationManagerReportRequestProto replicationManagerReportRequest = 40; - optional ResetDeletedBlockRetryCountRequestProto resetDeletedBlockRetryCountRequest = 41; + optional ResetDeletedBlockRetryCountRequestProto resetDeletedBlockRetryCountRequest = 41 [deprecated=true]; optional TransferLeadershipRequestProto transferScmLeadershipRequest = 42; - optional GetFailedDeletedBlocksTxnRequestProto getFailedDeletedBlocksTxnRequest = 43; + optional GetFailedDeletedBlocksTxnRequestProto getFailedDeletedBlocksTxnRequest = 43 [deprecated=true]; optional DecommissionScmRequestProto decommissionScmRequest = 44; optional SingleNodeQueryRequestProto singleNodeQueryRequest = 45; optional GetContainersOnDecomNodeRequestProto getContainersOnDecomNodeRequest = 46; @@ -134,9 +134,9 @@ message ScmContainerLocationResponse { optional GetContainerCountResponseProto getContainerCountResponse = 38; optional GetContainerReplicasResponseProto getContainerReplicasResponse = 39; optional ReplicationManagerReportResponseProto getReplicationManagerReportResponse = 40; - optional ResetDeletedBlockRetryCountResponseProto resetDeletedBlockRetryCountResponse = 41; + optional ResetDeletedBlockRetryCountResponseProto resetDeletedBlockRetryCountResponse = 41 [deprecated=true]; optional TransferLeadershipResponseProto transferScmLeadershipResponse = 42; - optional GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxnResponse = 43; + optional GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxnResponse = 43 [deprecated=true]; optional DecommissionScmResponseProto decommissionScmResponse = 44; optional SingleNodeQueryResponseProto singleNodeQueryResponse = 45; optional GetContainersOnDecomNodeResponseProto getContainersOnDecomNodeResponse = 46; diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto index 752a62bd3395..e48ed4d1c595 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto @@ -367,7 +367,8 @@ message DeletedBlocksTransaction { required int64 containerID = 2; repeated int64 localID = 3; // the retry time of sending deleting command to datanode. - required int32 count = 4; + // We don't have to store the retry count in DB. + optional int32 count = 4 [deprecated=true]; } // ACK message datanode sent to SCM, contains the result of diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java index 21e4d1b7c569..b1283ef773c9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java @@ -51,38 +51,13 @@ DatanodeDeletedBlockTransactions getTransactions( throws IOException; /** - * Return the failed transactions in batches in the log. A transaction is - * considered to be failed if it has been sent more than MAX_RETRY limit - * and its count is reset to -1. - * - * @param count Number of failed transactions to be returned. - * @param startTxId The least transaction id to start with. - * @return a list of failed deleted block transactions. - * @throws IOException - */ - List getFailedTransactions(int count, - long startTxId) throws IOException; - - /** - * Increments count for given list of transactions by 1. - * The log maintains a valid range of counts for each transaction - * [0, MAX_RETRY]. If exceed this range, resets it to -1 to indicate - * the transaction is no longer valid. - * - * @param txIDs - transaction ID. + * Increments count for the given list of transactions by 1. + * The retry count is maintained only for in-flight transactions, + * this will be useful in debugging. */ void incrementCount(List txIDs) throws IOException; - - /** - * Reset DeletedBlock transaction retry count. - * - * @param txIDs transactionId list to be reset - * @return num of successful reset - */ - int resetCount(List txIDs) throws IOException; - /** * Records the creation of a transaction for a DataNode. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java index c94036b9cbc5..4ee3a11f7e7d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java @@ -17,22 +17,18 @@ package org.apache.hadoop.hdds.scm.block; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_PER_DN_DISTRIBUTION_FACTOR; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_PER_DN_DISTRIBUTION_FACTOR_DEFAULT; import static org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.SCMDeleteBlocksCommandStatusManager.CmdStatus; import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.DEL_TXN_ID; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; import java.io.IOException; import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; @@ -79,7 +75,6 @@ public class DeletedBlockLogImpl private static final Logger LOG = LoggerFactory.getLogger(DeletedBlockLogImpl.class); - private final int maxRetry; private final ContainerManager containerManager; private final Lock lock; // The access to DeletedBlocksTXTable is protected by @@ -92,7 +87,6 @@ public class DeletedBlockLogImpl transactionStatusManager; private long scmCommandTimeoutMs = Duration.ofSeconds(300).toMillis(); - private static final int LIST_ALL_FAILED_TRANSACTIONS = -1; private long lastProcessedTransactionId = -1; private final int logAppenderQueueByteLimit; private int deletionFactorPerDatanode; @@ -102,8 +96,6 @@ public DeletedBlockLogImpl(ConfigurationSource conf, ContainerManager containerManager, DBTransactionBuffer dbTxBuffer, ScmBlockDeletingServiceMetrics metrics) { - maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, - OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT); this.containerManager = containerManager; this.lock = new ReentrantLock(); @@ -142,37 +134,6 @@ void setDeleteBlocksFactorPerDatanode(int deleteBlocksFactorPerDatanode) { this.deletionFactorPerDatanode = deleteBlocksFactorPerDatanode; } - @Override - public List getFailedTransactions(int count, - long startTxId) throws IOException { - lock.lock(); - try { - final List failedTXs = Lists.newArrayList(); - try (Table.KeyValueIterator iter = - deletedBlockLogStateManager.getReadOnlyIterator()) { - if (count == LIST_ALL_FAILED_TRANSACTIONS) { - while (iter.hasNext()) { - DeletedBlocksTransaction delTX = iter.next().getValue(); - if (delTX.getCount() == -1) { - failedTXs.add(delTX); - } - } - } else { - iter.seek(startTxId); - while (iter.hasNext() && failedTXs.size() < count) { - DeletedBlocksTransaction delTX = iter.next().getValue(); - if (delTX.getCount() == -1 && delTX.getTxID() >= startTxId) { - failedTXs.add(delTX); - } - } - } - } - return failedTXs; - } finally { - lock.unlock(); - } - } - /** * {@inheritDoc} * @@ -182,62 +143,7 @@ public List getFailedTransactions(int count, @Override public void incrementCount(List txIDs) throws IOException { - lock.lock(); - try { - transactionStatusManager.incrementRetryCount(txIDs, maxRetry); - } finally { - lock.unlock(); - } - } - - /** - * {@inheritDoc} - * - */ - @Override - public int resetCount(List txIDs) throws IOException { - final int batchSize = 1000; - int totalProcessed = 0; - - try { - if (txIDs != null && !txIDs.isEmpty()) { - return resetRetryCount(txIDs); - } - - // If txIDs are null or empty, fetch all failed transactions in batches - long startTxId = 0; - List batch; - - do { - // Fetch the batch of failed transactions - batch = getFailedTransactions(batchSize, startTxId); - if (batch.isEmpty()) { - break; - } - - List batchTxIDs = batch.stream().map(DeletedBlocksTransaction::getTxID).collect(Collectors.toList()); - totalProcessed += resetRetryCount(new ArrayList<>(batchTxIDs)); - // Update startTxId to continue from the last processed transaction - startTxId = batch.get(batch.size() - 1).getTxID() + 1; - } while (!batch.isEmpty()); - - } catch (Exception e) { - throw new IOException("Error during transaction reset", e); - } - return totalProcessed; - } - - private int resetRetryCount(List txIDs) throws IOException { - int totalProcessed; - lock.lock(); - try { - transactionStatusManager.resetRetryCount(txIDs); - totalProcessed = deletedBlockLogStateManager.resetRetryCountOfTransactionInDB(new ArrayList<>( - txIDs)); - } finally { - lock.unlock(); - } - return totalProcessed; + transactionStatusManager.incrementRetryCount(txIDs); } private DeletedBlocksTransaction constructNewTransaction( @@ -254,17 +160,15 @@ private DeletedBlocksTransaction constructNewTransaction( public int getNumOfValidTransactions() throws IOException { lock.lock(); try { - final AtomicInteger num = new AtomicInteger(0); + int count = 0; try (Table.KeyValueIterator iter = deletedBlockLogStateManager.getReadOnlyIterator()) { while (iter.hasNext()) { - DeletedBlocksTransaction delTX = iter.next().getValue(); - if (delTX.getCount() > -1) { - num.incrementAndGet(); - } + iter.next(); + count++; } } - return num.get(); + return count; } finally { lock.unlock(); } @@ -350,16 +254,12 @@ private void getTransaction(DeletedBlocksTransaction tx, return; } - DeletedBlocksTransaction updatedTxn = - DeletedBlocksTransaction.newBuilder(tx) - .setCount(transactionStatusManager.getRetryCount(tx.getTxID())) - .build(); boolean flag = false; for (ContainerReplica replica : replicas) { final DatanodeID datanodeID = replica.getDatanodeDetails().getID(); if (!transactionStatusManager.isDuplication( datanodeID, tx.getTxID(), commandStatus)) { - transactions.addTransactionToDN(datanodeID, updatedTxn); + transactions.addTransactionToDN(datanodeID, tx); flag = true; } } @@ -466,15 +366,14 @@ public DatanodeDeletedBlockTransactions getTransactions( keyValue = iter.next(); DeletedBlocksTransaction txn = keyValue.getValue(); final ContainerID id = ContainerID.valueOf(txn.getContainerID()); + final ContainerInfo container = containerManager.getContainer(id); try { // HDDS-7126. When container is under replicated, it is possible // that container is deleted, but transactions are not deleted. - if (containerManager.getContainer(id).isDeleted()) { - LOG.warn("Container: {} was deleted for the " + - "transaction: {}.", id, txn); + if (container.isDeleted()) { + LOG.warn("Container: {} was deleted for the transaction: {}.", id, txn); txIDs.add(txn.getTxID()); - } else if (txn.getCount() > -1 && txn.getCount() <= maxRetry - && !containerManager.getContainer(id).isOpen()) { + } else if (!container.isOpen()) { Set replicas = containerManager .getContainerReplicas( ContainerID.valueOf(txn.getContainerID())); @@ -483,7 +382,7 @@ public DatanodeDeletedBlockTransactions getTransactions( } else { metrics.incrSkippedTransaction(); } - } else if (txn.getCount() >= maxRetry || containerManager.getContainer(id).isOpen()) { + } else if (containerManager.getContainer(id).isOpen()) { metrics.incrSkippedTransaction(); } } catch (ContainerNotFoundException ex) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java index 398fc47ec918..060b07bbdf93 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java @@ -36,10 +36,12 @@ void addTransactionsToDB(ArrayList txs) void removeTransactionsFromDB(ArrayList txIDs) throws IOException; + @Deprecated @Replicate void increaseRetryCountOfTransactionInDB(ArrayList txIDs) throws IOException; + @Deprecated @Replicate int resetRetryCountOfTransactionInDB(ArrayList txIDs) throws IOException; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java index fd5c39e79e65..b6976c3c3f38 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java @@ -23,7 +23,6 @@ import java.util.HashMap; import java.util.Map; import java.util.NoSuchElementException; -import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -54,7 +53,6 @@ public class DeletedBlockLogStateManagerImpl private ContainerManager containerManager; private final DBTransactionBuffer transactionBuffer; private final Set deletingTxIDs; - private final Set skippingRetryTxIDs; public DeletedBlockLogStateManagerImpl(ConfigurationSource conf, Table deletedTable, @@ -63,7 +61,6 @@ public DeletedBlockLogStateManagerImpl(ConfigurationSource conf, this.containerManager = containerManager; this.transactionBuffer = txBuffer; this.deletingTxIDs = ConcurrentHashMap.newKeySet(); - this.skippingRetryTxIDs = ConcurrentHashMap.newKeySet(); } @Override @@ -80,17 +77,13 @@ public Table.KeyValueIterator getReadOnlyIterato private void findNext() { while (iter.hasNext()) { - TypedTable.KeyValue next = iter - .next(); + final TypedTable.KeyValue next = iter.next(); final long txID = next.getKey(); - if ((deletingTxIDs == null || !deletingTxIDs.contains(txID)) && ( - skippingRetryTxIDs == null || !skippingRetryTxIDs - .contains(txID))) { + if ((!deletingTxIDs.contains(txID))) { nextTx = next; if (LOG.isTraceEnabled()) { - LOG.trace("DeletedBlocksTransaction matching txID:{}", - txID); + LOG.trace("DeletedBlocksTransaction matching txID:{}", txID); } return; } @@ -169,71 +162,30 @@ public void removeTransactionsFromDB(ArrayList txIDs) } } + @Deprecated @Override public void increaseRetryCountOfTransactionInDB( ArrayList txIDs) throws IOException { - for (Long txID : txIDs) { - DeletedBlocksTransaction block = - deletedTable.get(txID); - if (block == null) { - if (LOG.isDebugEnabled()) { - // This can occur due to race condition between retry and old - // service task where old task removes the transaction and the new - // task is resending - LOG.debug("Deleted TXID {} not found.", txID); - } - continue; - } - // if the retry time exceeds the maxRetry value - // then set the retry value to -1, stop retrying, admins can - // analyze those blocks and purge them manually by SCMCli. - DeletedBlocksTransaction.Builder builder = block.toBuilder().setCount(-1); - transactionBuffer.addToBuffer(deletedTable, txID, builder.build()); - if (skippingRetryTxIDs != null) { - skippingRetryTxIDs.add(txID); - } - } + // We don't store retry count in DB anymore. + // This method is being retained to ensure backward compatibility and prevent + // issues during minor upgrades. It will be removed in the future, during a major release. } + @Deprecated @Override public int resetRetryCountOfTransactionInDB(ArrayList txIDs) throws IOException { - Objects.requireNonNull(txIDs, "txIds cannot be null."); - int resetCount = 0; - for (long txId: txIDs) { - try { - DeletedBlocksTransaction transaction = deletedTable.get(txId); - if (transaction == null) { - LOG.warn("txId {} is not found in deletedTable.", txId); - continue; - } - if (transaction.getCount() != -1) { - LOG.warn("txId {} has already been reset in deletedTable.", txId); - continue; - } - transactionBuffer.addToBuffer(deletedTable, txId, - transaction.toBuilder().setCount(0).build()); - resetCount += 1; - if (LOG.isDebugEnabled()) { - LOG.info("Reset deleted block Txn retry count to 0 in container {}" + - " with txnId {} ", transaction.getContainerID(), txId); - } - } catch (IOException ex) { - LOG.error("Could not reset deleted block transaction {}.", txId, ex); - throw ex; - } - } - LOG.info("Reset in total {} deleted block Txn retry count", resetCount); - return resetCount; + // We don't reset retry count anymore. + // This method is being retained to ensure backward compatibility and prevent + // issues during minor upgrades. It will be removed in the future, during a major release. + return 0; } @Override public void onFlush() { // onFlush() can be invoked only when ratis is enabled. Preconditions.checkNotNull(deletingTxIDs); - Preconditions.checkNotNull(skippingRetryTxIDs); deletingTxIDs.clear(); - skippingRetryTxIDs.clear(); } @Override diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java index 6cc99605690c..6d4d4ba30167 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -365,37 +366,10 @@ Map> getScmCmdStatusRecord() { } } - public void incrementRetryCount(List txIDs, long maxRetry) - throws IOException { - ArrayList txIDsToUpdate = new ArrayList<>(); - for (Long txID : txIDs) { - int currentCount = - transactionToRetryCountMap.getOrDefault(txID, 0); - if (currentCount > maxRetry) { - continue; - } else { - currentCount += 1; - if (currentCount > maxRetry) { - txIDsToUpdate.add(txID); - } - transactionToRetryCountMap.put(txID, currentCount); - } - } - - if (!txIDsToUpdate.isEmpty()) { - deletedBlockLogStateManager - .increaseRetryCountOfTransactionInDB(txIDsToUpdate); - } - } - - public void resetRetryCount(List txIDs) throws IOException { - for (Long txID: txIDs) { - transactionToRetryCountMap.computeIfPresent(txID, (key, value) -> 0); - } - } - - int getRetryCount(long txID) { - return transactionToRetryCountMap.getOrDefault(txID, 0); + public void incrementRetryCount(List txIDs) { + CompletableFuture.runAsync(() -> + txIDs.forEach(tx -> + transactionToRetryCountMap.compute(tx, (k, v) -> (v == null) ? 1 : v + 1))); } public void onSent(DatanodeDetails dnId, SCMCommand scmCommand) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 8ce5ae44ab6b..b9d4b9d6aef5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -1324,6 +1324,7 @@ public GetContainerCountResponseProto getClosedContainerCount( .build(); } + @Deprecated public GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxn( GetFailedDeletedBlocksTxnRequestProto request) throws IOException { long startTxId = request.hasStartTxId() ? request.getStartTxId() : 0; @@ -1333,6 +1334,7 @@ public GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxn( .build(); } + @Deprecated public ResetDeletedBlockRetryCountResponseProto getResetDeletedBlockRetryCount(ResetDeletedBlockRetryCountRequestProto request) throws IOException { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 370493b6dc3c..c9d4f0b07926 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -82,7 +82,6 @@ import org.apache.hadoop.hdds.scm.container.balancer.IllegalContainerBalancerStateException; import org.apache.hadoop.hdds.scm.container.balancer.InvalidContainerBalancerConfigurationException; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; -import org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper; import org.apache.hadoop.hdds.scm.container.reconciliation.ReconciliationEligibilityHandler; import org.apache.hadoop.hdds.scm.container.reconciliation.ReconciliationEligibilityHandler.EligibilityResult; import org.apache.hadoop.hdds.scm.events.SCMEvents; @@ -960,47 +959,17 @@ public void transferLeadership(String newLeaderId) SCMAction.TRANSFER_LEADERSHIP, auditMap)); } + @Deprecated @Override public List getFailedDeletedBlockTxn(int count, long startTxId) throws IOException { - List result; - Map auditMap = Maps.newHashMap(); - auditMap.put("count", String.valueOf(count)); - auditMap.put("startTxId", String.valueOf(startTxId)); - - try { - result = scm.getScmBlockManager().getDeletedBlockLog() - .getFailedTransactions(count, startTxId).stream() - .map(DeletedBlocksTransactionInfoWrapper::fromTxn) - .collect(Collectors.toList()); - AUDIT.logWriteSuccess(buildAuditMessageForSuccess( - SCMAction.GET_FAILED_DELETED_BLOCKS_TRANSACTION, auditMap)); - return result; - } catch (IOException ex) { - AUDIT.logReadFailure( - buildAuditMessageForFailure( - SCMAction.GET_FAILED_DELETED_BLOCKS_TRANSACTION, auditMap, ex) - ); - throw ex; - } + return Collections.emptyList(); } + @Deprecated @Override public int resetDeletedBlockRetryCount(List txIDs) throws IOException { - final Map auditMap = Maps.newHashMap(); - auditMap.put("txIDs", txIDs.toString()); - try { - getScm().checkAdminAccess(getRemoteUser(), false); - int count = scm.getScmBlockManager().getDeletedBlockLog(). - resetCount(txIDs); - AUDIT.logWriteSuccess(buildAuditMessageForSuccess( - SCMAction.RESET_DELETED_BLOCK_RETRY_COUNT, auditMap)); - return count; - } catch (Exception ex) { - AUDIT.logWriteFailure(buildAuditMessageForFailure( - SCMAction.RESET_DELETED_BLOCK_RETRY_COUNT, auditMap, ex)); - throw ex; - } + return 0; } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java index c8e1351297e0..95e13146deed 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java @@ -52,9 +52,7 @@ public enum SCMAction implements AuditAction { GET_CONTAINER_WITH_PIPELINE_BATCH, ADD_SCM, GET_REPLICATION_MANAGER_REPORT, - RESET_DELETED_BLOCK_RETRY_COUNT, TRANSFER_LEADERSHIP, - GET_FAILED_DELETED_BLOCKS_TRANSACTION, GET_CONTAINER_REPLICAS, GET_CONTAINERS_ON_DECOM_NODE, DECOMMISSION_NODES, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java index eba8a8eeb184..bc0c5cba4d1a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java @@ -17,9 +17,9 @@ package org.apache.hadoop.hdds.scm.block; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doAnswer; @@ -111,7 +111,6 @@ public class TestDeletedBlockLog { @BeforeEach public void setup() throws Exception { conf = new OzoneConfiguration(); - conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); replicationManager = mock(ReplicationManager.class); SCMConfigurator configurator = new SCMConfigurator(); @@ -234,19 +233,6 @@ private void addTransactions(Map> containerBlocksMap, } } - private void incrementCount(List txIDs) throws IOException { - deletedBlockLog.incrementCount(txIDs); - scmHADBTransactionBuffer.flush(); - // mock scmHADBTransactionBuffer does not flush deletedBlockLog - deletedBlockLog.onFlush(); - } - - private void resetCount(List txIDs) throws IOException { - deletedBlockLog.resetCount(txIDs); - scmHADBTransactionBuffer.flush(); - deletedBlockLog.onFlush(); - } - private void commitTransactions( List transactionResults, DatanodeDetails... dns) throws IOException { @@ -337,45 +323,6 @@ public void testContainerManagerTransactionId() throws Exception { } } - @Test - public void testIncrementCount() throws Exception { - int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); - - // Create 30 TXs in the log. - addTransactions(generateData(30), true); - mockContainerHealthResult(true); - - // This will return all TXs, total num 30. - List blocks = getAllTransactions(); - List txIDs = blocks.stream().map(DeletedBlocksTransaction::getTxID) - .distinct().collect(Collectors.toList()); - assertEquals(30, txIDs.size()); - - for (DeletedBlocksTransaction block : blocks) { - assertEquals(0, block.getCount()); - } - - for (int i = 0; i < maxRetry; i++) { - incrementCount(txIDs); - } - blocks = getAllTransactions(); - for (DeletedBlocksTransaction block : blocks) { - assertEquals(maxRetry, block.getCount()); - } - - // Increment another time so it exceed the maxRetry. - // On this call, count will be set to -1 which means TX eventually fails. - incrementCount(txIDs); - blocks = getAllTransactions(); - for (DeletedBlocksTransaction block : blocks) { - assertEquals(-1, block.getCount()); - } - - // If all TXs are failed, getTransactions call will always return nothing. - blocks = getAllTransactions(); - assertEquals(0, blocks.size()); - } - private void mockContainerHealthResult(Boolean healthy) { ContainerInfo containerInfo = mock(ContainerInfo.class); ContainerHealthResult healthResult = @@ -387,55 +334,6 @@ private void mockContainerHealthResult(Boolean healthy) { .getContainerReplicationHealth(any(), any()); } - @Test - public void testResetCount() throws Exception { - int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); - - // Create 30 TXs in the log. - addTransactions(generateData(30), true); - mockContainerHealthResult(true); - - // This will return all TXs, total num 30. - List blocks = getAllTransactions(); - List txIDs = blocks.stream().map(DeletedBlocksTransaction::getTxID) - .distinct().collect(Collectors.toList()); - - for (int i = 0; i < maxRetry; i++) { - incrementCount(txIDs); - } - - // Increment another time so it exceed the maxRetry. - // On this call, count will be set to -1 which means TX eventually fails. - incrementCount(txIDs); - blocks = getAllTransactions(); - for (DeletedBlocksTransaction block : blocks) { - assertEquals(-1, block.getCount()); - } - - // If all TXs are failed, getTransactions call will always return nothing. - blocks = getAllTransactions(); - assertEquals(0, blocks.size()); - - // Reset the retry count, these transactions should be accessible. - resetCount(txIDs); - blocks = getAllTransactions(); - for (DeletedBlocksTransaction block : blocks) { - assertEquals(0, block.getCount()); - } - - // Increment for the reset transactions. - // Lets the SCM delete the transaction and wait for the DN reply - // to timeout, thus allowing the transaction to resend the - deletedBlockLog.setScmCommandTimeoutMs(-1L); - incrementCount(txIDs); - blocks = getAllTransactions(); - for (DeletedBlocksTransaction block : blocks) { - assertEquals(1, block.getCount()); - } - - assertEquals(30 * THREE, blocks.size()); - } - @Test public void testAddTransactionsIsBatched() throws Exception { conf.setStorageSize(ScmConfigKeys.OZONE_SCM_HA_RAFT_LOG_APPENDER_QUEUE_BYTE_LIMIT, 1, StorageUnit.KB); @@ -453,19 +351,16 @@ public void testAddTransactionsIsBatched() throws Exception { @Test public void testSCMDelIteratorProgress() throws Exception { - int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); - // CASE1: When all transactions are valid and available // Create 8 TXs in the log. int noOfTransactions = 8; addTransactions(generateData(noOfTransactions), true); mockContainerHealthResult(true); List blocks; - List txIDs = new ArrayList<>(); int i = 1; while (i < noOfTransactions) { - // In each iteration read two transaction, API returns all the transactions in order. + // In each iteration read two transactions, API returns all the transactions in order. // 1st iteration: {1, 2} // 2nd iteration: {3, 4} // 3rd iteration: {5, 6} @@ -475,48 +370,9 @@ public void testSCMDelIteratorProgress() throws Exception { assertEquals(blocks.get(1).getTxID(), i++); } - // CASE2: When some transactions are not available for delete in the current iteration, - // either due to max retry reach or some other issue. - // New transactions Id is { 9, 10, 11, 12, 13, 14, 15, 16} - addTransactions(generateData(noOfTransactions), true); - mockContainerHealthResult(true); - - // Mark transaction Id 11 as reached max retry count so that it will be ignored - // by scm deleting service while fetching transaction for delete - int ignoreTransactionId = 11; - txIDs.add((long) ignoreTransactionId); - for (i = 0; i < maxRetry; i++) { - incrementCount(txIDs); - } - incrementCount(txIDs); - - i = 9; - while (true) { - // In each iteration read two transaction. - // If any transaction which is not available for delete in the current iteration, - // it will be ignored and will be re-checked again only after complete table is read. - // 1st iteration: {9, 10} - // 2nd iteration: {12, 13} Transaction 11 is ignored here - // 3rd iteration: {14, 15} Transaction 11 is available here, - // but it will be read only when all db records are read till the end. - // 4th iteration: {16, 11} Since iterator reached at the end of table after reading transaction 16, - // Iterator starts from beginning again, and it returns transaction 11 as well - blocks = getTransactions(2 * BLOCKS_PER_TXN * THREE); - if (i == ignoreTransactionId) { - i++; - } - assertEquals(blocks.get(0).getTxID(), i++); - if (i == 17) { - assertEquals(blocks.get(1).getTxID(), ignoreTransactionId); - break; - } - assertEquals(blocks.get(1).getTxID(), i++); - - if (i == 14) { - // Reset transaction 11 so that it will be available in scm key deleting service in the subsequent iterations. - resetCount(txIDs); - } - } + // Since all the transactions are in-flight, the getTransaction should return empty list. + blocks = getTransactions(2 * BLOCKS_PER_TXN * THREE); + assertTrue(blocks.isEmpty()); } @Test @@ -776,7 +632,6 @@ public void testRandomOperateTransactions() throws Exception { for (DeletedBlocksTransaction block : blocks) { txIDs.add(block.getTxID()); } - incrementCount(txIDs); } else if (state == 2) { commitTransactions(blocks); committed += blocks.size() / THREE; diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 2d5db1dfaf36..133166dec487 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -39,7 +39,6 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -551,18 +550,6 @@ public void transferLeadership(String newLeaderId) throws IOException { storageContainerLocationClient.transferLeadership(newLeaderId); } - @Override - public List getFailedDeletedBlockTxn(int count, - long startTxId) throws IOException { - return storageContainerLocationClient.getFailedDeletedBlockTxn(count, - startTxId); - } - - @Override - public int resetDeletedBlockRetryCount(List txIDs) throws IOException { - return storageContainerLocationClient.resetDeletedBlockRetryCount(txIDs); - } - @Override public List getDatanodeUsageInfo( String address, String uuid) throws IOException { diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java deleted file mode 100644 index 5862d309d645..000000000000 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.admin.scm; - -import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import picocli.CommandLine; - -/** - * Subcommand to group container related operations. - */ -@CommandLine.Command( - name = "deletedBlocksTxn", - description = "SCM deleted blocks transaction specific operations", - mixinStandardHelpOptions = true, - versionProvider = HddsVersionProvider.class, - subcommands = { - GetFailedDeletedBlocksTxnSubcommand.class, - ResetDeletedBlockRetryCountSubcommand.class, - }) -public class DeletedBlocksTxnCommands { - -} - diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetFailedDeletedBlocksTxnSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetFailedDeletedBlocksTxnSubcommand.java deleted file mode 100644 index c9b8d7fbe534..000000000000 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetFailedDeletedBlocksTxnSubcommand.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.admin.scm; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; -import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; -import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper; -import org.apache.hadoop.hdds.server.JsonUtils; -import picocli.CommandLine; - -/** - * Handler of getting expired deleted blocks from SCM side. - */ -@CommandLine.Command( - name = "ls", - description = "Print the failed DeletedBlocksTransaction(retry count = -1)", - mixinStandardHelpOptions = true, - versionProvider = HddsVersionProvider.class) -public class GetFailedDeletedBlocksTxnSubcommand extends ScmSubcommand { - - @CommandLine.ArgGroup(multiplicity = "1") - private TransactionsOption group; - - @CommandLine.Option(names = {"-s", "--startTxId", "--start-tx-id"}, - defaultValue = "0", - description = "The least transaction ID to start with, default 0." + - " Only work with -c/--count") - private long startTxId; - - @CommandLine.Option(names = {"-o", "--out"}, - description = "Print transactions into file in JSON format.") - private String fileName; - - private static final int LIST_ALL_FAILED_TRANSACTIONS = -1; - - @Override - public void execute(ScmClient client) throws IOException { - List response; - int count = group.getAll ? LIST_ALL_FAILED_TRANSACTIONS : group.count; - response = client.getFailedDeletedBlockTxn(count, startTxId); - List txns = response.stream() - .map(DeletedBlocksTransactionInfoWrapper::fromProtobuf) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - - String result = JsonUtils.toJsonStringWithDefaultPrettyPrinter(txns); - if (fileName != null) { - try (OutputStream f = Files.newOutputStream(Paths.get(fileName))) { - f.write(result.getBytes(StandardCharsets.UTF_8)); - } - } else { - System.out.println(result); - } - } - - static class TransactionsOption { - @CommandLine.Option(names = {"-a", "--all"}, - description = "Get all the failed transactions.") - private boolean getAll; - - @CommandLine.Option(names = {"-c", "--count"}, - defaultValue = "20", - description = "Get at most the count number of the" + - " failed transactions.") - private int count; - } -} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ResetDeletedBlockRetryCountSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ResetDeletedBlockRetryCountSubcommand.java deleted file mode 100644 index b93b8d50b43a..000000000000 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ResetDeletedBlockRetryCountSubcommand.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.admin.scm; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import org.apache.hadoop.hdds.scm.cli.ScmSubcommand; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper; -import org.apache.hadoop.hdds.server.JsonUtils; -import picocli.CommandLine; - -/** - * Handler of resetting expired deleted blocks from SCM side. - */ -@CommandLine.Command( - name = "reset", - description = "Reset the retry count of failed DeletedBlocksTransaction", - mixinStandardHelpOptions = true, - versionProvider = HddsVersionProvider.class) -public class ResetDeletedBlockRetryCountSubcommand extends ScmSubcommand { - - @CommandLine.ArgGroup(multiplicity = "1") - private TransactionsOption group; - - static class TransactionsOption { - @CommandLine.Option(names = {"-a", "--all"}, - description = "Reset all expired deleted block transaction retry" + - " count from -1 to 0.") - private boolean resetAll; - - @CommandLine.Option(names = {"-l", "--list"}, - split = ",", - paramLabel = "txID", - description = "Reset the only given deletedBlock transaction ID" + - " list. Example: 100,101,102.(Separated by ',')") - private List txList; - - @CommandLine.Option(names = {"-i", "--in"}, - description = "Use file as input, need to be JSON Array format and " + - "contains multi \"txID\" key. Example: [{\"txID\":1},{\"txID\":2}]") - private String fileName; - } - - @Override - public void execute(ScmClient client) throws IOException { - int count; - if (group.resetAll) { - count = client.resetDeletedBlockRetryCount(new ArrayList<>()); - } else if (group.fileName != null) { - List txIDs; - try (InputStream in = Files.newInputStream(Paths.get(group.fileName)); - Reader fileReader = new InputStreamReader(in, - StandardCharsets.UTF_8)) { - DeletedBlocksTransactionInfoWrapper[] txns = JsonUtils.readFromReader(fileReader, - DeletedBlocksTransactionInfoWrapper[].class); - txIDs = Arrays.stream(txns) - .map(DeletedBlocksTransactionInfoWrapper::getTxID) - .sorted() - .distinct() - .collect(Collectors.toList()); - System.out.println("Num of loaded txIDs: " + txIDs.size()); - if (!txIDs.isEmpty()) { - System.out.println("The first loaded txID: " + txIDs.get(0)); - System.out.println("The last loaded txID: " + - txIDs.get(txIDs.size() - 1)); - } - } catch (IOException ex) { - final String message = "Failed to parse the file " + group.fileName + ": " + ex.getMessage(); - System.out.println(message); - throw new IOException(message, ex); - } - - count = client.resetDeletedBlockRetryCount(txIDs); - } else { - if (group.txList == null || group.txList.isEmpty()) { - System.out.println("TransactionId list should not be empty"); - return; - } - count = client.resetDeletedBlockRetryCount(group.txList); - } - System.out.println("Reset " + count + " deleted block transactions in" + - " SCM."); - } -} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java index 1bab77a4eb41..0228eb221be8 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java @@ -36,7 +36,6 @@ FinalizeScmUpgradeSubcommand.class, FinalizationScmStatusSubcommand.class, TransferScmLeaderSubCommand.class, - DeletedBlocksTxnCommands.class, DecommissionScmSubcommand.class, RotateKeySubCommand.class }) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 1c220ef8d3a7..e407e30bffc6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -303,7 +303,7 @@ private void testBlockDeletionTransactions(MiniOzoneCluster cluster) throws Exce try { cluster.getStorageContainerManager().getScmHAManager() .asSCMHADBTransactionBuffer().flush(); - return delLog.getFailedTransactions(-1, 0).isEmpty(); + return delLog.getNumOfValidTransactions() == 0; } catch (IOException e) { return false; } @@ -333,7 +333,6 @@ private static void configureBlockDeletion(OzoneConfiguration conf) { TimeUnit.MILLISECONDS); conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 200, TimeUnit.MILLISECONDS); - conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); // Reset container provision size, otherwise only one container // is created by default. conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 10 * KEY_COUNT); @@ -405,7 +404,6 @@ public void testBlockDeletingThrottling() throws Exception { int numKeys = 15; OzoneConfiguration conf = new OzoneConfiguration(); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); - conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); ScmConfig scmConfig = conf.getObject(ScmConfig.class); @@ -660,7 +658,6 @@ public void testCloseContainerCommandOnRestart() throws Exception { int numKeys = 15; OzoneConfiguration conf = new OzoneConfiguration(); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS); - conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java deleted file mode 100644 index 72a5ed161373..000000000000 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.shell; - -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import org.apache.commons.lang3.RandomUtils; -import org.apache.hadoop.hdds.client.RatisReplicationConfig; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; -import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; -import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; -import org.apache.hadoop.hdds.scm.container.ContainerReplica; -import org.apache.hadoop.hdds.scm.container.ContainerStateManager; -import org.apache.hadoop.hdds.scm.pipeline.PipelineID; -import org.apache.hadoop.hdds.scm.server.StorageContainerManager; -import org.apache.hadoop.ozone.MiniOzoneCluster; -import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; -import org.apache.hadoop.ozone.admin.scm.GetFailedDeletedBlocksTxnSubcommand; -import org.apache.hadoop.ozone.admin.scm.ResetDeletedBlockRetryCountSubcommand; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import picocli.CommandLine; - -/** - * Test for DeletedBlocksTxnSubcommand Cli. - */ -public class TestDeletedBlocksTxnShell { - - private static final Logger LOG = LoggerFactory - .getLogger(TestDeletedBlocksTxnShell.class); - - private static final String SCM_SERVICE_ID = "scm-service-test1"; - private static final int NUM_OF_SCMS = 3; - - private final PrintStream originalOut = System.out; - private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); - private MiniOzoneHAClusterImpl cluster = null; - private OzoneConfiguration conf; - private File txnFile; - - private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); - - @TempDir - private Path tempDir; - - /** - * Create a MiniOzoneHACluster for testing. - * - * @throws IOException - */ - @BeforeEach - public void init() throws Exception { - conf = new OzoneConfiguration(); - - conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); - - cluster = MiniOzoneCluster.newHABuilder(conf) - .setSCMServiceId(SCM_SERVICE_ID) - .setNumOfStorageContainerManagers(NUM_OF_SCMS) - .setNumOfActiveSCMs(NUM_OF_SCMS) - .setNumOfOzoneManagers(1) - .build(); - cluster.waitForClusterToBeReady(); - - txnFile = tempDir.resolve("txn.txt").toFile(); - LOG.info("txnFile path: {}", txnFile.getAbsolutePath()); - System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); - } - - /** - * Shutdown MiniDFSCluster. - */ - @AfterEach - public void shutdown() { - if (cluster != null) { - cluster.shutdown(); - } - System.setOut(originalOut); - } - - //> - private Map> generateData(int dataSize) throws Exception { - Map> blockMap = new HashMap<>(); - int continerIDBase = RandomUtils.secure().randomInt(0, 100); - int localIDBase = RandomUtils.secure().randomInt(0, 1000); - for (int i = 0; i < dataSize; i++) { - long containerID = continerIDBase + i; - updateContainerMetadata(containerID); - List blocks = new ArrayList<>(); - for (int j = 0; j < 5; j++) { - long localID = localIDBase + j; - blocks.add(localID); - } - blockMap.put(containerID, blocks); - } - return blockMap; - } - - private void updateContainerMetadata(long cid) throws Exception { - final ContainerInfo container = - new ContainerInfo.Builder() - .setContainerID(cid) - .setReplicationConfig(RatisReplicationConfig.getInstance(THREE)) - .setState(HddsProtos.LifeCycleState.CLOSED) - .setOwner("TestDeletedBlockLog") - .setPipelineID(PipelineID.randomId()) - .build(); - final Set replicaSet = cluster.getHddsDatanodes() - .subList(0, 3) - .stream() - .map(dn -> ContainerReplica.newBuilder() - .setContainerID(container.containerID()) - .setContainerState(State.CLOSED) - .setDatanodeDetails(dn.getDatanodeDetails()) - .build()) - .collect(Collectors.toSet()); - ContainerStateManager containerStateManager = getSCMLeader(). - getContainerManager().getContainerStateManager(); - containerStateManager.addContainer(container.getProtobuf()); - for (ContainerReplica replica: replicaSet) { - containerStateManager.updateContainerReplica(replica); - } - } - - private StorageContainerManager getSCMLeader() { - return cluster.getStorageContainerManagersList() - .stream().filter(a -> a.getScmContext().isLeaderReady()) - .collect(Collectors.toList()).get(0); - } - - private void flush() throws Exception { - // only flush leader here, avoid the follower concurrent flush and write - getSCMLeader().getScmHAManager().asSCMHADBTransactionBuffer().flush(); - } - - @Test - public void testDeletedBlocksTxnSubcommand() throws Exception { - int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); - int currentValidTxnNum; - // add 30 block deletion transactions - DeletedBlockLog deletedBlockLog = getSCMLeader(). - getScmBlockManager().getDeletedBlockLog(); - deletedBlockLog.addTransactions(generateData(30)); - flush(); - currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); - LOG.info("Valid num of txns: {}", currentValidTxnNum); - assertEquals(30, currentValidTxnNum); - - // let the first 20 txns be failed - List txIds = new ArrayList<>(); - for (int i = 1; i < 21; i++) { - txIds.add((long) i); - } - // increment retry count than threshold, count will be set to -1 - for (int i = 0; i < maxRetry + 1; i++) { - deletedBlockLog.incrementCount(txIds); - } - flush(); - currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); - LOG.info("Valid num of txns: {}", currentValidTxnNum); - assertEquals(10, currentValidTxnNum); - - ContainerOperationClient scmClient = new ContainerOperationClient(conf); - CommandLine cmd; - // getFailedDeletedBlocksTxn cmd will print all the failed txns - GetFailedDeletedBlocksTxnSubcommand getCommand = - new GetFailedDeletedBlocksTxnSubcommand(); - cmd = new CommandLine(getCommand); - cmd.parseArgs("-a"); - getCommand.execute(scmClient); - int matchCount = 0; - Pattern p = Pattern.compile("\"txID\" : \\d+", Pattern.MULTILINE); - Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING)); - while (m.find()) { - matchCount += 1; - } - assertEquals(20, matchCount); - - // print the first 10 failed txns info into file - cmd.parseArgs("-o", txnFile.getAbsolutePath(), "-c", "10"); - getCommand.execute(scmClient); - assertThat(txnFile).exists(); - - ResetDeletedBlockRetryCountSubcommand resetCommand = - new ResetDeletedBlockRetryCountSubcommand(); - cmd = new CommandLine(resetCommand); - - // reset the txns in file - cmd.parseArgs("-i", txnFile.getAbsolutePath()); - resetCommand.execute(scmClient); - flush(); - currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); - LOG.info("Valid num of txns: {}", currentValidTxnNum); - assertEquals(20, currentValidTxnNum); - - // reset the given txIds list - cmd.parseArgs("-l", "11,12,13,14,15"); - resetCommand.execute(scmClient); - flush(); - currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); - LOG.info("Valid num of txns: {}", currentValidTxnNum); - assertEquals(25, currentValidTxnNum); - - // reset the non-existing txns and valid txns, should do nothing - cmd.parseArgs("-l", "1,2,3,4,5,100,101,102,103,104,105"); - resetCommand.execute(scmClient); - flush(); - currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); - LOG.info("Valid num of txns: {}", currentValidTxnNum); - assertEquals(25, currentValidTxnNum); - - // reset all the result expired txIds, all transactions should be available - cmd.parseArgs("-a"); - resetCommand.execute(scmClient); - flush(); - currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions(); - LOG.info("Valid num of txns: {}", currentValidTxnNum); - assertEquals(30, currentValidTxnNum); - - // Fail first 20 txns be failed - // increment retry count than threshold, count will be set to -1 - for (int i = 0; i < maxRetry + 1; i++) { - deletedBlockLog.incrementCount(txIds); - } - flush(); - - GetFailedDeletedBlocksTxnSubcommand getFailedBlockCommand = - new GetFailedDeletedBlocksTxnSubcommand(); - outContent.reset(); - cmd = new CommandLine(getFailedBlockCommand); - // set start transaction as 15 - cmd.parseArgs("-c", "5", "-s", "15"); - getFailedBlockCommand.execute(scmClient); - matchCount = 0; - p = Pattern.compile("\"txID\" : \\d+", Pattern.MULTILINE); - m = p.matcher(outContent.toString(DEFAULT_ENCODING)); - while (m.find()) { - matchCount += 1; - } - assertEquals(5, matchCount); - } -}