diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java
index bb42d8a0f57..7a77b351636 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestDeleteWithInAdequateDN.java
@@ -22,6 +22,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.client.RatisReplicationConfig;
@@ -38,6 +39,7 @@
 import org.apache.hadoop.hdds.scm.XceiverClientSpi;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.hdds.utils.IOUtils;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
@@ -65,6 +67,7 @@
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
@@ -73,6 +76,8 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.ozone.test.GenericTestUtils;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assumptions;
 import org.junit.jupiter.api.BeforeAll;
@@ -103,6 +108,7 @@ public static void init() throws Exception {
     conf = new OzoneConfiguration();
+    conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
     conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 100,
         TimeUnit.MILLISECONDS);
     conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200,
@@ -281,6 +287,8 @@ void testDeleteKeyWithInAdequateDN() throws Exception {
     //cluster.getOzoneManager().deleteKey(keyArgs);
     client.getObjectStore().getVolume(volumeName).getBucket(bucketName).
         deleteKey("ratis");
+    // flush deletedBlockLog
+    waitForDeletedBlockLog();
 
     // make sure the chunk was never deleted on the leader even though
     // deleteBlock handler is invoked
@@ -320,4 +328,19 @@ void testDeleteKeyWithInAdequateDN() throws Exception {
       assertSame(ContainerProtos.Result.UNABLE_TO_FIND_CHUNK, e.getResult());
     }
   }
+
+  private void waitForDeletedBlockLog() throws InterruptedException, TimeoutException {
+    GenericTestUtils.waitFor(() -> {
+      StorageContainerManager scm = cluster.getStorageContainerManager();
+      try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
+        if (scm.getScmBlockManager().getDeletedBlockLog().getNumOfValidTransactions() > 0) {
+          return true;
+        }
+        return false;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }, 100, 3000);
+  }
 }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java
index 719715ac8b3..e03ccf25ade 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java
@@ -47,7 +47,6 @@
 import org.apache.hadoop.hdds.scm.block.ScmBlockDeletingServiceMetrics;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
-import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.container.ContainerStateManager;
 import org.apache.hadoop.hdds.scm.container.replication.LegacyReplicationManager;
@@ -96,6 +95,7 @@
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EXPIRED_CONTAINER_REPLICA_OP_SCRUB_INTERVAL;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL;
@@ -135,6 +135,7 @@ public void init() throws Exception {
     GenericTestUtils.setLogLevel(LegacyReplicationManager.LOG, Level.DEBUG);
     GenericTestUtils.setLogLevel(ReplicationManager.LOG, Level.DEBUG);
+    conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
     conf.set("ozone.replication.allowed-configs",
         "^(RATIS/THREE)|(EC/2-1-256k)$");
     conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100,
@@ -241,6 +242,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
     // verify key blocks were created in DN.
     GenericTestUtils.waitFor(() -> {
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         verifyBlocksCreated(omKeyLocationInfoGroupList);
         return true;
       } catch (Throwable t) {
@@ -285,6 +287,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
     // The blocks should be deleted in the DN.
     GenericTestUtils.waitFor(() -> {
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         verifyBlocksDeleted(omKeyLocationInfoGroupList);
         return true;
       } catch (Throwable t) {
@@ -301,6 +304,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
     // Verify transactions committed
     GenericTestUtils.waitFor(() -> {
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         verifyTransactionsCommitted();
         return true;
       } catch (Throwable t) {
@@ -380,10 +384,16 @@ public void testContainerStatisticsAfterDelete() throws Exception {
     writeClient.deleteKey(keyArgs);
 
     // Wait for blocks to be deleted and container reports to be processed
-    GenericTestUtils.waitFor(() ->
-        scm.getContainerManager().getContainers().stream()
-            .allMatch(c -> c.getUsedBytes() == 0 &&
-                c.getNumberOfKeys() == 0), 500, 20000);
+    GenericTestUtils.waitFor(() -> {
+      try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      return scm.getContainerManager().getContainers().stream()
+          .allMatch(c -> c.getUsedBytes() == 0 &&
+              c.getNumberOfKeys() == 0);
+    }, 500, 20000);
     Thread.sleep(5000);
     // Verify that pending block delete num are as expected with resent cmds
     cluster.getHddsDatanodes().forEach(dn -> {
@@ -428,6 +438,7 @@ public void testContainerStatisticsAfterDelete() throws Exception {
       assertEquals(HddsProtos.LifeCycleState.DELETED,
           container.getState());
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         assertEquals(HddsProtos.LifeCycleState.DELETED,
             scm.getScmMetadataStore().getContainerTable()
                 .get(container.containerID()).getState());
@@ -519,14 +530,14 @@ public void testContainerStateAfterDNRestart() throws Exception {
 
     GenericTestUtils.waitFor(() -> {
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         return scm.getContainerManager().getContainerReplicas(
             containerId).stream().
             allMatch(replica -> replica.isEmpty());
-      } catch (ContainerNotFoundException e) {
+      } catch (IOException e) {
         throw new RuntimeException(e);
       }
-    },
-        100, 10 * 1000);
+    }, 100, 10 * 1000);
 
     // Container state should be empty now as key got deleted
     assertTrue(getContainerFromDN(
@@ -549,6 +560,7 @@ public void testContainerStateAfterDNRestart() throws Exception {
       assertEquals(HddsProtos.LifeCycleState.DELETED,
           container.getState());
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         assertEquals(HddsProtos.LifeCycleState.DELETED,
             scm.getScmMetadataStore().getContainerTable()
                 .get(container.containerID()).getState());
@@ -563,7 +575,6 @@ public void testContainerStateAfterDNRestart() throws Exception {
       }
       return true;
     }, 500, 30000);
-    LOG.info(metrics.toString());
   }
 
   /**
@@ -649,14 +660,14 @@ public void testContainerDeleteWithInvalidKeyCount()
     // Ensure isEmpty are true for all replica after delete key
     GenericTestUtils.waitFor(() -> {
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         return scm.getContainerManager().getContainerReplicas(
             containerId).stream()
             .allMatch(replica -> replica.isEmpty());
-      } catch (ContainerNotFoundException e) {
+      } catch (IOException e) {
         throw new RuntimeException(e);
       }
-    },
-        500, 5 * 2000);
+    }, 500, 5 * 2000);
 
     // Update container replica by making invalid keyCount in one replica
     ContainerReplica replicaOne = ContainerReplica.newBuilder()
@@ -686,6 +697,7 @@ public void testContainerDeleteWithInvalidKeyCount()
       assertEquals(HddsProtos.LifeCycleState.DELETED,
           container.getState());
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         assertEquals(HddsProtos.LifeCycleState.DELETED,
             scm.getScmMetadataStore().getContainerTable()
                 .get(container.containerID()).getState());
@@ -817,6 +829,7 @@ public void testBlockDeleteCommandParallelProcess() throws Exception {
     // Wait for block delete command sent from OM
     GenericTestUtils.waitFor(() -> {
       try {
+        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
         if (scm.getScmBlockManager().getDeletedBlockLog()
             .getNumOfValidTransactions() > 0) {
           return true;
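
Every change in this patch follows the same idiom: with OZONE_SCM_HA_ENABLE_KEY set, SCM metadata updates (deleted block log entries, container state) pass through the SCM HA DB transaction buffer, so each polling assertion calls scm.getScmHAManager().asSCMHADBTransactionBuffer().flush() before inspecting the SCM metadata store. Below is a minimal standalone sketch of that flush-then-check pattern; it is not part of the patch, and the class ScmTestWaits, the method waitForScmCondition, and the ScmCheck interface are hypothetical names, while the flush() and GenericTestUtils.waitFor calls are taken from the diff above.

```java
import java.io.IOException;
import java.util.concurrent.TimeoutException;

import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.ozone.test.GenericTestUtils;

/** Hypothetical test helper illustrating the flush-then-check idiom above. */
public final class ScmTestWaits {

  /** Condition evaluated against SCM after the HA transaction buffer flush. */
  @FunctionalInterface
  public interface ScmCheck {
    boolean test(StorageContainerManager scm) throws IOException;
  }

  private ScmTestWaits() {
  }

  /**
   * Polls {@code check} until it returns true, flushing the SCM HA DB
   * transaction buffer on every attempt so that buffered metadata updates
   * (deleted block log entries, container state) reach the SCM store first.
   */
  public static void waitForScmCondition(StorageContainerManager scm,
      ScmCheck check, int intervalMillis, int timeoutMillis)
      throws InterruptedException, TimeoutException {
    GenericTestUtils.waitFor(() -> {
      try {
        // Force buffered SCM HA transactions into the metadata store
        // before evaluating the condition.
        scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
        return check.test(scm);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }, intervalMillis, timeoutMillis);
  }
}
```

With a helper like this, a call mirroring waitForDeletedBlockLog() above would be, for example, waitForScmCondition(cluster.getStorageContainerManager(), s -> s.getScmBlockManager().getDeletedBlockLog().getNumOfValidTransactions() > 0, 100, 3000).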