Skip to content

Commit

Permalink
HDDS-11989. Enable SCM Ratis in tests related to DeletedBlockLog
Browse files Browse the repository at this point in the history
  • Loading branch information
chungen0126 committed Dec 27, 2024
1 parent f5e5493 commit 962b562
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
Expand All @@ -38,6 +39,7 @@
import org.apache.hadoop.hdds.scm.XceiverClientSpi;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
Expand Down Expand Up @@ -65,6 +67,7 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
Expand All @@ -73,6 +76,8 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
Expand Down Expand Up @@ -103,6 +108,7 @@ public static void init() throws Exception {

conf = new OzoneConfiguration();

conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 100,
TimeUnit.MILLISECONDS);
conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200,
Expand Down Expand Up @@ -281,6 +287,8 @@ void testDeleteKeyWithInAdequateDN() throws Exception {
//cluster.getOzoneManager().deleteKey(keyArgs);
client.getObjectStore().getVolume(volumeName).getBucket(bucketName).
deleteKey("ratis");
// flush deletedBlockLog
waitForDeletedBlockLog();
// make sure the chunk was never deleted on the leader even though
// deleteBlock handler is invoked

Expand Down Expand Up @@ -320,4 +328,19 @@ void testDeleteKeyWithInAdequateDN() throws Exception {
assertSame(ContainerProtos.Result.UNABLE_TO_FIND_CHUNK, e.getResult());
}
}

/**
 * Flushes the SCM HA DB transaction buffer and polls until the deleted block
 * log reports at least one valid transaction, i.e. the key deletion has been
 * persisted into the log.
 *
 * @throws InterruptedException if the polling thread is interrupted
 * @throws TimeoutException if no valid transaction appears within 3 seconds
 */
private void waitForDeletedBlockLog() throws InterruptedException, TimeoutException {
  GenericTestUtils.waitFor(() -> {
    StorageContainerManager storageContainerManager =
        cluster.getStorageContainerManager();
    try {
      // With SCM Ratis enabled, deletions sit in the HA transaction buffer
      // until flushed; flush explicitly so the log reflects them.
      storageContainerManager.getScmHAManager()
          .asSCMHADBTransactionBuffer().flush();
      return storageContainerManager.getScmBlockManager()
          .getDeletedBlockLog().getNumOfValidTransactions() > 0;
    } catch (IOException e) {
      // waitFor's supplier cannot throw checked exceptions; wrap and rethrow.
      throw new RuntimeException(e);
    }
  }, 100, 3000);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
import org.apache.hadoop.hdds.scm.block.ScmBlockDeletingServiceMetrics;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ContainerStateManager;
import org.apache.hadoop.hdds.scm.container.replication.LegacyReplicationManager;
Expand Down Expand Up @@ -96,6 +95,7 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EXPIRED_CONTAINER_REPLICA_OP_SCRUB_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL;
Expand Down Expand Up @@ -135,6 +135,7 @@ public void init() throws Exception {
GenericTestUtils.setLogLevel(LegacyReplicationManager.LOG, Level.DEBUG);
GenericTestUtils.setLogLevel(ReplicationManager.LOG, Level.DEBUG);

conf.setBoolean(OZONE_SCM_HA_ENABLE_KEY, true);
conf.set("ozone.replication.allowed-configs",
"^(RATIS/THREE)|(EC/2-1-256k)$");
conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100,
Expand Down Expand Up @@ -241,6 +242,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
// verify key blocks were created in DN.
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
verifyBlocksCreated(omKeyLocationInfoGroupList);
return true;
} catch (Throwable t) {
Expand Down Expand Up @@ -285,6 +287,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
// The blocks should be deleted in the DN.
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
verifyBlocksDeleted(omKeyLocationInfoGroupList);
return true;
} catch (Throwable t) {
Expand All @@ -301,6 +304,7 @@ public void testBlockDeletion(ReplicationConfig repConfig) throws Exception {
// Verify transactions committed
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
verifyTransactionsCommitted();
return true;
} catch (Throwable t) {
Expand Down Expand Up @@ -380,10 +384,16 @@ public void testContainerStatisticsAfterDelete() throws Exception {

writeClient.deleteKey(keyArgs);
// Wait for blocks to be deleted and container reports to be processed
GenericTestUtils.waitFor(() ->
scm.getContainerManager().getContainers().stream()
.allMatch(c -> c.getUsedBytes() == 0 &&
c.getNumberOfKeys() == 0), 500, 20000);
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
} catch (IOException e) {
throw new RuntimeException(e);
}
return scm.getContainerManager().getContainers().stream()
.allMatch(c -> c.getUsedBytes() == 0 &&
c.getNumberOfKeys() == 0);
}, 500, 20000);
Thread.sleep(5000);
// Verify that pending block delete num are as expected with resent cmds
cluster.getHddsDatanodes().forEach(dn -> {
Expand Down Expand Up @@ -428,6 +438,7 @@ public void testContainerStatisticsAfterDelete() throws Exception {
assertEquals(HddsProtos.LifeCycleState.DELETED,
container.getState());
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
assertEquals(HddsProtos.LifeCycleState.DELETED,
scm.getScmMetadataStore().getContainerTable()
.get(container.containerID()).getState());
Expand Down Expand Up @@ -519,14 +530,14 @@ public void testContainerStateAfterDNRestart() throws Exception {

GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
return scm.getContainerManager().getContainerReplicas(
containerId).stream().
allMatch(replica -> replica.isEmpty());
} catch (ContainerNotFoundException e) {
} catch (IOException e) {
throw new RuntimeException(e);
}
},
100, 10 * 1000);
}, 100, 10 * 1000);

// Container state should be empty now as key got deleted
assertTrue(getContainerFromDN(
Expand All @@ -549,6 +560,7 @@ public void testContainerStateAfterDNRestart() throws Exception {
assertEquals(HddsProtos.LifeCycleState.DELETED,
container.getState());
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
assertEquals(HddsProtos.LifeCycleState.DELETED,
scm.getScmMetadataStore().getContainerTable()
.get(container.containerID()).getState());
Expand All @@ -563,7 +575,6 @@ public void testContainerStateAfterDNRestart() throws Exception {
}
return true;
}, 500, 30000);
LOG.info(metrics.toString());
}

/**
Expand Down Expand Up @@ -649,14 +660,14 @@ public void testContainerDeleteWithInvalidKeyCount()
// Ensure isEmpty are true for all replica after delete key
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
return scm.getContainerManager().getContainerReplicas(
containerId).stream()
.allMatch(replica -> replica.isEmpty());
} catch (ContainerNotFoundException e) {
} catch (IOException e) {
throw new RuntimeException(e);
}
},
500, 5 * 2000);
}, 500, 5 * 2000);

// Update container replica by making invalid keyCount in one replica
ContainerReplica replicaOne = ContainerReplica.newBuilder()
Expand Down Expand Up @@ -686,6 +697,7 @@ public void testContainerDeleteWithInvalidKeyCount()
assertEquals(HddsProtos.LifeCycleState.DELETED,
container.getState());
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
assertEquals(HddsProtos.LifeCycleState.DELETED,
scm.getScmMetadataStore().getContainerTable()
.get(container.containerID()).getState());
Expand Down Expand Up @@ -817,6 +829,7 @@ public void testBlockDeleteCommandParallelProcess() throws Exception {
// Wait for block delete command sent from OM
GenericTestUtils.waitFor(() -> {
try {
scm.getScmHAManager().asSCMHADBTransactionBuffer().flush();
if (scm.getScmBlockManager().getDeletedBlockLog()
.getNumOfValidTransactions() > 0) {
return true;
Expand Down

0 comments on commit 962b562

Please sign in to comment.