usageInfoList =
- storageClient.getDatanodeUsageInfo(
- dn.getIpAddress(), dn.getUuidString());
+ @Test
+ public void testNodeOperationalStates() throws Exception {
+ StorageContainerManager scm = cluster().getStorageContainerManager();
+ NodeManager nm = scm.getScmNodeManager();
+ final int numOfDatanodes = nm.getAllNodes().size();
- assertEquals(1, usageInfoList.size());
- assertThat(usageInfoList.get(0).getContainerCount()).isGreaterThanOrEqualTo(0).isLessThanOrEqualTo(1);
- totalContainerCount[(int)usageInfoList.get(0).getContainerCount()]++;
+    // Remember the first node's original state so it can be restored later,
+    // then set it to something other than IN_SERVICE
+    DatanodeDetails node = nm.getAllNodes().get(0);
+    HddsProtos.NodeOperationalState originalState =
+        nm.getNodeStatus(node).getOperationalState();
+    nm.setNodeOperationalState(node, DECOMMISSIONING);
+
+    // Only the IN_SERVICE nodes should be returned, i.e. all but the
+    // decommissioning one
+ int nodeCount = storageClient.queryNode(IN_SERVICE, HEALTHY,
+ HddsProtos.QueryScope.CLUSTER, "").size();
+ assertEquals(numOfDatanodes - 1, nodeCount);
+
+ // null acts as wildcard for opState
+ nodeCount = storageClient.queryNode(null, HEALTHY,
+ HddsProtos.QueryScope.CLUSTER, "").size();
+ assertEquals(numOfDatanodes, nodeCount);
+
+ // null acts as wildcard for nodeState
+ nodeCount = storageClient.queryNode(IN_SERVICE, null,
+ HddsProtos.QueryScope.CLUSTER, "").size();
+ assertEquals(numOfDatanodes - 1, nodeCount);
+
+ // Both null - should return all nodes
+ nodeCount = storageClient.queryNode(null, null,
+ HddsProtos.QueryScope.CLUSTER, "").size();
+ assertEquals(numOfDatanodes, nodeCount);
+
+ // No node should be returned
+ nodeCount = storageClient.queryNode(IN_MAINTENANCE, HEALTHY,
+ HddsProtos.QueryScope.CLUSTER, "").size();
+ assertEquals(0, nodeCount);
+
+ // Test all operational states by looping over them all and setting the
+ // state manually.
+ try {
+ for (HddsProtos.NodeOperationalState s :
+ HddsProtos.NodeOperationalState.values()) {
+ nm.setNodeOperationalState(node, s);
+ nodeCount = storageClient.queryNode(s, HEALTHY,
+ HddsProtos.QueryScope.CLUSTER, "").size();
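+        // Only the single modified node should match any state other than IN_SERVICE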
+ if (s == IN_SERVICE) {
+          assertEquals(numOfDatanodes, nodeCount);
+ } else {
+ assertEquals(1, nodeCount);
+ }
+ }
+ } finally {
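+      // Restore the node so later tests see it in its original state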
+ nm.setNodeOperationalState(node, originalState);
}
- assertEquals(2, totalContainerCount[0]);
- assertEquals(1, totalContainerCount[1]);
}
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHA.java
index 550570a8986f..41399311d4bf 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHA.java
@@ -17,49 +17,21 @@
package org.apache.hadoop.hdds.scm;
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.hadoop.hdds.client.ReplicationFactor.ONE;
-import static org.apache.hadoop.hdds.client.ReplicationType.RATIS;
-import static org.apache.ozone.test.GenericTestUtils.getTestStartTime;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
-import java.time.Instant;
-import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.hdds.client.RatisReplicationConfig;
+import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.scm.container.ContainerID;
-import org.apache.hadoop.hdds.scm.ha.SCMHAMetrics;
-import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
-import org.apache.hadoop.ozone.client.ObjectStore;
-import org.apache.hadoop.ozone.client.OzoneBucket;
-import org.apache.hadoop.ozone.client.OzoneClient;
-import org.apache.hadoop.ozone.client.OzoneKey;
-import org.apache.hadoop.ozone.client.OzoneVolume;
-import org.apache.hadoop.ozone.client.io.OzoneInputStream;
-import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
-import org.apache.hadoop.ozone.om.OzoneManager;
-import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
-import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
-import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
-import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ratis.statemachine.SnapshotInfo;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
@@ -67,48 +39,33 @@
import org.slf4j.LoggerFactory;
/**
- * Base class for SCM HA tests.
+ * Tests for SCM HA.
*/
@Timeout(300)
public class TestStorageContainerManagerHA {
private static final Logger LOG = LoggerFactory.getLogger(TestStorageContainerManagerHA.class);
- private MiniOzoneHAClusterImpl cluster = null;
+ private MiniOzoneHAClusterImpl cluster;
private OzoneConfiguration conf;
- private String omServiceId;
- private static int numOfOMs = 3;
- private String scmServiceId;
- private static int numOfSCMs = 3;
+ private static final int OM_COUNT = 3;
+ private static final int SCM_COUNT = 3;
- /**
- * Create a MiniDFSCluster for testing.
- *
- * Ozone is made active by setting OZONE_ENABLED = true
- *
- * @throws IOException
- */
public void init() throws Exception {
conf = new OzoneConfiguration();
conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s");
conf.set(ScmConfigKeys.OZONE_SCM_HA_DBTRANSACTIONBUFFER_FLUSH_INTERVAL,
"5s");
conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_GAP, "1");
- omServiceId = "om-service-test1";
- scmServiceId = "scm-service-test1";
cluster = MiniOzoneCluster.newHABuilder(conf)
- .setOMServiceId(omServiceId)
- .setSCMServiceId(scmServiceId)
- .setNumOfStorageContainerManagers(numOfSCMs)
- .setNumOfOzoneManagers(numOfOMs)
+ .setOMServiceId("om-service-test1")
+ .setSCMServiceId("scm-service-test1")
+ .setNumOfStorageContainerManagers(SCM_COUNT)
+ .setNumOfOzoneManagers(OM_COUNT)
.build();
cluster.waitForClusterToBeReady();
- waitForLeaderToBeReady();
}
- /**
- * Shutdown MiniDFSCluster.
- */
@AfterEach
public void shutdown() {
if (cluster != null) {
@@ -116,136 +73,6 @@ public void shutdown() {
}
}
- @Test
- void testAllSCMAreRunning() throws Exception {
- init();
- int count = 0;
- List scms = cluster.getStorageContainerManagers();
- assertEquals(numOfSCMs, scms.size());
- int peerSize = cluster.getStorageContainerManager().getScmHAManager()
- .getRatisServer().getDivision().getGroup().getPeers().size();
- StorageContainerManager leaderScm = null;
- for (StorageContainerManager scm : scms) {
- if (scm.checkLeader()) {
- count++;
- leaderScm = scm;
- }
- assertNotNull(scm.getScmHAManager().getRatisServer().getLeaderId());
- assertEquals(peerSize, numOfSCMs);
- }
- assertEquals(1, count);
- assertNotNull(leaderScm);
-
- assertRatisRoles();
-
- String leaderSCMId = leaderScm.getScmId();
- checkSCMHAMetricsForAllSCMs(scms, leaderSCMId);
-
- count = 0;
- List oms = cluster.getOzoneManagersList();
- assertEquals(numOfOMs, oms.size());
- for (OzoneManager om : oms) {
- if (om.isLeaderReady()) {
- count++;
- }
- }
- assertEquals(1, count);
-
- // verify timer based transaction buffer flush is working
- SnapshotInfo latestSnapshot = leaderScm.getScmHAManager()
- .asSCMHADBTransactionBuffer().getLatestSnapshot();
- doPutKey();
- final StorageContainerManager leaderScmTmp = leaderScm;
- GenericTestUtils.waitFor(() -> {
- if (leaderScmTmp.getScmHAManager().asSCMHADBTransactionBuffer()
- .getLatestSnapshot() != null) {
- if (leaderScmTmp.getScmHAManager().asSCMHADBTransactionBuffer()
- .getLatestSnapshot().getIndex() > latestSnapshot.getIndex()) {
- return true;
- }
- }
- return false;
- }, 2000, 30000);
- }
-
- private void doPutKey() throws Exception {
- String volumeName = UUID.randomUUID().toString();
- String bucketName = UUID.randomUUID().toString();
- Instant testStartTime = getTestStartTime();
- try (OzoneClient client = cluster.newClient()) {
- ObjectStore store = client.getObjectStore();
- String value = "sample value";
- store.createVolume(volumeName);
- OzoneVolume volume = store.getVolume(volumeName);
- volume.createBucket(bucketName);
- OzoneBucket bucket = volume.getBucket(bucketName);
-
- String keyName = UUID.randomUUID().toString();
-
- OzoneOutputStream out = bucket
- .createKey(keyName, value.getBytes(UTF_8).length, RATIS, ONE,
- new HashMap<>());
- out.write(value.getBytes(UTF_8));
- out.close();
- OzoneKey key = bucket.getKey(keyName);
- assertEquals(keyName, key.getName());
- OzoneInputStream is = bucket.readKey(keyName);
- byte[] fileContent = new byte[value.getBytes(UTF_8).length];
- assertEquals(fileContent.length, is.read(fileContent));
- assertEquals(value, new String(fileContent, UTF_8));
- assertFalse(key.getCreationTime().isBefore(testStartTime));
- assertFalse(key.getModificationTime().isBefore(testStartTime));
- is.close();
- final OmKeyArgs keyArgs = new OmKeyArgs.Builder()
- .setVolumeName(volumeName)
- .setBucketName(bucketName)
- .setReplicationConfig(RatisReplicationConfig.getInstance(
- HddsProtos.ReplicationFactor.ONE))
- .setKeyName(keyName)
- .build();
- final OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs);
- final List keyLocationInfos =
- keyInfo.getKeyLocationVersions().get(0).getBlocksLatestVersionOnly();
- long index = -1;
- for (StorageContainerManager scm : cluster
- .getStorageContainerManagers()) {
- if (scm.checkLeader()) {
- index = getLastAppliedIndex(scm);
- }
- }
- assertNotEquals(-1, index);
- long finalIndex = index;
- // Ensure all follower scms have caught up with the leader
- GenericTestUtils.waitFor(() -> areAllScmInSync(finalIndex), 100, 10000);
- final long containerID = keyLocationInfos.get(0).getContainerID();
- for (int k = 0; k < numOfSCMs; k++) {
- StorageContainerManager scm =
- cluster.getStorageContainerManagers().get(k);
- // flush to DB on each SCM
- ((SCMRatisServerImpl) scm.getScmHAManager().getRatisServer())
- .getStateMachine().takeSnapshot();
- assertTrue(scm.getContainerManager()
- .containerExist(ContainerID.valueOf(containerID)));
- assertNotNull(scm.getScmMetadataStore().getContainerTable()
- .get(ContainerID.valueOf(containerID)));
- }
- }
- }
-
- private long getLastAppliedIndex(StorageContainerManager scm) {
- return scm.getScmHAManager().getRatisServer().getDivision().getInfo()
- .getLastAppliedIndex();
- }
-
- private boolean areAllScmInSync(long leaderIndex) {
- List scms = cluster.getStorageContainerManagers();
- boolean sync = false;
- for (StorageContainerManager scm : scms) {
- sync = getLastAppliedIndex(scm) == leaderIndex;
- }
- return sync;
- }
-
@Test
public void testPrimordialSCM() throws Exception {
init();
@@ -274,7 +101,7 @@ public void testBootStrapSCM() throws Exception {
assertTrue(isDeleted);
final SCMStorageConfig scmStorageConfig = new SCMStorageConfig(conf2);
scmStorageConfig.setClusterId(UUID.randomUUID().toString());
- scmStorageConfig.getCurrentDir().delete();
+ FileUtils.deleteDirectory(scmStorageConfig.getCurrentDir());
scmStorageConfig.setSCMHAFlag(true);
scmStorageConfig.initialize();
conf2.setBoolean(ScmConfigKeys.OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_KEY,
@@ -285,49 +112,6 @@ public void testBootStrapSCM() throws Exception {
assertTrue(StorageContainerManager.scmBootstrap(conf2));
}
- private void assertRatisRoles() {
- Set resultSet = new HashSet<>();
- for (StorageContainerManager scm: cluster.getStorageContainerManagers()) {
- resultSet.addAll(scm.getScmHAManager().getRatisServer().getRatisRoles());
- }
- System.out.println(resultSet);
- assertEquals(3, resultSet.size());
- assertEquals(1,
- resultSet.stream().filter(x -> x.contains("LEADER")).count());
- }
-
- private void checkSCMHAMetricsForAllSCMs(List scms,
- String leaderSCMId) {
- for (StorageContainerManager scm : scms) {
- String nodeId = scm.getScmId();
-
- SCMHAMetrics scmHAMetrics = scm.getScmHAMetrics();
- // If current SCM is leader, state should be 1
- int expectedState = nodeId.equals(leaderSCMId) ? 1 : 0;
-
- assertEquals(expectedState,
- scmHAMetrics.getSCMHAMetricsInfoLeaderState());
- assertEquals(nodeId, scmHAMetrics.getSCMHAMetricsInfoNodeId());
- }
- }
-
- /**
- * Some tests are stopping or restarting SCMs.
- * There are test cases where we might need to
- * wait for a leader to be elected and ready.
- */
- private void waitForLeaderToBeReady()
- throws InterruptedException, TimeoutException {
- GenericTestUtils.waitFor(() -> {
- try {
- return cluster.getActiveSCM().checkLeader();
- } catch (Exception e) {
- return false;
- }
- }, 1000, (int) ScmConfigKeys
- .OZONE_SCM_HA_RATIS_LEADER_READY_WAIT_TIMEOUT_DEFAULT);
- }
-
@Test
public void testSCMLeadershipMetric() throws IOException, InterruptedException {
// GIVEN
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHAWithAllRunning.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHAWithAllRunning.java
new file mode 100644
index 000000000000..1447e377b464
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManagerHAWithAllRunning.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.ozone.test.GenericTestUtils.getTestStartTime;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.OutputStream;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.ha.SCMHAMetrics;
+import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneBucket;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneKey;
+import org.apache.hadoop.ozone.client.OzoneVolume;
+import org.apache.hadoop.ozone.client.io.OzoneInputStream;
+import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
+import org.apache.ozone.test.GenericTestUtils;
+import org.apache.ozone.test.HATests;
+import org.apache.ratis.statemachine.SnapshotInfo;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+/**
+ * Tests for SCM HA that require all SCMs to be up and running.
+ */
+@Timeout(300)
+public abstract class TestStorageContainerManagerHAWithAllRunning implements HATests.TestCase {
+
+ private static final int OM_COUNT = 3;
+ private static final int SCM_COUNT = 3;
+  // The shared HA cluster is assumed to be configured like the standalone
+  // test it replaces, in particular with a short
+  // OZONE_SCM_HA_DBTRANSACTIONBUFFER_FLUSH_INTERVAL and an
+  // OZONE_SCM_HA_RATIS_SNAPSHOT_GAP of 1, so the timer-based flush check
+  // below completes within the wait timeout.
+
+ @Test
+ void testAllSCMAreRunning() throws Exception {
+    List<StorageContainerManager> scms = cluster().getStorageContainerManagers();
+ assertEquals(SCM_COUNT, scms.size());
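+    // The Ratis group seen by any SCM should contain all SCM peers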
+ int peerSize = cluster().getStorageContainerManager().getScmHAManager()
+ .getRatisServer().getDivision().getGroup().getPeers().size();
+ int scmLeadersFound = 0;
+ StorageContainerManager leaderScm = null;
+ for (StorageContainerManager scm : scms) {
+ if (scm.checkLeader()) {
+ scmLeadersFound++;
+ leaderScm = scm;
+ }
+ assertNotNull(scm.getScmHAManager().getRatisServer().getLeaderId());
+      assertEquals(SCM_COUNT, peerSize);
+ }
+ assertEquals(1, scmLeadersFound);
+ assertNotNull(leaderScm);
+
+ assertRatisRoles();
+
+ String leaderSCMId = leaderScm.getScmId();
+ checkSCMHAMetricsForAllSCMs(scms, leaderSCMId);
+
+    List<OzoneManager> oms = cluster().getOzoneManagersList();
+ assertEquals(OM_COUNT, oms.size());
+ int omLeadersFound = 0;
+ for (OzoneManager om : oms) {
+ if (om.isLeaderReady()) {
+ omLeadersFound++;
+ }
+ }
+ assertEquals(1, omLeadersFound);
+
+ // verify timer based transaction buffer flush is working
+ SnapshotInfo latestSnapshot = leaderScm.getScmHAManager()
+ .asSCMHADBTransactionBuffer()
+ .getLatestSnapshot();
+ doPutKey();
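+    // Writing a key generates SCM transactions that the buffer should flush
+    // into a newer snapshot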
+ final StorageContainerManager leaderScmTmp = leaderScm;
+ GenericTestUtils.waitFor(() -> {
+ SnapshotInfo newSnapshot = leaderScmTmp.getScmHAManager()
+ .asSCMHADBTransactionBuffer()
+ .getLatestSnapshot();
+      return newSnapshot != null
+          && (latestSnapshot == null || newSnapshot.getIndex() > latestSnapshot.getIndex());
+ }, 2000, 30000);
+ }
+
+ private void doPutKey() throws Exception {
+ String volumeName = UUID.randomUUID().toString();
+ String bucketName = UUID.randomUUID().toString();
+ Instant testStartTime = getTestStartTime();
+ try (OzoneClient client = cluster().newClient()) {
+ ObjectStore store = client.getObjectStore();
+ String value = "sample value";
+ store.createVolume(volumeName);
+ OzoneVolume volume = store.getVolume(volumeName);
+ volume.createBucket(bucketName);
+ OzoneBucket bucket = volume.getBucket(bucketName);
+
+ String keyName = UUID.randomUUID().toString();
+
+ byte[] bytes = value.getBytes(UTF_8);
+ RatisReplicationConfig replication = RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE);
+ try (OutputStream out = bucket.createKey(keyName, bytes.length, replication, new HashMap<>())) {
+ out.write(bytes);
+ }
+
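+      // Read the key back and verify its content and timestamps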
+ OzoneKey key = bucket.getKey(keyName);
+ assertEquals(keyName, key.getName());
+
+ try (OzoneInputStream is = bucket.readKey(keyName)) {
+ byte[] fileContent = new byte[bytes.length];
+ assertEquals(fileContent.length, is.read(fileContent));
+ assertEquals(value, new String(fileContent, UTF_8));
+ assertFalse(key.getCreationTime().isBefore(testStartTime));
+ assertFalse(key.getModificationTime().isBefore(testStartTime));
+ }
+
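+      // Look up the key to find the container that holds its block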
+ final OmKeyArgs keyArgs = new OmKeyArgs.Builder()
+ .setVolumeName(volumeName)
+ .setBucketName(bucketName)
+ .setReplicationConfig(replication)
+ .setKeyName(keyName)
+ .build();
+ final OmKeyInfo keyInfo = cluster().getOzoneManager().lookupKey(keyArgs);
+      final List<OmKeyLocationInfo> keyLocationInfos =
+ keyInfo.getKeyLocationVersions().get(0).getBlocksLatestVersionOnly();
+ long index = -1;
+ for (StorageContainerManager scm : cluster().getStorageContainerManagers()) {
+ if (scm.checkLeader()) {
+ index = getLastAppliedIndex(scm);
+ }
+ }
+ assertNotEquals(-1, index);
+ long finalIndex = index;
+ // Ensure all follower scms have caught up with the leader
+ GenericTestUtils.waitFor(() -> areAllScmInSync(finalIndex), 100, 10000);
+ final long containerID = keyLocationInfos.get(0).getContainerID();
+ for (int k = 0; k < SCM_COUNT; k++) {
+ StorageContainerManager scm =
+ cluster().getStorageContainerManagers().get(k);
+ // flush to DB on each SCM
+ ((SCMRatisServerImpl) scm.getScmHAManager().getRatisServer())
+ .getStateMachine().takeSnapshot();
+ assertTrue(scm.getContainerManager()
+ .containerExist(ContainerID.valueOf(containerID)));
+ assertNotNull(scm.getScmMetadataStore().getContainerTable()
+ .get(ContainerID.valueOf(containerID)));
+ }
+ }
+ }
+
+ private long getLastAppliedIndex(StorageContainerManager scm) {
+ return scm.getScmHAManager().getRatisServer().getDivision().getInfo()
+ .getLastAppliedIndex();
+ }
+
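+  // True only when every SCM has applied up to the leader's last index.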
+  private boolean areAllScmInSync(long leaderIndex) {
+    List<StorageContainerManager> scms = cluster().getStorageContainerManagers();
+    for (StorageContainerManager scm : scms) {
+      if (getLastAppliedIndex(scm) != leaderIndex) {
+        return false;
+      }
+    }
+    return true;
+  }
+
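+  // Expect three distinct Ratis role entries across the SCMs, exactly one LEADER.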
+ private void assertRatisRoles() {
+    Set<String> resultSet = new HashSet<>();
+ for (StorageContainerManager scm: cluster().getStorageContainerManagers()) {
+ resultSet.addAll(scm.getScmHAManager().getRatisServer().getRatisRoles());
+ }
+ System.out.println(resultSet);
+ assertEquals(3, resultSet.size());
+ assertEquals(1,
+ resultSet.stream().filter(x -> x.contains("LEADER")).count());
+ }
+
+  private void checkSCMHAMetricsForAllSCMs(List<StorageContainerManager> scms,
+ String leaderSCMId) {
+ for (StorageContainerManager scm : scms) {
+ String nodeId = scm.getScmId();
+
+ SCMHAMetrics scmHAMetrics = scm.getScmHAMetrics();
+ // If current SCM is leader, state should be 1
+ int expectedState = nodeId.equals(leaderSCMId) ? 1 : 0;
+
+ assertEquals(expectedState,
+ scmHAMetrics.getSCMHAMetricsInfoLeaderState());
+ assertEquals(nodeId, scmHAMetrics.getSCMHAMetricsInfoNodeId());
+ }
+ }
+
+}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestXceiverClientManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestXceiverClientManager.java
index f5b50dd11749..bd28fba0b28b 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestXceiverClientManager.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestXceiverClientManager.java
@@ -38,12 +38,13 @@
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.common.SCMTestUtils;
+import org.apache.ozone.test.NonHATests;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
@@ -52,30 +53,21 @@
/**
* Test for XceiverClientManager caching and eviction.
*/
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@Timeout(300)
-public class TestXceiverClientManager {
+public abstract class TestXceiverClientManager implements NonHATests.TestCase {
- private static OzoneConfiguration config;
- private static MiniOzoneCluster cluster;
- private static StorageContainerLocationProtocolClientSideTranslatorPB
+ private StorageContainerLocationProtocolClientSideTranslatorPB
storageContainerLocationClient;
@BeforeAll
- public static void init() throws Exception {
- config = new OzoneConfiguration();
- cluster = MiniOzoneCluster.newBuilder(config)
- .setNumDatanodes(3)
- .build();
- cluster.waitForClusterToBeReady();
- storageContainerLocationClient = cluster
+ void init() throws Exception {
+ storageContainerLocationClient = cluster()
.getStorageContainerLocationClient();
}
@AfterAll
- public static void shutdown() {
- if (cluster != null) {
- cluster.shutdown();
- }
+ void shutdown() {
IOUtils.cleanupWithLogger(null, storageContainerLocationClient);
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java
index 8e6aa7668035..531a43eb4bba 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/metrics/TestSCMContainerManagerMetrics.java
@@ -18,20 +18,20 @@
package org.apache.hadoop.hdds.scm.container.metrics;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
-import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION;
import static org.apache.ozone.test.MetricsAsserts.getLongCounter;
import static org.apache.ozone.test.MetricsAsserts.getMetrics;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.io.IOException;
import java.util.HashMap;
+import java.util.UUID;
import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.client.ReplicationFactor;
import org.apache.hadoop.hdds.client.ReplicationType;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
@@ -40,43 +40,36 @@
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
-import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
import org.apache.ozone.test.GenericTestUtils;
+import org.apache.ozone.test.NonHATests;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.Timeout;
/**
* Class used to test {@link SCMContainerManagerMetrics}.
*/
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@Timeout(300)
-public class TestSCMContainerManagerMetrics {
+public abstract class TestSCMContainerManagerMetrics implements NonHATests.TestCase {
- private MiniOzoneCluster cluster;
private StorageContainerManager scm;
private OzoneClient client;
@BeforeEach
public void setup() throws Exception {
- OzoneConfiguration conf = new OzoneConfiguration();
- conf.set(HDDS_CONTAINER_REPORT_INTERVAL, "3000s");
- conf.setBoolean(HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false);
- cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
- cluster.waitForClusterToBeReady();
- cluster.waitForPipelineTobeReady(HddsProtos.ReplicationFactor.ONE, 30000);
- client = cluster.newClient();
- scm = cluster.getStorageContainerManager();
+ client = cluster().newClient();
+ scm = cluster().getStorageContainerManager();
}
-
@AfterEach
public void teardown() {
IOUtils.closeQuietly(client);
- cluster.shutdown();
}
@Test
@@ -86,6 +79,8 @@ public void testContainerOpsMetrics() throws Exception {
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
long numSuccessfulCreateContainers = getLongCounter(
"NumSuccessfulCreateContainers", metrics);
+ long numFailureCreateContainers = getLongCounter(
+ "NumFailureCreateContainers", metrics);
ContainerInfo containerInfo = containerManager.allocateContainer(
RatisReplicationConfig.getInstance(
@@ -97,25 +92,26 @@ public void testContainerOpsMetrics() throws Exception {
assertThrows(IOException.class, () ->
containerManager.allocateContainer(
- RatisReplicationConfig.getInstance(
- HddsProtos.ReplicationFactor.THREE), OzoneConsts.OZONE));
+ new ECReplicationConfig(8, 5), OzoneConsts.OZONE));
// allocateContainer should fail, so it should have the old metric value.
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
assertEquals(getLongCounter("NumSuccessfulCreateContainers",
metrics), numSuccessfulCreateContainers);
assertEquals(getLongCounter("NumFailureCreateContainers",
- metrics), 1);
+ metrics), numFailureCreateContainers + 1);
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
long numSuccessfulDeleteContainers = getLongCounter(
"NumSuccessfulDeleteContainers", metrics);
+ long numFailureDeleteContainers = getLongCounter(
+ "NumFailureDeleteContainers", metrics);
containerManager.deleteContainer(
ContainerID.valueOf(containerInfo.getContainerID()));
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
assertEquals(getLongCounter("NumSuccessfulDeleteContainers",
- metrics), numSuccessfulDeleteContainers + 1);
+ metrics), ++numSuccessfulDeleteContainers);
assertThrows(ContainerNotFoundException.class, () ->
containerManager.deleteContainer(
@@ -123,9 +119,9 @@ public void testContainerOpsMetrics() throws Exception {
// deleteContainer should fail, so it should have the old metric value.
metrics = getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
assertEquals(getLongCounter("NumSuccessfulDeleteContainers",
- metrics), numSuccessfulCreateContainers);
+ metrics), numSuccessfulDeleteContainers);
assertEquals(getLongCounter("NumFailureDeleteContainers",
- metrics), 1);
+ metrics), numFailureDeleteContainers + 1);
long currentValue = getLongCounter("NumListContainerOps", metrics);
containerManager.getContainers(
@@ -138,14 +134,16 @@ public void testContainerOpsMetrics() throws Exception {
@Test
public void testReportProcessingMetrics() throws Exception {
- String volumeName = "vol1";
+ String volumeName = "vol-" + UUID.randomUUID();
String bucketName = "bucket1";
String key = "key1";
MetricsRecordBuilder metrics =
getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
- assertEquals(1L,
- getLongCounter("NumContainerReportsProcessedSuccessful", metrics));
+ assertThat(getLongCounter("NumContainerReportsProcessedSuccessful", metrics))
+ .isPositive();
+
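+    // Record the current ICR count so we can wait for it to advance after
+    // creating the key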
+ final long previous = getLongCounter("NumICRReportsProcessedSuccessful", metrics);
// Create key should create container on DN.
client.getObjectStore().getClientProxy()
@@ -166,7 +164,7 @@ public void testReportProcessingMetrics() throws Exception {
final MetricsRecordBuilder scmMetrics =
getMetrics(SCMContainerManagerMetrics.class.getSimpleName());
return getLongCounter("NumICRReportsProcessedSuccessful",
- scmMetrics) == 1;
- }, 1000, 500000);
+ scmMetrics) >= previous + 1;
+ }, 100, 30_000);
}
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java
index ca802a63ebf0..acf4599a28fc 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestDecommissionAndMaintenance.java
@@ -79,7 +79,6 @@
import org.apache.hadoop.ozone.client.OzoneBucket;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ozone.test.tag.Flaky;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -91,7 +90,6 @@
/**
* Test from the scmclient for decommission and maintenance.
*/
-@Flaky({"HDDS-6028", "HDDS-6049"})
public class TestDecommissionAndMaintenance {
private static final Logger LOG =
LoggerFactory.getLogger(TestDecommissionAndMaintenance.class);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestQueryNode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestQueryNode.java
index b204394b1e39..f707580a2464 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestQueryNode.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/node/TestQueryNode.java
@@ -22,26 +22,19 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
-import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
-import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.AfterEach;
@@ -83,13 +76,6 @@ public void tearDown() throws Exception {
}
}
- @Test
- public void testHealthyNodesCount() throws Exception {
- List nodes = scmClient.queryNode(null, HEALTHY,
- HddsProtos.QueryScope.CLUSTER, "");
- assertEquals(numOfDatanodes, nodes.size(), "Expected live nodes");
- }
-
@Test
public void testStaleNodesCount() throws Exception {
ExecutorService executor = Executors.newFixedThreadPool(1);
@@ -123,54 +109,4 @@ public void testStaleNodesCount() throws Exception {
HddsProtos.QueryScope.CLUSTER, "").size();
assertEquals(2, nodeCount, "Mismatch of expected nodes count");
}
-
- @Test
- public void testNodeOperationalStates() throws Exception {
- StorageContainerManager scm = cluster.getStorageContainerManager();
- NodeManager nm = scm.getScmNodeManager();
-
- // Set one node to be something other than IN_SERVICE
- DatanodeDetails node = nm.getAllNodes().get(0);
- nm.setNodeOperationalState(node, DECOMMISSIONING);
-
- // All nodes should be returned as they are all in service
- int nodeCount = scmClient.queryNode(IN_SERVICE, HEALTHY,
- HddsProtos.QueryScope.CLUSTER, "").size();
- assertEquals(numOfDatanodes - 1, nodeCount);
-
- // null acts as wildcard for opState
- nodeCount = scmClient.queryNode(null, HEALTHY,
- HddsProtos.QueryScope.CLUSTER, "").size();
- assertEquals(numOfDatanodes, nodeCount);
-
- // null acts as wildcard for nodeState
- nodeCount = scmClient.queryNode(IN_SERVICE, null,
- HddsProtos.QueryScope.CLUSTER, "").size();
- assertEquals(numOfDatanodes - 1, nodeCount);
-
- // Both null - should return all nodes
- nodeCount = scmClient.queryNode(null, null,
- HddsProtos.QueryScope.CLUSTER, "").size();
- assertEquals(numOfDatanodes, nodeCount);
-
- // No node should be returned
- nodeCount = scmClient.queryNode(IN_MAINTENANCE, HEALTHY,
- HddsProtos.QueryScope.CLUSTER, "").size();
- assertEquals(0, nodeCount);
-
- // Test all operational states by looping over them all and setting the
- // state manually.
- node = nm.getAllNodes().get(0);
- for (HddsProtos.NodeOperationalState s :
- HddsProtos.NodeOperationalState.values()) {
- nm.setNodeOperationalState(node, s);
- nodeCount = scmClient.queryNode(s, HEALTHY,
- HddsProtos.QueryScope.CLUSTER, "").size();
- if (s == IN_SERVICE) {
- assertEquals(5, nodeCount);
- } else {
- assertEquals(1, nodeCount);
- }
- }
- }
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/HATests.java b/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/HATests.java
index 6210683a511e..cf994637aba4 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/HATests.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/HATests.java
@@ -18,8 +18,14 @@
package org.apache.ozone.test;
import java.util.UUID;
+import org.apache.hadoop.fs.ozone.TestOzoneFsHAURLs;
+import org.apache.hadoop.hdds.scm.TestStorageContainerManagerHAWithAllRunning;
+import org.apache.hadoop.hdds.scm.container.TestScmApplyTransactionFailure;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
+import org.apache.hadoop.ozone.TestGetClusterTreeInformation;
+import org.apache.hadoop.ozone.container.metrics.TestDatanodeQueueMetrics;
+import org.apache.hadoop.ozone.shell.TestScmAdminHA;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.TestInstance;
@@ -48,7 +54,7 @@ public interface TestCase {
}
@Nested
- class OzoneFsHAURLs extends org.apache.hadoop.fs.ozone.TestOzoneFsHAURLs {
+ class OzoneFsHAURLs extends TestOzoneFsHAURLs {
@Override
public MiniOzoneHAClusterImpl cluster() {
return getCluster();
@@ -56,7 +62,7 @@ public MiniOzoneHAClusterImpl cluster() {
}
@Nested
- class ScmApplyTransactionFailure extends org.apache.hadoop.hdds.scm.container.TestScmApplyTransactionFailure {
+ class StorageContainerManagerHAWithAllRunning extends TestStorageContainerManagerHAWithAllRunning {
@Override
public MiniOzoneHAClusterImpl cluster() {
return getCluster();
@@ -64,7 +70,7 @@ public MiniOzoneHAClusterImpl cluster() {
}
@Nested
- class GetClusterTreeInformation extends org.apache.hadoop.ozone.TestGetClusterTreeInformation {
+ class ScmApplyTransactionFailure extends TestScmApplyTransactionFailure {
@Override
public MiniOzoneHAClusterImpl cluster() {
return getCluster();
@@ -72,7 +78,7 @@ public MiniOzoneHAClusterImpl cluster() {
}
@Nested
- class DatanodeQueueMetrics extends org.apache.hadoop.ozone.container.metrics.TestDatanodeQueueMetrics {
+ class GetClusterTreeInformation extends TestGetClusterTreeInformation {
@Override
public MiniOzoneHAClusterImpl cluster() {
return getCluster();
@@ -80,7 +86,15 @@ public MiniOzoneHAClusterImpl cluster() {
}
@Nested
- class ScmAdminHA extends org.apache.hadoop.ozone.shell.TestScmAdminHA {
+ class DatanodeQueueMetrics extends TestDatanodeQueueMetrics {
+ @Override
+ public MiniOzoneHAClusterImpl cluster() {
+ return getCluster();
+ }
+ }
+
+ @Nested
+ class ScmAdminHA extends TestScmAdminHA {
@Override
public MiniOzoneHAClusterImpl cluster() {
return getCluster();
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/NonHATests.java b/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/NonHATests.java
index 5242a35b27aa..d3340071dfa1 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/NonHATests.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/ozone/test/NonHATests.java
@@ -17,6 +17,7 @@
package org.apache.ozone.test;
+import org.apache.hadoop.hdds.scm.TestContainerOperations;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.TestInstance;
@@ -50,6 +51,14 @@ public MiniOzoneCluster cluster() {
}
}
+ @Nested
+ class ContainerOperations extends TestContainerOperations {
+ @Override
+ public MiniOzoneCluster cluster() {
+ return getCluster();
+ }
+ }
+
@Nested
class ContainerReportWithKeys extends org.apache.hadoop.hdds.scm.TestContainerReportWithKeys {
@Override
@@ -90,6 +99,22 @@ public MiniOzoneCluster cluster() {
}
}
+ @Nested
+ class XceiverClientManager extends org.apache.hadoop.hdds.scm.TestXceiverClientManager {
+ @Override
+ public MiniOzoneCluster cluster() {
+ return getCluster();
+ }
+ }
+
+ @Nested
+ class SCMContainerManagerMetrics extends org.apache.hadoop.hdds.scm.container.metrics.TestSCMContainerManagerMetrics {
+ @Override
+ public MiniOzoneCluster cluster() {
+ return getCluster();
+ }
+ }
+
@Nested
class Node2PipelineMap extends org.apache.hadoop.hdds.scm.pipeline.TestNode2PipelineMap {
@Override