diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java new file mode 100644 index 000000000000..3a8c5bcacfa1 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.ha; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.Interns; +import org.apache.hadoop.ozone.OzoneConsts; + +/** + * SCM HA metrics. + */ +@Metrics(about = "SCM HA metrics", context = OzoneConsts.OZONE) +public final class SCMHAMetrics implements MetricsSource { + + /** + * Metrics value holder. 
+ */ + private static final class SCMHAMetricsInfo { + + private static final MetricsInfo SCM_MANAGER_HA_LEADER_STATE = + Interns.info("SCMHALeaderState", "Leader active " + + "state of SCM node (1 leader, 0 follower)"); + private static final MetricsInfo NODE_ID = Interns.info("NodeId", + "SCM node Id"); + private int scmHALeaderState; + private String nodeId; + + public int getScmHALeaderState() { + return scmHALeaderState; + } + + public void setScmHALeaderState(int scmHALeaderState) { + this.scmHALeaderState = scmHALeaderState; + } + + public String getNodeId() { + return nodeId; + } + + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + } + + private static final String SOURCE_NAME = SCMHAMetrics.class.getSimpleName(); + private final SCMHAMetricsInfo scmHAMetricsInfo = new SCMHAMetricsInfo(); + private final String currNodeId; + private final String leaderId; + + private SCMHAMetrics(String currNodeId, String leaderId) { + this.currNodeId = currNodeId; + this.leaderId = leaderId; + } + + /** + * Creates and registers an SCMHAMetrics instance. + * @return the registered SCMHAMetrics source + */ + public static SCMHAMetrics create(String nodeId, String leaderId) { + SCMHAMetrics metrics = new SCMHAMetrics(nodeId, leaderId); + return DefaultMetricsSystem.instance() + .register(SOURCE_NAME, "SCM HA metrics", metrics); + } + + /** + * Unregisters the metrics instance. + */ + public static void unRegister() { + DefaultMetricsSystem.instance().unregisterSource(SOURCE_NAME); + } + + @Override + public synchronized void getMetrics(MetricsCollector collector, boolean all) { + // Check current node state (1 leader, 0 follower) + int state = currNodeId.equals(leaderId) ? 
1 : 0; + scmHAMetricsInfo.setNodeId(currNodeId); + scmHAMetricsInfo.setScmHALeaderState(state); + + MetricsRecordBuilder recordBuilder = collector.addRecord(SOURCE_NAME); + recordBuilder + .tag(SCMHAMetricsInfo.NODE_ID, currNodeId) + .addGauge(SCMHAMetricsInfo.SCM_MANAGER_HA_LEADER_STATE, state); + recordBuilder.endRecord(); + } + + @VisibleForTesting + public String getSCMHAMetricsInfoNodeId() { + return scmHAMetricsInfo.getNodeId(); + } + + @VisibleForTesting + public int getSCMHAMetricsInfoLeaderState() { + return scmHAMetricsInfo.getScmHALeaderState(); + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index 5e5298a96767..c88331db9808 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -283,6 +283,8 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, deletedBlockLog instanceof DeletedBlockLogImpl); ((DeletedBlockLogImpl) deletedBlockLog).onBecomeLeader(); scm.getScmDecommissionManager().onBecomeLeader(); + + scm.scmHAMetricsUpdate(newLeaderId.toString()); } @Override diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 49a2f88a41b6..9ce0d256647d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMContext; import org.apache.hadoop.hdds.scm.ha.SCMHAManager; import org.apache.hadoop.hdds.scm.ha.SCMHAManagerImpl; +import org.apache.hadoop.hdds.scm.ha.SCMHAMetrics; import 
org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo; import org.apache.hadoop.hdds.scm.ha.SCMRatisServer; @@ -215,6 +216,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl * SCM metrics. */ private static SCMMetrics metrics; + private SCMHAMetrics scmHAMetrics; /* * RPC Endpoints exposed by SCM. @@ -1490,6 +1492,9 @@ public void start() throws IOException { } setStartTime(); + + // At this point leader is not known + scmHAMetricsUpdate(null); } /** Persist SCM certs to DB on bootstrap scm nodes. @@ -1636,6 +1641,10 @@ public void stop() { LOG.error("SCM HA Manager stop failed", ex); } + if (scmHAMetrics != null) { + SCMHAMetrics.unRegister(); + } + IOUtils.cleanupWithLogger(LOG, containerManager); IOUtils.cleanupWithLogger(LOG, pipelineManager); @@ -1956,6 +1965,11 @@ public String getSCMNodeId() { return scmHANodeDetails.getLocalNodeDetails().getNodeId(); } + @VisibleForTesting + public SCMHAMetrics getScmHAMetrics() { + return scmHAMetrics; + } + private void startSecretManagerIfNecessary() { boolean shouldRun = securityConfig.isSecurityEnabled() && securityConfig.isContainerTokenEnabled() @@ -2115,4 +2129,12 @@ public boolean removePeerFromHARing(RemoveSCMRequest request) return scmHAManager.removeSCM(request); } + + public void scmHAMetricsUpdate(String leaderId) { + // unregister, in case metrics already exist + // so that the metric tags will get updated. 
+ SCMHAMetrics.unRegister(); + + scmHAMetrics = SCMHAMetrics.create(getScmId(), leaderId); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAMetrics.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAMetrics.java new file mode 100644 index 000000000000..3a504fc84d29 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAMetrics.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.ha; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link SCMHAMetrics}. + */ +class TestSCMHAMetrics { + + private static final MetricsCollectorImpl METRICS_COLLECTOR = + new MetricsCollectorImpl(); + private static final String NODE_ID = + "scm" + RandomStringUtils.randomNumeric(5); + private String leaderId; + private SCMHAMetrics scmhaMetrics; + + @AfterEach + public void cleanup() { + SCMHAMetrics.unRegister(); + } + + @Test + public void testGetMetricsWithLeader() { + // GIVEN + leaderId = NODE_ID; + + // WHEN + scmhaMetrics = SCMHAMetrics.create(NODE_ID, leaderId); + scmhaMetrics.getMetrics(METRICS_COLLECTOR, true); + + // THEN + Assertions.assertEquals(1, scmhaMetrics.getSCMHAMetricsInfoLeaderState()); + } + + @Test + public void testGetMetricsWithFollower() { + // GIVEN + leaderId = "scm" + RandomStringUtils.randomNumeric(5); + + // WHEN + scmhaMetrics = SCMHAMetrics.create(NODE_ID, leaderId); + scmhaMetrics.getMetrics(METRICS_COLLECTOR, true); + + // THEN + Assertions.assertEquals(0, scmhaMetrics.getSCMHAMetricsInfoLeaderState()); + } + +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/package-info.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/package-info.java new file mode 100644 index 000000000000..12c241dd0f21 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/package-info.java @@ -0,0 +1,18 
@@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.ha; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java index f4cd64bd5950..4424324ea4f6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.ha.SCMHAMetrics; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; @@ -42,17 +43,13 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.ozone.test.GenericTestUtils; -import org.junit.Assert; -import org.junit.Rule; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -import org.junit.rules.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.time.Instant; @@ -61,14 +58,16 @@ import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.concurrent.TimeoutException; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.client.ReplicationFactor.ONE; import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; /** - * Base class for Ozone Manager HA tests. + * Base class for SCM HA tests. 
*/ +@Timeout(300) public class TestStorageContainerManagerHA { private MiniOzoneHAClusterImpl cluster = null; @@ -80,12 +79,6 @@ public class TestStorageContainerManagerHA { private String scmServiceId; private static int numOfSCMs = 3; - private static final Logger LOG = LoggerFactory - .getLogger(TestStorageContainerManagerHA.class); - - @Rule - public Timeout timeout = new Timeout(300_000); - /** * Create a MiniDFSCluster for testing. *
@@ -127,28 +120,29 @@ public void shutdown() {
public void testAllSCMAreRunning() throws Exception {
int count = 0;
List
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm;