Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.server.events.TypedEvent;
Expand All @@ -40,15 +42,18 @@ public class DataNodeSafeModeRule extends
private int registeredDns = 0;
// Set to track registered DataNodes.
private HashSet<UUID> registeredDnSet;
private NodeManager nodeManager;

public DataNodeSafeModeRule(EventQueue eventQueue,
ConfigurationSource conf,
NodeManager nodeManager,
SCMSafeModeManager manager) {
super(manager, NAME, eventQueue);
requiredDns = conf.getInt(
HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE,
HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT);
registeredDnSet = new HashSet<>(requiredDns * 2);
this.nodeManager = nodeManager;
}

@Override
Expand All @@ -58,7 +63,10 @@ protected TypedEvent<NodeRegistrationContainerReport> getEventType() {

@Override
protected boolean validate() {
return registeredDns >= requiredDns;
if (validateBasedOnReportProcessing()) {
return registeredDns >= requiredDns;
}
return nodeManager.getNodes(NodeStatus.inServiceHealthy()).size() >= requiredDns;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMService.Event;
import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
Expand Down Expand Up @@ -90,6 +91,7 @@ public class SCMSafeModeManager implements SafeModeManager {
private Map<String, SafeModeExitRule> exitRules = new HashMap<>(1);
private Set<String> preCheckRules = new HashSet<>(1);
private ConfigurationSource config;
private static final String DN_EXIT_RULE = "DataNodeSafeModeRule";
private static final String CONT_EXIT_RULE = "ContainerSafeModeRule";
private static final String HEALTHY_PIPELINE_EXIT_RULE =
"HealthyPipelineSafeModeRule";
Expand All @@ -105,12 +107,10 @@ public class SCMSafeModeManager implements SafeModeManager {

private final SafeModeMetrics safeModeMetrics;


// TODO: Remove allContainers argument. (HDDS-11795)
public SCMSafeModeManager(ConfigurationSource conf,
ContainerManager containerManager, PipelineManager pipelineManager,
EventQueue eventQueue, SCMServiceManager serviceManager,
SCMContext scmContext) {
NodeManager nodeManager, EventQueue eventQueue,
SCMServiceManager serviceManager, SCMContext scmContext) {
this.config = conf;
this.eventPublisher = eventQueue;
this.serviceManager = serviceManager;
Expand All @@ -124,7 +124,7 @@ public SCMSafeModeManager(ConfigurationSource conf,

// TODO: Remove the cyclic ("this") dependency (HDDS-11797)
SafeModeRuleFactory.initialize(config, scmContext, eventQueue,
this, pipelineManager, containerManager);
this, pipelineManager, containerManager, nodeManager);
SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance();

exitRules = factory.getSafeModeRules().stream().collect(
Expand Down Expand Up @@ -346,6 +346,10 @@ public OneReplicaPipelineSafeModeRule getOneReplicaPipelineSafeModeRule() {
exitRules.get(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE);
}

public DataNodeSafeModeRule getDataNodeSafeModeRule() {
return (DataNodeSafeModeRule) exitRules.get(DN_EXIT_RULE);
}


/**
* Class used during SafeMode status event.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
import org.apache.hadoop.hdds.server.events.EventQueue;

Expand All @@ -40,6 +41,7 @@ public final class SafeModeRuleFactory {
private final SCMSafeModeManager safeModeManager;
private final PipelineManager pipelineManager;
private final ContainerManager containerManager;
private final NodeManager nodeManager;

private final List<SafeModeExitRule<?>> safeModeRules;
private final List<SafeModeExitRule<?>> preCheckRules;
Expand All @@ -51,13 +53,15 @@ private SafeModeRuleFactory(final ConfigurationSource config,
final EventQueue eventQueue,
final SCMSafeModeManager safeModeManager,
final PipelineManager pipelineManager,
final ContainerManager containerManager) {
final ContainerManager containerManager,
final NodeManager nodeManager) {
this.config = config;
this.scmContext = scmContext;
this.eventQueue = eventQueue;
this.safeModeManager = safeModeManager;
this.pipelineManager = pipelineManager;
this.containerManager = containerManager;
this.nodeManager = nodeManager;
this.safeModeRules = new ArrayList<>();
this.preCheckRules = new ArrayList<>();
loadRules();
Expand All @@ -68,7 +72,7 @@ private void loadRules() {
SafeModeExitRule<?> containerRule = new ContainerSafeModeRule(eventQueue,
config, containerManager, safeModeManager);
SafeModeExitRule<?> datanodeRule = new DataNodeSafeModeRule(eventQueue,
config, safeModeManager);
config, nodeManager, safeModeManager);

safeModeRules.add(containerRule);
safeModeRules.add(datanodeRule);
Expand Down Expand Up @@ -104,9 +108,10 @@ public static synchronized void initialize(
final EventQueue eventQueue,
final SCMSafeModeManager safeModeManager,
final PipelineManager pipelineManager,
final ContainerManager containerManager) {
final ContainerManager containerManager,
final NodeManager nodeManager) {
instance = new SafeModeRuleFactory(config, scmContext, eventQueue,
safeModeManager, pipelineManager, containerManager);
safeModeManager, pipelineManager, containerManager, nodeManager);
}

public List<SafeModeExitRule<?>> getSafeModeRules() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ private void initializeSystemManagers(OzoneConfiguration conf,
scmSafeModeManager = configurator.getScmSafeModeManager();
} else {
scmSafeModeManager = new SCMSafeModeManager(conf,
containerManager, pipelineManager, eventQueue,
containerManager, pipelineManager, scmNodeManager, eventQueue,
serviceManager, scmContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ void setUp(@TempDir File tempDir) throws Exception {
new ContainerReplicaPendingOps(
Clock.system(ZoneId.systemDefault())));
SCMSafeModeManager safeModeManager = new SCMSafeModeManager(conf,
containerManager, pipelineManager, eventQueue, serviceManager, scmContext) {
containerManager, pipelineManager, nodeManager, eventQueue, serviceManager, scmContext) {
@Override
public void emitSafeModeStatus() {
// skip
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.pipeline.choose.algorithms.HealthyPipelineChoosePolicy;
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
Expand Down Expand Up @@ -356,10 +357,9 @@ public void testClosePipelineShouldFailOnFollower() throws Exception {
@Test
public void testPipelineReport() throws Exception {
try (PipelineManagerImpl pipelineManager = createPipelineManager(true)) {
SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(conf,
mock(ContainerManager.class), pipelineManager,
new EventQueue(), serviceManager, scmContext);
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(conf,
mock(ContainerManager.class), pipelineManager, mock(NodeManager.class),
new EventQueue(), serviceManager, scmContext);
Pipeline pipeline = pipelineManager
.createPipeline(RatisReplicationConfig
.getInstance(ReplicationFactor.THREE));
Expand Down Expand Up @@ -467,10 +467,9 @@ public void testPipelineOpenOnlyWhenLeaderReported() throws Exception {
assertEquals(Pipeline.PipelineState.ALLOCATED,
pipelineManager.getPipeline(pipeline.getId()).getPipelineState());

SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(new OzoneConfiguration(),
mock(ContainerManager.class), pipelineManager, new EventQueue(),
serviceManager, scmContext);
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(new OzoneConfiguration(),
mock(ContainerManager.class), pipelineManager, mock(NodeManager.class),
new EventQueue(), serviceManager, scmContext);
PipelineReportHandler pipelineReportHandler =
new PipelineReportHandler(scmSafeModeManager, pipelineManager,
SCMContext.emptyContext(), conf);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hdds.scm.safemode;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.LoggerFactory;

/**
* This class tests DataNodeSafeModeRule.
*/
public class TestDataNodeSafeModeRule {

@TempDir
private Path tempDir;
private DataNodeSafeModeRule rule;
private EventQueue eventQueue;
private SCMServiceManager serviceManager;
private SCMContext scmContext;
private NodeManager nodeManager;

private void setup(int requiredDns) throws Exception {
OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
ozoneConfiguration.set(HddsConfigKeys.OZONE_METADATA_DIRS,
tempDir.toString());
ozoneConfiguration.setInt(
HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, requiredDns);

List<ContainerInfo> containers =
new ArrayList<>(HddsTestUtils.getContainerInfo(1));
nodeManager = mock(NodeManager.class);
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers()).thenReturn(containers);
eventQueue = new EventQueue();
serviceManager = new SCMServiceManager();
scmContext = SCMContext.emptyContext();

SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(ozoneConfiguration, containerManager,
null, nodeManager, eventQueue, serviceManager, scmContext);

rule = scmSafeModeManager.getDataNodeSafeModeRule();
assertNotNull(rule);

rule.setValidateBasedOnReportProcessing(true);
}

@Test
public void testDataNodeSafeModeRuleWithNoNodes() throws Exception {
int requiredDns = 1;
setup(requiredDns);

GenericTestUtils.LogCapturer logCapturer =
GenericTestUtils.LogCapturer.captureLogs(
LoggerFactory.getLogger(SCMSafeModeManager.class));

assertFalse(rule.validate());

DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails();
NodeRegistrationContainerReport nodeReg =
new NodeRegistrationContainerReport(dd, null);

eventQueue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, nodeReg);

GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
"SCM in safe mode. 1 DataNodes registered, 1 required."), 1000, 5000);

assertTrue(rule.validate());
}

@Test
public void testDataNodeSafeModeRuleWithMultipleNodes() throws Exception {
int requiredDns = 3;
setup(requiredDns);

GenericTestUtils.LogCapturer logCapturer =
GenericTestUtils.LogCapturer.captureLogs(
LoggerFactory.getLogger(SCMSafeModeManager.class));

assertFalse(rule.validate());

for (int i = 0; i < 2; i++) {
DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails();
NodeRegistrationContainerReport nodeReg =
new NodeRegistrationContainerReport(dd, null);

eventQueue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, nodeReg);
}

GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
"SCM in safe mode. 2 DataNodes registered, 3 required."), 1000, 5000);

assertFalse(rule.validate());

DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails();
NodeRegistrationContainerReport nodeReg =
new NodeRegistrationContainerReport(dd, null);

eventQueue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, nodeReg);

GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
"SCM in safe mode. 3 DataNodes registered, 3 required."), 1000, 5000);

assertTrue(rule.validate());
}

@Test
public void testDataNodeSafeModeRuleWithNodeManager() throws Exception {
int requiredDns = 2;
setup(requiredDns);

rule.setValidateBasedOnReportProcessing(false);

when(nodeManager.getNodes(NodeStatus.inServiceHealthy())).thenReturn(new ArrayList<>());

assertFalse(rule.validate());

List<DatanodeDetails> healthyNodes = new ArrayList<>();
for (int i = 0; i < requiredDns; i++) {
DatanodeDetails dd = MockDatanodeDetails.randomDatanodeDetails();
healthyNodes.add(dd);
}

when(nodeManager.getNodes(NodeStatus.inServiceHealthy()))
.thenReturn(healthyNodes);

assertTrue(rule.validate());

verify(nodeManager, times(2)).getNodes(NodeStatus.inServiceHealthy());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public void testHealthyPipelineSafeModeRuleWithNoPipelines()
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
mockRatisProvider);
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
config, containerManager, pipelineManager, eventQueue,
config, containerManager, pipelineManager, nodeManager, eventQueue,
serviceManager, scmContext);

HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
Expand Down Expand Up @@ -177,7 +177,7 @@ public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception {
MockRatisPipelineProvider.markPipelineHealthy(pipeline3);

SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
config, containerManager, pipelineManager, eventQueue,
config, containerManager, pipelineManager, nodeManager, eventQueue,
serviceManager, scmContext);

HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
Expand Down Expand Up @@ -273,7 +273,7 @@ public void testHealthyPipelineSafeModeRuleWithMixedPipelines()
MockRatisPipelineProvider.markPipelineHealthy(pipeline3);

SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
config, containerManager, pipelineManager, eventQueue,
config, containerManager, pipelineManager, nodeManager, eventQueue,
serviceManager, scmContext);

HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
Expand Down
Loading