Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.hadoop.hdds.annotation.InterfaceStability;
import org.apache.hadoop.hdds.client.ReplicationConfig;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.container.ContainerReplicaInfo;
Expand Down Expand Up @@ -420,4 +421,7 @@ StatusAndMessages finalizeScmUpgrade(String upgradeClientID)
StatusAndMessages queryUpgradeFinalizationProgress(
String upgradeClientID, boolean force, boolean readonly)
throws IOException;

/**
 * Decommission the SCM identified by scmId, removing it from the SCM
 * HA (ratis) ring.
 *
 * @param scmId unique id of the SCM to decommission
 * @return response proto carrying a success flag and, on failure, an
 *         optional error message
 * @throws IOException if the request cannot be processed
 */
DecommissionScmResponseProto decommissionScm(
String scmId) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.hadoop.hdds.client.ReplicationConfig;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
Expand Down Expand Up @@ -349,6 +350,7 @@ List<DeletedBlocksTransactionInfo> getFailedDeletedBlockTxn(int count,

Map<String, Pair<Boolean, String>> getSafeModeRuleStatuses()
throws IOException;

/**
* Force SCM out of Safe mode.
*
Expand Down Expand Up @@ -441,6 +443,7 @@ StatusAndMessages finalizeScmUpgrade(String upgradeClientID)
StatusAndMessages queryUpgradeFinalizationProgress(
String upgradeClientID, boolean force, boolean readonly)
throws IOException;

/**
* Obtain a token which can be used to let datanodes verify authentication of
* commands operating on {@code containerID}.
Expand All @@ -455,4 +458,7 @@ long getContainerCount(HddsProtos.LifeCycleState state)
List<ContainerInfo> getListOfContainers(
long startContainerID, int count, HddsProtos.LifeCycleState state)
throws IOException;

/**
 * Decommission the SCM identified by scmId, removing it from the SCM
 * HA (ratis) ring.
 *
 * @param scmId unique id of the SCM to decommission
 * @return response proto carrying a success flag and, on failure, an
 *         optional error message
 * @throws IOException if the request cannot be processed
 */
DecommissionScmResponseProto decommissionScm(
String scmId) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DeactivatePipelineRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionNodesRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionNodesResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerRequestProto;
Expand Down Expand Up @@ -1076,4 +1078,19 @@ public List<ContainerInfo> getListOfContainers(
throws IOException {
return listContainer(startContainerID, count, state);
}

@Override
public DecommissionScmResponseProto decommissionScm(
    String scmId) throws IOException {
  // Wrap the scm id in a DecommissionScmRequestProto, dispatch it as a
  // DecommissionScm command, and unwrap the matching response field.
  final DecommissionScmRequestProto decommissionRequest =
      DecommissionScmRequestProto.newBuilder()
          .setScmId(scmId)
          .build();
  return submitRequest(Type.DecommissionScm,
      builder -> builder.setDecommissionScmRequest(decommissionRequest))
      .getDecommissionScmResponse();
}
}
12 changes: 12 additions & 0 deletions hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ message ScmContainerLocationRequest {
optional ResetDeletedBlockRetryCountRequestProto resetDeletedBlockRetryCountRequest = 41;
optional TransferLeadershipRequestProto transferScmLeadershipRequest = 42;
optional GetFailedDeletedBlocksTxnRequestProto getFailedDeletedBlocksTxnRequest = 43;
optional DecommissionScmRequestProto decommissionScmRequest = 44;
}

message ScmContainerLocationResponse {
Expand Down Expand Up @@ -131,6 +132,7 @@ message ScmContainerLocationResponse {
optional ResetDeletedBlockRetryCountResponseProto resetDeletedBlockRetryCountResponse = 41;
optional TransferLeadershipResponseProto transferScmLeadershipResponse = 42;
optional GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxnResponse = 43;
optional DecommissionScmResponseProto decommissionScmResponse = 44;

enum Status {
OK = 1;
Expand Down Expand Up @@ -181,6 +183,7 @@ enum Type {
GetClosedContainerCount = 37;
TransferLeadership = 38;
GetFailedDeletedBlocksTransaction = 39;
DecommissionScm = 40;
}

/**
Expand Down Expand Up @@ -573,6 +576,15 @@ message ContainerBalancerStatusResponseProto {
required bool isRunning = 1;
}

// Request to remove an SCM peer from the SCM HA ring.
message DecommissionScmRequestProto {
// Unique id of the SCM instance to decommission.
required string scmId = 1;
}

// Outcome of an SCM decommission request.
message DecommissionScmResponseProto {
// True when the SCM peer was removed, false otherwise.
required bool success = 1;
// Failure reason; set when success is false.
optional string errorMsg = 2;
}

/**
* Protocol used from an HDFS node to StorageContainerManager. See the request
* and response messages for details of the RPC calls.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DeactivatePipelineResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionNodesRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionNodesResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.FinalizeScmUpgradeRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.FinalizeScmUpgradeResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeRequestProto;
Expand Down Expand Up @@ -683,6 +685,13 @@ public ScmContainerLocationResponse processRequest(
transferScmLeadership(
request.getTransferScmLeadershipRequest()))
.build();
case DecommissionScm:
return ScmContainerLocationResponse.newBuilder()
.setCmdType(request.getCmdType())
.setStatus(Status.OK)
.setDecommissionScmResponse(decommissionScm(
request.getDecommissionScmRequest()))
.build();
default:
throw new IllegalArgumentException(
"Unknown command type: " + request.getCmdType());
Expand Down Expand Up @@ -1210,4 +1219,10 @@ public TransferLeadershipResponseProto transferScmLeadership(
impl.transferLeadership(newLeaderId);
return TransferLeadershipResponseProto.getDefaultInstance();
}

/**
 * Handles a DecommissionScm request by delegating to the underlying
 * protocol implementation with the scm id from the request proto.
 */
public DecommissionScmResponseProto decommissionScm(
DecommissionScmRequestProto request) throws IOException {
return impl.decommissionScm(
request.getScmId());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto.Builder;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolPB;
import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolServerSideTranslatorPB;
Expand Down Expand Up @@ -1324,4 +1326,21 @@ public List<String> listReconfigureProperties() throws IOException {
public void close() throws IOException {
stop();
}

@Override
public DecommissionScmResponseProto decommissionScm(
    String scmId) {
  // Ask the SCM HA machinery to drop the peer. Failures are reported to
  // the caller through the response proto instead of being thrown.
  Builder response = DecommissionScmResponseProto.newBuilder();
  try {
    boolean removed = scm.removePeerFromHARing(scmId);
    response.setSuccess(removed);
  } catch (IOException e) {
    response.setSuccess(false);
    response.setErrorMsg(e.getMessage());
  }
  return response.build();
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some more tests here to verify other scenarios as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@neils-dev, thanks for working on this.
Please create a follow-up Jira to add more unit-tests.

Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.server;

import java.io.File;

import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocolServerSideTranslatorPB;
import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Unit tests to validate the SCMClientProtocolServer
 * servicing commands from the scm client.
 */
public class TestSCMClientProtocolServer {
  private OzoneConfiguration config;
  private SCMClientProtocolServer server;
  private StorageContainerManager scm;
  private StorageContainerLocationProtocolServerSideTranslatorPB service;

  @BeforeEach
  public void setUp() throws Exception {
    config = new OzoneConfiguration();
    File dir = GenericTestUtils.getRandomizedTestDir();
    config.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
    SCMConfigurator configurator = new SCMConfigurator();
    configurator.setSCMHAManager(SCMHAManagerStub.getInstance(true));
    configurator.setScmContext(SCMContext.emptyContext());
    scm = HddsTestUtils.getScm(config, configurator);
    scm.start();
    scm.exitSafeMode();

    server = scm.getClientProtocolServer();
    service = new StorageContainerLocationProtocolServerSideTranslatorPB(server,
        scm, Mockito.mock(ProtocolMessageMetrics.class));
  }

  @AfterEach
  public void tearDown() throws Exception {
    if (scm != null) {
      scm.stop();
      scm.join();
    }
  }

  /**
   * Decommissioning the current leader SCM must be rejected: the response
   * reports failure and carries the "cannot remove leader" error message.
   */
  @Test
  public void testScmDecommissionRemoveScmErrors() throws Exception {
    String scmId = scm.getScmId();
    String err = "Cannot remove current leader.";

    DecommissionScmRequestProto request =
        DecommissionScmRequestProto.newBuilder()
            .setScmId(scmId)
            .build();

    DecommissionScmResponseProto resp =
        service.decommissionScm(request);

    // Removing the leader must fail, with the optional error message set.
    assertFalse(resp.getSuccess());
    assertTrue(resp.hasErrorMsg());
    assertEquals(err, resp.getErrorMsg());
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
Expand Down Expand Up @@ -509,4 +510,12 @@ public StatusAndMessages queryUpgradeFinalizationProgress(
return storageContainerLocationClient.queryUpgradeFinalizationProgress(
upgradeClientID, force, readonly);
}

/**
 * Decommission the SCM with the given id by delegating to the
 * StorageContainerLocationProtocol client.
 */
@Override
public DecommissionScmResponseProto decommissionScm(
String scmId)
throws IOException {
return storageContainerLocationClient.decommissionScm(scmId);
}

}
6 changes: 5 additions & 1 deletion hadoop-ozone/dev-support/intellij/ozone-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
limitations under the License.
-->
<configuration>
<property>
<name>ozone.default.bucket.layout</name>
<value>LEGACY</value>
</property>
<property>
<name>hdds.profiler.endpoint.enabled</name>
<value>true</value>
Expand Down Expand Up @@ -96,4 +100,4 @@
<name>ozone.metastore.rocksdb.statistics</name>
<value>ALL</value>
</property>
</configuration>
</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.admin.scm;

import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
import org.apache.hadoop.hdds.scm.client.ScmClient;
import picocli.CommandLine;

import java.io.IOException;

/**
* Handler of ozone admin scm decommission command.
*/
@CommandLine.Command(
    name = "decommission",
    description = "Decommission SCM <scmid>. Includes removing from ratis "
        + "ring and removing its certificate from certStore",
    mixinStandardHelpOptions = true,
    versionProvider = HddsVersionProvider.class)
public class DecommissionScmSubcommand extends ScmSubcommand {
  @CommandLine.ParentCommand
  private ScmAdmin parent;

  @CommandLine.Option(names = {"-nodeid", "--nodeid"},
      description = "NodeID of the SCM to be decommissioned.",
      required = true)
  private String nodeId;

  @Override
  public void execute(ScmClient scmClient) throws IOException {
    // Ask SCM to drop the given peer from the HA ring and report the
    // outcome (including any server-supplied error text) to the user.
    DecommissionScmResponseProto response = scmClient.decommissionScm(nodeId);
    if (response.getSuccess()) {
      System.out.println("Decommissioned Scm " + nodeId);
    } else {
      System.out.println("Error decommissioning Scm " + nodeId);
      if (response.hasErrorMsg()) {
        System.out.println(response.getErrorMsg());
      }
    }
  }
}

Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
FinalizeScmUpgradeSubcommand.class,
FinalizationScmStatusSubcommand.class,
TransferScmLeaderSubCommand.class,
DeletedBlocksTxnCommands.class
DeletedBlocksTxnCommands.class,
DecommissionScmSubcommand.class
})
@MetaInfServices(SubcommandWithParent.class)
public class ScmAdmin extends GenericCli implements SubcommandWithParent {
Expand Down
Loading