Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,13 @@ public static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader,
ozoneConfiguration);
}

public static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader, RaftGroup group,
RetryPolicy retryPolicy, GrpcTlsConfig tlsConfig,
ConfigurationSource configuration) {
return newRaftClient(rpcType, leader.getId(), leader, group, retryPolicy, tlsConfig, configuration);
}


@SuppressWarnings("checkstyle:ParameterNumber")
private static RaftClient newRaftClient(RpcType rpcType, RaftPeerId leader,
RaftPeer primary, RaftGroup group, RetryPolicy retryPolicy,
Expand Down
10 changes: 10 additions & 0 deletions hadoop-ozone/tools/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd">
<groupId>org.apache.ozone</groupId>
<artifactId>hdds-hadoop-dependency-server</artifactId>
</dependency>
<dependency>
<artifactId>ratis-common</artifactId>
<groupId>org.apache.ratis</groupId>
<version>${ratis.version}</version>
</dependency>
<dependency>
<artifactId>ratis-shell</artifactId>
<groupId>org.apache.ratis</groupId>
<version>${ratis.version}</version>
</dependency>

<dependency>
<groupId>jakarta.xml.bind</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.admin.ratis;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.ratis.RatisHelper;
import org.apache.hadoop.hdds.scm.client.ClientTrustManager;
import org.apache.hadoop.hdds.security.SecurityConfig;
import org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.hadoop.ozone.client.OzoneClientFactory;
import org.apache.hadoop.ozone.om.helpers.ServiceInfoEx;
import org.apache.ratis.client.RaftClient;
import org.apache.ratis.grpc.GrpcTlsConfig;
import org.apache.ratis.proto.RaftProtos.FollowerInfoProto;
import org.apache.ratis.proto.RaftProtos.RaftPeerProto;
import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
import org.apache.ratis.proto.RaftProtos.RoleInfoProto;
import org.apache.ratis.protocol.GroupInfoReply;
import org.apache.ratis.protocol.RaftGroup;
import org.apache.ratis.protocol.RaftGroupId;
import org.apache.ratis.protocol.RaftPeer;
import org.apache.ratis.rpc.SupportedRpcType;

import java.util.List;

import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY;
import static org.apache.ratis.shell.cli.RaftUtils.buildRaftGroupIdFromStr;
import static org.apache.ratis.shell.cli.RaftUtils.buildRaftPeersFromStr;
import static org.apache.ratis.shell.cli.RaftUtils.retrieveGroupInfoByGroupId;
import static org.apache.ratis.shell.cli.RaftUtils.retrieveRemoteGroupId;

/**
* The base class for all the ratis command.
*/
public class BaseRatisCommand {
private RaftGroup raftGroup;
private GroupInfoReply groupInfoReply;
private OzoneConfiguration config;
private String omServiceId;

public void run(String peers, String groupid, String omServiceId) throws Exception {
this.config = new OzoneConfiguration();
this.omServiceId = omServiceId;
List<RaftPeer> peerList = buildRaftPeersFromStr(peers);
RaftGroupId raftGroupIdFromConfig = buildRaftGroupIdFromStr(groupid);
raftGroup = RaftGroup.valueOf(raftGroupIdFromConfig, peerList);

try (RaftClient client = buildRaftClient(raftGroup)) {
RaftGroupId remoteGroupId = retrieveRemoteGroupId(raftGroupIdFromConfig, peerList, client, System.out);
groupInfoReply = retrieveGroupInfoByGroupId(remoteGroupId, peerList, client, System.out);
raftGroup = groupInfoReply.getGroup();
}

}

protected RaftClient buildRaftClient(RaftGroup raftGroup) throws Exception {
GrpcTlsConfig tlsConfig = null;
if (config.getBoolean(OZONE_SECURITY_ENABLED_KEY, OZONE_SECURITY_ENABLED_DEFAULT) &&
config.getBoolean(HDDS_GRPC_TLS_ENABLED, HDDS_GRPC_TLS_ENABLED_DEFAULT)) {
tlsConfig = createGrpcTlsConf(omServiceId);
}

return RatisHelper.newRaftClient(
SupportedRpcType.GRPC, null, raftGroup,
RatisHelper.createRetryPolicy(config), tlsConfig, config);
}

public RaftGroup getRaftGroup() {
return raftGroup;
}

public GroupInfoReply getGroupInfoReply() {
return groupInfoReply;
}

/**
* Get the leader id.
*
* @param roleInfo the role info
* @return the leader id
*/
protected RaftPeerProto getLeader(RoleInfoProto roleInfo) {
if (roleInfo == null) {
return null;
}
if (roleInfo.getRole() == RaftPeerRole.LEADER) {
return roleInfo.getSelf();
}
FollowerInfoProto followerInfo = roleInfo.getFollowerInfo();
if (followerInfo == null) {
return null;
}
return followerInfo.getLeaderInfo().getId();
}

private GrpcTlsConfig createGrpcTlsConf(String omServiceId) throws Exception {
OzoneClient certClient = OzoneClientFactory.getRpcClient(omServiceId,
config);

ServiceInfoEx serviceInfoEx = certClient
.getObjectStore()
.getClientProxy()
.getOzoneManagerClient()
.getServiceInfo();

CACertificateProvider remoteCAProvider =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if this remoteProvider definition is intentional or is a misunderstanding of the ClientTrustManager's intents, but let me provide the intent based on which I hope you can decide, and course correct if needed.

The ClientTrustManager is designed in this way to ensure we can give the long running clients a possibility to renew their knowledge of the Ozone cluster's internal rootCA certificate that the client side uses to verify the identity of DataNodes in case of a rootCA rotation that happens some time before the rootCA actually expires, and is a long process by default during which there might be multiple rootCA certificates playing the role of the root of trust (the old one and the new one), after which period the new rootCA becomes the only valid one (but that does not affect the client side which can run further with the two together at the moment).

The current way of initializing the ClientTrustManager here, leads to a state where this renew does not happen, as both the remote and the in-memory provider is the same (the remote basically delegates to the in-memory instance via a lambda expression). This way of operation in the ClientTrustManager can be achieved by specifying the remote provider to null, as in both cases no additional RPC call would be done even if there is verification problem with the certificate (the new DN cert is signed by a different CA).

If this is a short running client which it is (as it runs one or more RPC calls in a short period of time and exits). If we can assume that these RPC calls are idempotent, then we don't need the remote provider, as in case of a rare event when two or more RPCs fall to the different sides on a rootCA rotation in time, then the whole thing can be done once more and it will not fail with a certificate issue the second time.

If this would be a long running client that issues multiple RPCs over a longer period of time, then it does worth it to provide a remote provider other than null, and in this case the best a remote provider can do to refresh the root of trust is to call OM's getServiceInfo method via an RPC, and provide the root(s) of trust based on the result of that RPC call.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @fapifta for the detailed explanation and recommendations regarding the ClientTrustManager and the remoteProvider configuration. In my case, I am indeed dealing with short-running clients that perform one RPC calls and then exit.
Based on your guidance, setting remoteProvider to null is an acceptable approach for my use case. But I'm open to any other suggestions or alternative approaches if other folks might have.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't calculated it exactly but if my memory serves, by default anything under 2 hours of runtime is safe without a remote provider, and in any reasonable setup problems will not arise with a client that does live for just a few seconds or even a few minutes, so yes, in this use case null should be completely fine.

serviceInfoEx::provideCACerts;
ClientTrustManager trustManager = new ClientTrustManager(remoteCAProvider, serviceInfoEx);

return RatisHelper.createTlsClientConfig(new SecurityConfig(config), trustManager);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point we also have a potential problem...

As I understand this PR, the goal is to provide CLI commands that an admin can run and via which the admin can issue commands to a Ratis Group directly from the CLI.

If this understanding of mine is correct, then we may run into a problem with mutual TLS authentication that happens between the raft peers, because the createTlsClientConfig call creates a client side config that does not apply mTLS because the client does not have its own certificate that our internal PKI system trusts, and also it does not have a way to acquire such a certificate from the internal PKI system.

So unless the client here talks to a ratis client port that does not require mTLS but uses other authentication mechanisms like Kerberos, then this approach ain't gonna fly and honestly in this case we need to figure out if we can and how we can allow the client to get its own certificate to use mTLS with the raft server side. (Also we might need to offload this from SCM if these calls are something clients would require potentially often, as we can not afford to generate a lot of certificate on the SCM side as it is a costly operation.) Not to mention the complexity of ensuring that only valid users can grab a cert from the system.
OTOH, if these raft servers do not use mTLS then how they authenticate and identify each others?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think currently the client side does not require mTLS authentication (per comment in https://github.com/apache/ozone/blob/master/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java#L421-L422). But I could be wrong, does any other folk have additional insights or recommendations regarding the authentication and security aspects of this problem? (cc. @szetszwo @ChenSammi @Galsza

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will let @szetszwo to chime in on this one... if we need mTLS the problem is there, if the API itself does not require authentication, then we should be good to go.

However, I think, if there is no authentication then we need to be sure that there are no administrative commands that can be triggered via the Ratis client and we also need to be sure that there is no sensitive information that are emitted in the response.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.admin.ratis;

import org.apache.hadoop.hdds.cli.GenericCli;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.cli.OzoneAdmin;
import org.apache.hadoop.hdds.cli.SubcommandWithParent;
import org.apache.hadoop.ozone.admin.ratis.group.GroupCommand;
import org.kohsuke.MetaInfServices;
import picocli.CommandLine;
import picocli.CommandLine.Model.CommandSpec;
import picocli.CommandLine.Spec;


/**
* Subcommand for ratis operations.
*/
@CommandLine.Command(
name = "ratis",
description = "Ozone ratis operations",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class,
subcommands = {
GroupCommand.class
})
@MetaInfServices(SubcommandWithParent.class)
public class RatisAdmin extends GenericCli implements SubcommandWithParent {

@CommandLine.ParentCommand
private OzoneAdmin parent;

@Spec
private CommandSpec spec;

public OzoneAdmin getParent() {
return parent;
}

@Override
public Void call() throws Exception {
GenericCli.missingSubcommand(spec);
return null;
}

@Override
public Class<?> getParentType() {
return OzoneAdmin.class;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package org.apache.hadoop.ozone.admin.ratis.group;

import org.apache.hadoop.hdds.cli.GenericCli;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.cli.SubcommandWithParent;
import org.apache.hadoop.ozone.admin.ratis.RatisAdmin;
import org.kohsuke.MetaInfServices;
import picocli.CommandLine;

/**
* Subcommand for ratis.
*/
@CommandLine.Command(
name = "group",
description = "ratis group operations",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class,
subcommands = {
InfoCommand.class,
ListCommand.class
})
@MetaInfServices(SubcommandWithParent.class)
public class GroupCommand extends GenericCli implements SubcommandWithParent {

@CommandLine.ParentCommand
private RatisAdmin parent;

@CommandLine.Spec
private CommandLine.Model.CommandSpec spec;

public RatisAdmin getParent() {
return parent;
}

@Override
public Void call() throws Exception {
GenericCli.missingSubcommand(spec);
return null;
}

@Override
public Class<?> getParentType() {
return RatisAdmin.class;
}


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.admin.ratis.group;

import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.ozone.admin.ratis.BaseRatisCommand;
import org.apache.ratis.proto.RaftProtos;
import org.apache.ratis.protocol.GroupInfoReply;
import picocli.CommandLine;

import java.util.concurrent.Callable;

/**
* Display the information of a specific raft group
*/
@CommandLine.Command(
name = "info",
description = "Display the information of a specific raft group",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
public class InfoCommand extends BaseRatisCommand implements Callable<Void> {

@CommandLine.ParentCommand
public GroupCommand parent;

@CommandLine.Option(names = {"-peers"},
description = "list of peers",
required = true)
private String peers;

@CommandLine.Option(names = { "-groupid" },
description = "groupid")
private String groupid;

@CommandLine.Option(names = {"-id", "--service-id"},
description = "OM Service ID",
required = true)
private String omServiceId;


@Override
public Void call() throws Exception {
super.run(peers, groupid, omServiceId);
System.out.println("group id: " + getRaftGroup().getGroupId().getUuid());
final GroupInfoReply reply = getGroupInfoReply();
RaftProtos.RaftPeerProto leader = getLeader(reply.getRoleInfoProto());
if (leader == null) {
System.out.println("leader not found");
} else {
System.out.printf("leader info: %s(%s)%n%n", leader.getId().toStringUtf8(), leader.getAddress());
}
System.out.println(reply.getCommitInfos());
return null;
}

}
Loading