Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1643,17 +1643,6 @@
</description>
</property>

<property>
<name>ozone.om.ratis.server.role.check.interval</name>
<value>15s</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>The interval between OM leader performing a role
check on its ratis server. Ratis server informs OM if it
loses the leader role. The scheduled check is an secondary
check to ensure that the leader role is updated periodically
.</description>
</property>

<property>
<name>ozone.om.ratis.snapshot.dir</name>
<value/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,6 @@ private OMConfigKeys() {
OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT
= TimeDuration.valueOf(120, TimeUnit.SECONDS);

// OM Leader server role check interval
public static final String OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY
= "ozone.om.ratis.server.role.check.interval";
public static final TimeDuration
OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
= TimeDuration.valueOf(15, TimeUnit.SECONDS);

// OM SnapshotProvider configurations
public static final String OZONE_OM_RATIS_SNAPSHOT_DIR =
"ozone.om.ratis.snapshot.dir";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public List<OzoneManager> getOzoneManagersList() {
public OzoneManager getOMLeader() {
OzoneManager res = null;
for (OzoneManager ozoneManager : this.ozoneManagers) {
if (ozoneManager.isLeader()) {
if (ozoneManager.isLeaderReady()) {
if (res != null) {
// Found more than one leader
// Return null, expect the caller to retry in a while
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,6 @@ public void testGetOMLeader() throws InterruptedException, TimeoutException {
Assert.assertNotNull("Timed out waiting OM leader election to finish: "
+ "no leader or more than one leader.", ozoneManager);
Assert.assertTrue("Should have gotten the leader!",
ozoneManager.get().isLeader());
ozoneManager.get().isLeaderReady());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception {
Assert.assertNotNull("Timed out waiting OM leader election to finish: "
+ "no leader or more than one leader.", ozoneManager);
Assert.assertTrue("Should have gotten the leader!",
ozoneManager.get().isLeader());
ozoneManager.get().isLeaderReady());

OzoneManagerServiceProviderImpl impl = (OzoneManagerServiceProviderImpl)
cluster.getReconServer().getOzoneManagerServiceProvider();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ private boolean shouldRun() {
// OzoneManager can be null for testing
return true;
}
return ozoneManager.isLeader();
return ozoneManager.isLeaderReady();
}

private boolean isRatisEnabled() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@
import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.KEY_NOT_FOUND;
import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.TOKEN_ERROR_OTHER;
import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.VOLUME_LOCK;
import static org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus.LEADER_AND_READY;
import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneManagerService.newReflectiveBlockingService;

import org.apache.hadoop.util.Time;
Expand Down Expand Up @@ -3502,21 +3503,16 @@ public String getComponent() {
public long getMaxUserVolumeCount() {
return maxUserVolumeCount;
}

/**
* Checks the Leader status of OM Ratis Server.
* Note that this status has a small window of error. It should not be used
* to determine the absolute leader status.
* If it is the leader, the role status is cached till Ratis server
* notifies of leader change. If it is not leader, the role information is
* retrieved through by submitting a GroupInfoRequest to Ratis server.
* <p>
* If ratis is not enabled, then it always returns true.
* Return true, if the current OM node is leader and in ready state to
* process the requests.
*
* @return Return true if this node is the leader, false otherwsie.
* If ratis is not enabled, then it always returns true.
* @return
*/
public boolean isLeader() {
return isRatisEnabled ? omRatisServer.isLeader() : true;
public boolean isLeaderReady() {
return isRatisEnabled ?
omRatisServer.checkLeaderStatus() == LEADER_AND_READY : true;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,8 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.base.Preconditions;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
Expand All @@ -59,11 +54,7 @@
import org.apache.ratis.conf.RaftProperties;
import org.apache.ratis.grpc.GrpcConfigKeys;
import org.apache.ratis.netty.NettyConfigKeys;
import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
import org.apache.ratis.proto.RaftProtos.RoleInfoProto;
import org.apache.ratis.protocol.ClientId;
import org.apache.ratis.protocol.GroupInfoReply;
import org.apache.ratis.protocol.GroupInfoRequest;
import org.apache.ratis.protocol.exceptions.LeaderNotReadyException;
import org.apache.ratis.protocol.exceptions.NotLeaderException;
import org.apache.ratis.protocol.exceptions.StateMachineException;
Expand All @@ -78,8 +69,9 @@
import org.apache.ratis.rpc.SupportedRpcType;
import org.apache.ratis.server.RaftServer;
import org.apache.ratis.server.RaftServerConfigKeys;
import org.apache.ratis.server.impl.RaftServerImpl;
import org.apache.ratis.server.impl.RaftServerProxy;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.util.LifeCycle;
import org.apache.ratis.util.SizeInBytes;
import org.apache.ratis.util.StringUtils;
Expand All @@ -106,20 +98,6 @@ public final class OzoneManagerRatisServer {

private final OzoneManager ozoneManager;
private final OzoneManagerStateMachine omStateMachine;
private final ClientId clientId = ClientId.randomId();

private final ScheduledExecutorService scheduledRoleChecker;
private long roleCheckInitialDelayMs = 1000; // 1 second default
private long roleCheckIntervalMs;
private ReentrantReadWriteLock roleCheckLock = new ReentrantReadWriteLock();
private Optional<RaftPeerRole> cachedPeerRole = Optional.empty();
private Optional<RaftPeerId> cachedLeaderPeerId = Optional.empty();

private static final AtomicLong CALL_ID_COUNTER = new AtomicLong();

private static long nextCallId() {
return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE;
}

/**
* Submit request to Ratis server.
Expand Down Expand Up @@ -301,20 +279,6 @@ private OzoneManagerRatisServer(ConfigurationSource conf,
.setProperties(serverProperties)
.setStateMachine(omStateMachine)
.build();

// Run a scheduler to check and update the server role on the leader
// periodically
this.scheduledRoleChecker = Executors.newSingleThreadScheduledExecutor();
this.scheduledRoleChecker.scheduleWithFixedDelay(new Runnable() {
@Override
public void run() {
// Run this check only on the leader OM
if (cachedPeerRole.isPresent() &&
cachedPeerRole.get() == RaftPeerRole.LEADER) {
updateServerRole();
}
}
}, roleCheckInitialDelayMs, roleCheckIntervalMs, TimeUnit.MILLISECONDS);
}

/**
Expand Down Expand Up @@ -556,19 +520,6 @@ private RaftProperties newRaftProperties(ConfigurationSource conf) {
RaftServerConfigKeys.Rpc.setSlownessTimeout(properties,
nodeFailureTimeout);

TimeUnit roleCheckIntervalUnit =
OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
.getUnit();
long roleCheckIntervalDuration = conf.getTimeDuration(
OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY,
OMConfigKeys.OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
.getDuration(), nodeFailureTimeoutUnit);
this.roleCheckIntervalMs = TimeDuration.valueOf(
roleCheckIntervalDuration, roleCheckIntervalUnit)
.toLong(TimeUnit.MILLISECONDS);
this.roleCheckInitialDelayMs = leaderElectionMinTimeout
.toLong(TimeUnit.MILLISECONDS);

// Set auto trigger snapshot. We don't need to configure auto trigger
// threshold in OM, as last applied index is flushed during double buffer
// flush automatically. (But added this property internally, so that this
Expand All @@ -591,107 +542,39 @@ private RaftProperties newRaftProperties(ConfigurationSource conf) {
}

/**
* Check the cached leader status.
* @return true if cached role is Leader, false otherwise.
*/
private boolean checkCachedPeerRoleIsLeader() {
this.roleCheckLock.readLock().lock();
try {
if (cachedPeerRole.isPresent() &&
cachedPeerRole.get() == RaftPeerRole.LEADER) {
return true;
}
return false;
} finally {
this.roleCheckLock.readLock().unlock();
}
}

/**
* Check if the current OM node is the leader node.
* @return true if Leader, false otherwise.
*/
public boolean isLeader() {
if (checkCachedPeerRoleIsLeader()) {
return true;
}

// Get the server role from ratis server and update the cached values.
updateServerRole();

// After updating the server role, check and return if leader or not.
return checkCachedPeerRoleIsLeader();
}

/**
* Get the suggested leader peer id.
* @return RaftPeerId of the suggested leader node.
* Defines RaftServer Status.
*/
public Optional<RaftPeerId> getCachedLeaderPeerId() {
this.roleCheckLock.readLock().lock();
try {
return cachedLeaderPeerId;
} finally {
this.roleCheckLock.readLock().unlock();
}
public enum RaftServerStatus {
NOT_LEADER,
LEADER_AND_NOT_READY,
LEADER_AND_READY;
}

/**
* Get the gorup info (peer role and leader peer id) from Ratis server and
* update the OM server role.
* Check Leader status and return the state of the RaftServer.
*
* @return RaftServerStatus.
*/
public void updateServerRole() {
public RaftServerStatus checkLeaderStatus() {
Preconditions.checkState(server instanceof RaftServerProxy);
RaftServerImpl serverImpl;
try {
GroupInfoReply groupInfo = getGroupInfo();
RoleInfoProto roleInfoProto = groupInfo.getRoleInfoProto();
RaftPeerRole thisNodeRole = roleInfoProto.getRole();

if (thisNodeRole.equals(RaftPeerRole.LEADER)) {
setServerRole(thisNodeRole, raftPeerId);

} else if (thisNodeRole.equals(RaftPeerRole.FOLLOWER)) {
ByteString leaderNodeId = roleInfoProto.getFollowerInfo()
.getLeaderInfo().getId().getId();
// There may be a chance, here we get leaderNodeId as null. For
// example, in 3 node OM Ratis, if 2 OM nodes are down, there will
// be no leader.
RaftPeerId leaderPeerId = null;
if (leaderNodeId != null && !leaderNodeId.isEmpty()) {
leaderPeerId = RaftPeerId.valueOf(leaderNodeId);
serverImpl = ((RaftServerProxy) server).getImpl(raftGroupId);
if (serverImpl != null) {
if (!serverImpl.isLeader()) {
return RaftServerStatus.NOT_LEADER;
} else if (serverImpl.isLeaderReady()) {
return RaftServerStatus.LEADER_AND_READY;
} else {
return RaftServerStatus.LEADER_AND_NOT_READY;
}

setServerRole(thisNodeRole, leaderPeerId);

} else {
setServerRole(thisNodeRole, null);

}
} catch (IOException e) {
LOG.error("Failed to retrieve RaftPeerRole. Setting cached role to " +
"{} and resetting leader info.", RaftPeerRole.UNRECOGNIZED, e);
setServerRole(null, null);
}
}

/**
* Set the current server role and the leader peer id.
*/
private void setServerRole(RaftPeerRole currentRole,
RaftPeerId leaderPeerId) {
this.roleCheckLock.writeLock().lock();
try {
this.cachedPeerRole = Optional.ofNullable(currentRole);
this.cachedLeaderPeerId = Optional.ofNullable(leaderPeerId);
} finally {
this.roleCheckLock.writeLock().unlock();
} catch (IOException ioe) {
// In this case we return not a leader.
LOG.error("Fail to get RaftServer impl and therefore it's not clear " +
"whether it's leader. ", ioe);
}
}

private GroupInfoReply getGroupInfo() throws IOException {
GroupInfoRequest groupInfoRequest = new GroupInfoRequest(clientId,
raftPeerId, raftGroupId, nextCallId());
GroupInfoReply groupInfo = server.getGroupInfo(groupInfoRequest);
return groupInfo;
return RaftServerStatus.NOT_LEADER;
}

public int getServerPort() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,6 @@ public CompletableFuture<TermIndex> notifyInstallSnapshotFromLeader(
@Override
public void notifyNotLeader(Collection<TransactionContext> pendingEntries)
throws IOException {
omRatisServer.updateServerRole();
}

@Override
Expand Down
Loading