-
Notifications
You must be signed in to change notification settings - Fork 594
HDDS-3988: DN can distinguish SCMCommand from stale leader SCM #1314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| import java.util.LinkedList; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
| import java.util.Queue; | ||
| import java.util.Set; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
|
|
@@ -31,6 +32,7 @@ | |
| import java.util.concurrent.ThreadPoolExecutor; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.TimeoutException; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.concurrent.locks.Lock; | ||
| import java.util.concurrent.locks.ReentrantLock; | ||
|
|
@@ -80,6 +82,18 @@ public class StateContext { | |
| private boolean shutdownGracefully = false; | ||
| private final AtomicLong threadPoolNotAvailableCount; | ||
|
|
||
| /** | ||
| * term of latest leader SCM, extract from SCMCommand. | ||
| * | ||
| * Only leader SCM (both latest and stale) can send out SCMCommand, | ||
| * which will save its term in SCMCommand. Since latest leader SCM | ||
| * always has the highest term, term can be used to detect SCMCommand | ||
| * from stale leader SCM. | ||
| * | ||
| * For non-HA mode, term of SCMCommand will be 0. | ||
| */ | ||
| private Optional<Long> termOfLeaderSCM = Optional.empty(); | ||
|
|
||
| /** | ||
| * Starting with a 2 sec heartbeat frequency which will be updated to the | ||
| * real HB frequency after scm registration. With this method the | ||
|
|
@@ -470,6 +484,65 @@ public void execute(ExecutorService service, long time, TimeUnit unit) | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * After startup, datanode needs detect latest leader SCM before handling | ||
| * any SCMCommand, so that it won't be disturbed by stale leader SCM. | ||
| * | ||
| * The rule is: after majority SCMs are in HEARTBEAT state and has | ||
| * heard from leader SCMs (commandQueue is not empty), datanode will init | ||
| * termOfLeaderSCM with the max term found in commandQueue. | ||
| * | ||
| * The init process also works for non-HA mode. In that case, term of all | ||
| * SCMCommands will be 0. | ||
| */ | ||
| private void initTermOfLeaderSCM() { | ||
| // only init once | ||
| if (termOfLeaderSCM.isPresent()) { | ||
| return; | ||
| } | ||
|
|
||
| AtomicInteger scmNum = new AtomicInteger(0); | ||
| AtomicInteger activeScmNum = new AtomicInteger(0); | ||
|
|
||
| getParent().getConnectionManager().getValues() | ||
| .forEach(endpoint -> { | ||
| if (endpoint.isPassive()) { | ||
| return; | ||
| } | ||
| scmNum.incrementAndGet(); | ||
| if (endpoint.getState() | ||
| == EndpointStateMachine.EndPointStates.HEARTBEAT) { | ||
| activeScmNum.incrementAndGet(); | ||
| } | ||
| }); | ||
|
|
||
| // majority SCMs should be in HEARTBEAT state. | ||
| if (activeScmNum.get() < scmNum.get() / 2 + 1) { | ||
| return; | ||
| } | ||
|
|
||
| // if commandQueue is not empty, init termOfLeaderSCM | ||
| // with the largest term found in commandQueue | ||
| commandQueue.stream() | ||
| .mapToLong(SCMCommand::getTerm) | ||
| .max() | ||
| .ifPresent(term -> termOfLeaderSCM = Optional.of(term)); | ||
| } | ||
|
|
||
| /** | ||
| * monotonically increase termOfLeaderSCM. | ||
| * Always record the latest term that has seen. | ||
| */ | ||
| private void updateTermOfLeaderSCM(SCMCommand<?> command) { | ||
| if (!termOfLeaderSCM.isPresent()) { | ||
| LOG.error("should init termOfLeaderSCM before update it."); | ||
| return; | ||
| } | ||
|
|
||
| termOfLeaderSCM = Optional.of( | ||
| Long.max(termOfLeaderSCM.get(), command.getTerm())); | ||
| } | ||
|
|
||
| /** | ||
| * Returns the next command or null if it is empty. | ||
| * | ||
|
|
@@ -478,7 +551,26 @@ public void execute(ExecutorService service, long time, TimeUnit unit) | |
| public SCMCommand getNextCommand() { | ||
| lock.lock(); | ||
| try { | ||
| return commandQueue.poll(); | ||
| initTermOfLeaderSCM(); | ||
| if (!termOfLeaderSCM.isPresent()) { | ||
| return null; // not ready yet | ||
| } | ||
|
|
||
| while (true) { | ||
| SCMCommand<?> command = commandQueue.poll(); | ||
| if (command == null) { | ||
| return null; | ||
| } | ||
|
|
||
| updateTermOfLeaderSCM(command); | ||
| if (command.getTerm() == termOfLeaderSCM.get()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am confused on when
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean whether |
||
| return command; | ||
| } | ||
|
|
||
| LOG.warn("Detect and drop a SCMCommand {} from stale leader SCM," + | ||
| " stale term {}, latest term {}.", | ||
| command, command.getTerm(), termOfLeaderSCM.get()); | ||
| } | ||
| } finally { | ||
| lock.unlock(); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall we use AtomicLong for termOfLeaderSCM?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good Point.
Since this is executed in a single thread, let's do this first. We surely need to consider the thread safety in future change.