Skip to content

Commit

Permalink
GEODE-9060: Remove the member from a copy of replicates as GII candid… (
Browse files Browse the repository at this point in the history
#6246)

* GEODE-9060: Remove the member from a copy of replicates as GII candidate if
it's not part of the same distributed system, but leave original replicates
unchanged.

(cherry picked from commit 76a5afd)
  • Loading branch information
gesterzhou committed Jan 3, 2022
1 parent 8a77331 commit 589fc30
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -877,11 +877,43 @@ public void testSplitBrain() {
Throwable thrown = catchThrowable(() -> {
createReplicateRegion(regionName, getDiskDirs(getVMId()));
});
assertThat(thrown).isInstanceOf(ConflictingPersistentDataException.class);
assertThat(thrown)
.isInstanceOf(ConflictingPersistentDataException.class)
.hasMessageContaining("was not part of the same distributed system as the local data");
}
});
}

/**
 * Exercises a split-brain scenario that is expected to be recoverable: vm0 and vm1 each persist
 * the region while the other one is offline, but vm2 keeps the region open for the whole test.
 *
 * <p>
 * The final two steps assert that both vm0 and vm1 can recreate the region and observe the
 * latest value ("C") for key "A". NOTE(review): presumably vm2 is the member that supplies the
 * up-to-date data during recovery; confirm against the persistence advisor logic.
 */
@Test
public void testRecoverableSplitBrain() {
// vm2 creates the region first and never closes it in this test.
vm2.invoke(() -> {
createReplicateRegion(regionName, getDiskDirs(getVMId()));
});
// vm0 writes A=B and then closes its copy of the region.
vm0.invoke(() -> {
createReplicateRegion(regionName, getDiskDirs(getVMId()));
putEntry("A", "B");
getCache().getRegion(regionName).close();
});

// vm1 joins while vm0's region is closed, sees A=B, changes it to A=C, then closes as well.
vm1.invoke(() -> {
createReplicateRegion(regionName, getDiskDirs(getVMId()));
validateEntry("A", "B");
updateEntry("A", "C");
getCache().getRegion(regionName).close();
});

// VM0 doesn't know that VM1 ever existed so it will start up.
vm0.invoke(() -> {
createReplicateRegion(regionName, getDiskDirs(getVMId()));
// vm0 must see the update made by vm1 while vm0's region was closed.
validateEntry("A", "C");
});

// vm1 must also be able to recreate the region and still see A=C.
vm1.invoke(() -> {
createReplicateRegion(regionName, getDiskDirs(getVMId()));
validateEntry("A", "C");
});
}

/**
* Test to make sure that if a member crashes while a GII is in progress, we wait for the
* member to come back for starting.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -510,18 +510,18 @@ public Set<PersistentMemberID> getPersistedMembers() {
public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates)
throws ReplyException {
PersistentStateQueryResults remoteStates = getMyStateOnMembers(replicates);
Set<InternalDistributedMember> copyOfReplicates = null;

persistenceAdvisorObserver.observe(regionPath);

boolean equal = false;
PersistentMemberID myId = getPersistentID();
for (Map.Entry<InternalDistributedMember, PersistentMemberState> entry : remoteStates
.getStateOnPeers().entrySet()) {
InternalDistributedMember member = entry.getKey();
PersistentMemberID remoteId = remoteStates.getPersistentIds().get(member);

PersistentMemberID myId = getPersistentID();
PersistentMemberState stateOnPeer = entry.getValue();

if (PersistentMemberState.REVOKED.equals(stateOnPeer)) {
throw new RevokedPersistentDataException(
String.format(
Expand All @@ -533,7 +533,19 @@ public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates)
String message = String.format(
"Region %s remote member %s with persistent data %s was not part of the same distributed system as the local data from %s",
regionPath, member, remoteId, myId);
throw new ConflictingPersistentDataException(message);
// Conceptually, a member that is removed here because it does not know about the current
// member should still be equal to the existing replicates, so it can still be used as a
// GII provider candidate. Remove it only from copyOfReplicates so that the original
// replicates set is left unmodified.
if (copyOfReplicates == null) {
copyOfReplicates = new HashSet<>(replicates);
}
copyOfReplicates.remove(member);
if (copyOfReplicates.isEmpty()) {
throw new ConflictingPersistentDataException(message);
} else {
logger.info(message);
}
}

if (myId != null && stateOnPeer == PersistentMemberState.EQUAL) {
Expand Down

0 comments on commit 589fc30

Please sign in to comment.