Skip to content

Commit cafab0d

Browse files
committed
GG-20702 Fix invalid partition clearing.
1 parent afdc96e commit cafab0d

File tree

5 files changed

+47
-3
lines changed

5 files changed

+47
-3
lines changed

modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtLocalPartition.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,8 @@ private boolean casState(long state, GridDhtPartitionState toState) {
606606
*/
607607
public boolean own() {
608608
while (true) {
609+
assert !clear : "Could not own clearing partition " + this;
610+
609611
long state = this.state.get();
610612

611613
GridDhtPartitionState partState = getPartState(state);

modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474

7575
import static org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_PART_DATA_LOST;
7676
import static org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
77+
import static org.apache.ignite.internal.processors.cache.distributed.dht.preloader.ExchangeType.ALL;
7778
import static org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.EVICTED;
7879
import static org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.LOST;
7980
import static org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.MOVING;
@@ -763,7 +764,8 @@ private boolean partitionLocalNode(int p, AffinityTopologyVersion topVer) {
763764

764765
long updateSeq = this.updateSeq.incrementAndGet();
765766

766-
if (!ctx.localNode().isClient()) {
767+
// Skip partition updates when this is not a real exchange (exchange type is not ALL).
768+
if (!ctx.localNode().isClient() && exchFut.exchangeType() == ALL) {
767769
for (int p = 0; p < partitions; p++) {
768770
GridDhtLocalPartition locPart = localPartition0(p, topVer, false, true);
769771

modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/PartitionsEvictManager.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ private void showProgress() {
368368
", grpId=" + grp.groupId() +
369369
", remainingPartsToEvict=" + (totalTasks.get() - taskInProgress) +
370370
", partsEvictInProgress=" + taskInProgress +
371-
", totalParts= " + grp.topology().localPartitions().size() + "]");
371+
", totalParts=" + grp.topology().localPartitions().size() + "]");
372372
}
373373
}
374374

modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/IgnitePdsCacheWalDisabledOnRebalancingTest.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,43 @@ public void testRebalancedPartitionsOwningWithConcurrentAffinityChange() throws
345345
verifyCache(ig1.cache(CACHE3_NAME), GENERATING_FUNC);
346346
}
347347

348+
/**
349+
* Scenario: while rebalanced MOVING partitions are being owned by the checkpointer,
350+
* a concurrent no-op exchange must not trigger partition clearing.
351+
*
352+
* @throws Exception If failed.
353+
*/
354+
@Test
355+
public void testRebalancedPartitionsOwningWithAffinitySwitch() throws Exception {
356+
Ignite ig0 = startGridsMultiThreaded(4);
357+
fillCache(ig0.dataStreamer(CACHE3_NAME), CACHE_SIZE, GENERATING_FUNC);
358+
359+
// Stop node idx=2 to prepare for a baseline topology change later.
360+
stopGrid(2);
361+
362+
// Stop node idx=1 and clean up its persistence files (LFS) to trigger a full rebalance after it restarts.
363+
String ig1Name = "node01-" + grid(1).localNode().consistentId();
364+
stopGrid(1);
365+
cleanPersistenceFiles(ig1Name);
366+
367+
// Switch to blocking file I/O so that the checkpointer can be blocked on node idx=1.
368+
useBlockingFileIO = true;
369+
370+
// Drain all permits to block the checkpointer on node idx=1 (see BlockingCheckpointFileIOFactory).
371+
fileIoBlockingSemaphore.drainPermits();
372+
373+
// Wait for the rebalance to finish (all partitions stay in MOVING state until the checkpoint is finished).
374+
startGrid(1).cachex(CACHE3_NAME).context().group().preloader().rebalanceFuture().get();
375+
376+
startGrid("client"); // NOTE(review): presumably the client join is the concurrent no-op exchange under test - confirm.
377+
378+
fileIoBlockingSemaphore.release(Integer.MAX_VALUE); // Hand the permits back; presumably this unblocks the checkpointer - confirm.
379+
380+
awaitPartitionMapExchange();
381+
382+
assertPartitionsSame(idleVerify(grid(0), CACHE3_NAME));
383+
}
384+
348385
/** FileIOFactory implementation that enables blocking of writes to disk so checkpoint can be blocked. */
349386
private static class BlockingCheckpointFileIOFactory implements FileIOFactory {
350387
/** Serial version uid. */

modules/core/src/test/java/org/apache/ignite/testframework/junits/common/GridCommonAbstractTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,8 @@ protected void printPartitionState(String cacheName, int firstParts) {
872872

873873
sb.append("nodeId=")
874874
.append(k.context().localNodeId())
875+
.append(" consistentId=")
876+
.append(k.localNode().consistentId())
875877
.append(" isDone=")
876878
.append(syncFut.isDone())
877879
.append("\n");
@@ -952,7 +954,8 @@ protected void printPartitionState(String cacheName, int firstParts) {
952954
.append(part == null ? "NA" : part.dataStore().partUpdateCounter())
953955
.append(" fullSize=")
954956
.append(part == null ? "NA" : part.fullSize())
955-
.append(" state=").append(part.state());
957+
.append(" state=").append(part.state())
958+
.append(" reservations=").append(part.reservations());
956959
}
957960
else
958961
sb.append(p).append(" is null");

0 commit comments

Comments
 (0)