Skip to content

Commit 400794e

Browse files
committed
Renew all the leases at once, rather than one at a time
1 parent d80e57f commit 400794e

File tree

2 files changed

+42
-24
lines changed

2 files changed

+42
-24
lines changed

server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import java.util.stream.Collectors;
6161
import java.util.stream.LongStream;
6262
import java.util.stream.Stream;
63+
import java.util.stream.StreamSupport;
6364

6465
/**
6566
* This class is responsible for tracking the replication group with its progress and safety markers (local and global checkpoints).
@@ -462,6 +463,7 @@ public static String getPeerRecoveryRetentionLeaseId(ShardRouting shardRouting)
462463
*/
463464
public synchronized void renewPeerRecoveryRetentionLeases() {
464465
assert primaryMode;
466+
assert invariant();
465467

466468
/*
467469
* Peer-recovery retention leases never expire while the associated shard is assigned, but we must still renew them occasionally in
@@ -471,28 +473,40 @@ public synchronized void renewPeerRecoveryRetentionLeases() {
471473
*/
472474
final long renewalTimeMillis = currentTimeMillisSupplier.getAsLong() - indexSettings.getRetentionLeaseMillis() / 2;
473475

474-
for (ShardRouting shardRouting : routingTable) {
475-
if (shardRouting.assignedToNode()) {
476-
final CheckpointState checkpointState = checkpoints.get(shardRouting.allocationId().getId());
476+
/*
477+
* If any of the peer-recovery retention leases need renewal, it's a good opportunity to renew them all.
478+
*/
479+
final boolean renewalNeeded = StreamSupport.stream(routingTable.spliterator(), false).filter(ShardRouting::assignedToNode)
480+
.anyMatch(shardRouting -> {
477481
final RetentionLease retentionLease = retentionLeases.get(getPeerRecoveryRetentionLeaseId(shardRouting));
478482
if (retentionLease == null) {
479-
if (checkpointState.tracked) {
480-
/*
481-
* BWC: We got here here via a rolling upgrade from an older version that doesn't create peer recovery retention
482-
* leases for every shard copy. TODO create leases lazily
483-
*/
484-
assert indexSettings.getIndexVersionCreated().before(Version.V_8_0_0) : indexSettings.getIndexVersionCreated();
485-
}
486-
} else {
487-
if (retentionLease.retainingSequenceNumber() <= checkpointState.globalCheckpoint
488-
|| retentionLease.timestamp() <= renewalTimeMillis) {
483+
/*
484+
* If this shard copy is tracked then we got here here via a rolling upgrade from an older version that doesn't
485+
* create peer recovery retention leases for every shard copy. TODO create leases lazily in that situation.
486+
*/
487+
assert checkpoints.get(shardRouting.allocationId().getId()).tracked == false
488+
|| indexSettings.getIndexVersionCreated().before(Version.V_8_0_0);
489+
return false;
490+
}
491+
return retentionLease.timestamp() <= renewalTimeMillis
492+
|| retentionLease.retainingSequenceNumber() <= checkpoints.get(shardRouting.allocationId().getId()).globalCheckpoint;
493+
});
494+
495+
if (renewalNeeded) {
496+
for (ShardRouting shardRouting : routingTable) {
497+
if (shardRouting.assignedToNode()) {
498+
final RetentionLease retentionLease = retentionLeases.get(getPeerRecoveryRetentionLeaseId(shardRouting));
499+
if (retentionLease != null) {
500+
final CheckpointState checkpointState = checkpoints.get(shardRouting.allocationId().getId());
489501
renewRetentionLease(getPeerRecoveryRetentionLeaseId(shardRouting),
490502
Math.max(0L, checkpointState.globalCheckpoint + 1L),
491503
PEER_RECOVERY_RETENTION_LEASE_SOURCE);
492504
}
493505
}
494506
}
495507
}
508+
509+
assert invariant();
496510
}
497511

498512
public static class CheckpointState implements Writeable {

server/src/test/java/org/elasticsearch/index/seqno/ReplicationTrackerTests.java

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,12 +1094,6 @@ public void testPeerRecoveryRetentionLeaseCreationAndRenewal() {
10941094
tracker.renewPeerRecoveryRetentionLeases();
10951095
assertTrue("expired extra lease", tracker.getRetentionLeases(true).v1());
10961096

1097-
1098-
for (RetentionLease retentionLease : tracker.getRetentionLeases().leases()) {
1099-
// update all leases' timestamps so they don't need a time-based renewal for a while
1100-
tracker.renewRetentionLease(retentionLease.id(), retentionLease.retainingSequenceNumber(), retentionLease.source());
1101-
}
1102-
11031097
final AllocationId advancingAllocationId
11041098
= initializingAllocationIds.isEmpty() || rarely() ? primaryId : randomFrom(initializingAllocationIds);
11051099
final String advancingLeaseId = retentionLeaseFromAllocationId.apply(advancingAllocationId).id();
@@ -1117,14 +1111,24 @@ public void testPeerRecoveryRetentionLeaseCreationAndRenewal() {
11171111
tracker.renewPeerRecoveryRetentionLeases();
11181112
assertThat("immediate renewal is a no-op", tracker.getRetentionLeases().version(), equalTo(initialVersion));
11191113

1120-
final long shorterThanRenewalTime = randomLongBetween(0L, peerRecoveryRetentionLeaseRenewalTimeMillis - 1);
1121-
currentTimeMillis.addAndGet(shorterThanRenewalTime);
1122-
tracker.renewPeerRecoveryRetentionLeases();
1123-
assertThat("renewal is a no-op after a short time", tracker.getRetentionLeases().version(), equalTo(initialVersion));
1114+
//noinspection OptionalGetWithoutIsPresent
1115+
final long millisUntilFirstRenewal
1116+
= tracker.getRetentionLeases().leases().stream().mapToLong(RetentionLease::timestamp).min().getAsLong()
1117+
+ peerRecoveryRetentionLeaseRenewalTimeMillis
1118+
- currentTimeMillis.get();
1119+
1120+
if (millisUntilFirstRenewal != 0) {
1121+
final long shorterThanRenewalTime = randomLongBetween(0L, millisUntilFirstRenewal - 1);
1122+
currentTimeMillis.addAndGet(shorterThanRenewalTime);
1123+
tracker.renewPeerRecoveryRetentionLeases();
1124+
assertThat("renewal is a no-op after a short time", tracker.getRetentionLeases().version(), equalTo(initialVersion));
1125+
currentTimeMillis.addAndGet(millisUntilFirstRenewal - shorterThanRenewalTime);
1126+
}
11241127

1125-
currentTimeMillis.addAndGet(peerRecoveryRetentionLeaseRenewalTimeMillis - shorterThanRenewalTime);
11261128
tracker.renewPeerRecoveryRetentionLeases();
11271129
assertThat("renewal happens after a sufficiently long time", tracker.getRetentionLeases().version(), greaterThan(initialVersion));
1130+
assertTrue("all leases were renewed",
1131+
tracker.getRetentionLeases().leases().stream().allMatch(l -> l.timestamp() == currentTimeMillis.get()));
11281132

11291133
assertThat("test ran for too long, potentially leading to overflow",
11301134
currentTimeMillis.get(), lessThanOrEqualTo(testStartTimeMillis + maximumTestTimeMillis));

0 commit comments

Comments
 (0)