Skip to content

Commit 7dcd81b

Browse files
authored
Throw back replica local checkpoint on new primary
This commit causes a replica to throw back its local checkpoint to the global checkpoint when learning of a new primary through a replica operation. Relates #25452
1 parent 7c637a0 commit 7dcd81b

File tree

5 files changed

+166
-20
lines changed

5 files changed

+166
-20
lines changed

core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,19 @@ public synchronized void markSeqNoAsCompleted(final long seqNo) {
121121
}
122122
}
123123

124+
/**
125+
* Resets the checkpoint to the specified value.
126+
*
127+
* @param checkpoint the local checkpoint to reset this tracker to
128+
*/
129+
synchronized void resetCheckpoint(final long checkpoint) {
130+
assert checkpoint != SequenceNumbersService.UNASSIGNED_SEQ_NO;
131+
assert checkpoint <= this.checkpoint;
132+
processedSeqNo.clear();
133+
firstProcessedSeqNo = checkpoint + 1;
134+
this.checkpoint = checkpoint;
135+
}
136+
124137
/**
125138
* The current checkpoint which can be advanced by {@link #markSeqNoAsCompleted(long)}.
126139
*

core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ public void markSeqNoAsCompleted(final long seqNo) {
106106
localCheckpointTracker.markSeqNoAsCompleted(seqNo);
107107
}
108108

109+
/**
110+
* Resets the local checkpoint to the specified value.
111+
*
112+
* @param localCheckpoint the local checkpoint to reset to
113+
*/
114+
public void resetLocalCheckpoint(final long localCheckpoint) {
115+
localCheckpointTracker.resetCheckpoint(localCheckpoint);
116+
}
117+
109118
/**
110119
* The current sequence number stats.
111120
*

core/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2058,6 +2058,19 @@ public void acquireReplicaOperationPermit(final long operationPrimaryTerm, final
20582058
"shard term already update. op term [" + operationPrimaryTerm + "], shardTerm [" + primaryTerm + "]";
20592059
primaryTerm = operationPrimaryTerm;
20602060
updateGlobalCheckpointOnReplica(globalCheckpoint);
2061+
final long currentGlobalCheckpoint = getGlobalCheckpoint();
2062+
final long localCheckpoint;
2063+
if (currentGlobalCheckpoint == SequenceNumbersService.UNASSIGNED_SEQ_NO) {
2064+
localCheckpoint = SequenceNumbersService.NO_OPS_PERFORMED;
2065+
} else {
2066+
localCheckpoint = currentGlobalCheckpoint;
2067+
}
2068+
logger.trace(
2069+
"detected new primary with primary term [{}], resetting local checkpoint from [{}] to [{}]",
2070+
operationPrimaryTerm,
2071+
getLocalCheckpoint(),
2072+
localCheckpoint);
2073+
getEngine().seqNoService().resetLocalCheckpoint(localCheckpoint);
20612074
getEngine().getTranslog().rollGeneration();
20622075
});
20632076
globalCheckpointUpdated = true;

core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.stream.Collectors;
3939
import java.util.stream.IntStream;
4040

41+
import static org.hamcrest.Matchers.empty;
4142
import static org.hamcrest.Matchers.equalTo;
4243
import static org.hamcrest.Matchers.isOneOf;
4344

@@ -236,4 +237,23 @@ public void testWaitForOpsToComplete() throws BrokenBarrierException, Interrupte
236237

237238
thread.join();
238239
}
240+
241+
public void testResetCheckpoint() {
242+
final int operations = 1024 - scaledRandomIntBetween(0, 1024);
243+
int maxSeqNo = Math.toIntExact(SequenceNumbersService.NO_OPS_PERFORMED);
244+
for (int i = 0; i < operations; i++) {
245+
if (!rarely()) {
246+
tracker.markSeqNoAsCompleted(i);
247+
maxSeqNo = i;
248+
}
249+
}
250+
251+
final int localCheckpoint =
252+
randomIntBetween(Math.toIntExact(SequenceNumbersService.NO_OPS_PERFORMED), Math.toIntExact(tracker.getCheckpoint()));
253+
tracker.resetCheckpoint(localCheckpoint);
254+
assertThat(tracker.getCheckpoint(), equalTo((long) localCheckpoint));
255+
assertThat(tracker.getMaxSeqNo(), equalTo((long) maxSeqNo));
256+
assertThat(tracker.processedSeqNo, empty());
257+
assertThat(tracker.generateSeqNo(), equalTo((long) (maxSeqNo + 1)));
258+
}
239259
}

core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 111 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
import org.elasticsearch.index.mapper.ParsedDocument;
8181
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
8282
import org.elasticsearch.index.mapper.SourceToParse;
83+
import org.elasticsearch.index.seqno.SequenceNumbers;
8384
import org.elasticsearch.index.seqno.SequenceNumbersService;
8485
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
8586
import org.elasticsearch.index.store.Store;
@@ -142,7 +143,6 @@
142143
import static org.hamcrest.Matchers.hasSize;
143144
import static org.hamcrest.Matchers.hasToString;
144145
import static org.hamcrest.Matchers.instanceOf;
145-
import static org.hamcrest.Matchers.not;
146146
import static org.hamcrest.Matchers.nullValue;
147147

148148
/**
@@ -405,26 +405,10 @@ public void testPrimaryFillsSeqNoGapsOnPromotion() throws Exception {
405405

406406
// most of the time this is large enough that most of the time there will be at least one gap
407407
final int operations = 1024 - scaledRandomIntBetween(0, 1024);
408-
int max = Math.toIntExact(SequenceNumbersService.NO_OPS_PERFORMED);
409-
boolean gap = false;
410-
for (int i = 0; i < operations; i++) {
411-
if (!rarely()) {
412-
final String id = Integer.toString(i);
413-
SourceToParse sourceToParse = SourceToParse.source(indexShard.shardId().getIndexName(), "test", id,
414-
new BytesArray("{}"), XContentType.JSON);
415-
indexShard.applyIndexOperationOnReplica(i, indexShard.getPrimaryTerm(),
416-
1, VersionType.EXTERNAL, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false, sourceToParse,
417-
getMappingUpdater(indexShard, sourceToParse.type()));
418-
max = i;
419-
} else {
420-
gap = true;
421-
}
422-
}
408+
final Result result = indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(SequenceNumbersService.NO_OPS_PERFORMED));
423409

424-
final int maxSeqNo = max;
425-
if (gap) {
426-
assertThat(indexShard.getLocalCheckpoint(), not(equalTo(maxSeqNo)));
427-
}
410+
final int maxSeqNo = result.maxSeqNo;
411+
final boolean gap = result.gap;
428412

429413
// promote the replica
430414
final ShardRouting replicaRouting = indexShard.routingEntry();
@@ -626,6 +610,12 @@ public void onFailure(Exception e) {
626610
}
627611
newGlobalCheckPoint = randomIntBetween((int) indexShard.getGlobalCheckpoint(), (int) localCheckPoint);
628612
}
613+
final long expectedLocalCheckpoint;
614+
if (newGlobalCheckPoint == SequenceNumbersService.UNASSIGNED_SEQ_NO) {
615+
expectedLocalCheckpoint = SequenceNumbersService.NO_OPS_PERFORMED;
616+
} else {
617+
expectedLocalCheckpoint = newGlobalCheckPoint;
618+
}
629619
// but you can not increment with a new primary term until the operations on the older primary term complete
630620
final Thread thread = new Thread(() -> {
631621
try {
@@ -637,6 +627,7 @@ public void onFailure(Exception e) {
637627
@Override
638628
public void onResponse(Releasable releasable) {
639629
assertThat(indexShard.getPrimaryTerm(), equalTo(newPrimaryTerm));
630+
assertThat(indexShard.getLocalCheckpoint(), equalTo(expectedLocalCheckpoint));
640631
assertThat(indexShard.getGlobalCheckpoint(), equalTo(newGlobalCheckPoint));
641632
onResponse.set(true);
642633
releasable.close();
@@ -697,6 +688,7 @@ private void finish() {
697688
assertTrue(onResponse.get());
698689
assertNull(onFailure.get());
699690
assertThat(indexShard.getTranslog().getGeneration().translogFileGeneration, equalTo(translogGen + 1));
691+
assertThat(indexShard.getLocalCheckpoint(), equalTo(expectedLocalCheckpoint));
700692
assertThat(indexShard.getGlobalCheckpoint(), equalTo(newGlobalCheckPoint));
701693
}
702694
}
@@ -707,6 +699,56 @@ private void finish() {
707699
closeShards(indexShard);
708700
}
709701

702+
public void testThrowBackLocalCheckpointOnReplica() throws IOException, InterruptedException {
703+
final IndexShard indexShard = newStartedShard(false);
704+
705+
// most of the time this is large enough that most of the time there will be at least one gap
706+
final int operations = 1024 - scaledRandomIntBetween(0, 1024);
707+
indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(SequenceNumbersService.NO_OPS_PERFORMED));
708+
709+
final long globalCheckpointOnReplica =
710+
randomIntBetween(
711+
Math.toIntExact(SequenceNumbersService.UNASSIGNED_SEQ_NO),
712+
Math.toIntExact(indexShard.getLocalCheckpoint()));
713+
indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica);
714+
715+
final int globalCheckpoint =
716+
randomIntBetween(
717+
Math.toIntExact(SequenceNumbersService.UNASSIGNED_SEQ_NO),
718+
Math.toIntExact(indexShard.getLocalCheckpoint()));
719+
final CountDownLatch latch = new CountDownLatch(1);
720+
indexShard.acquireReplicaOperationPermit(
721+
indexShard.primaryTerm + 1,
722+
globalCheckpoint,
723+
new ActionListener<Releasable>() {
724+
@Override
725+
public void onResponse(final Releasable releasable) {
726+
releasable.close();
727+
latch.countDown();
728+
}
729+
730+
@Override
731+
public void onFailure(final Exception e) {
732+
733+
}
734+
},
735+
ThreadPool.Names.SAME);
736+
737+
latch.await();
738+
if (globalCheckpointOnReplica == SequenceNumbersService.UNASSIGNED_SEQ_NO
739+
&& globalCheckpoint == SequenceNumbersService.UNASSIGNED_SEQ_NO) {
740+
assertThat(indexShard.getLocalCheckpoint(), equalTo(SequenceNumbersService.NO_OPS_PERFORMED));
741+
} else {
742+
assertThat(indexShard.getLocalCheckpoint(), equalTo(Math.max(globalCheckpoint, globalCheckpointOnReplica)));
743+
}
744+
745+
// ensure that after the local checkpoint throw back and indexing again, the local checkpoint advances
746+
final Result result = indexOnReplicaWithGaps(indexShard, operations, Math.toIntExact(indexShard.getLocalCheckpoint()));
747+
assertThat(indexShard.getLocalCheckpoint(), equalTo((long) result.localCheckpoint));
748+
749+
closeShards(indexShard);
750+
}
751+
710752
public void testConcurrentTermIncreaseOnReplicaShard() throws BrokenBarrierException, InterruptedException, IOException {
711753
final IndexShard indexShard = newStartedShard(false);
712754

@@ -1966,6 +2008,55 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept
19662008
closeShards(newShard);
19672009
}
19682010

2011+
/**
 * Immutable holder for the outcome of {@code indexOnReplicaWithGaps}.
 */
class Result {
    /** The local checkpoint computed while indexing. */
    private final int localCheckpoint;
    /** The highest sequence number that was indexed. */
    private final int maxSeqNo;
    /** Whether at least one sequence number was skipped while indexing. */
    private final boolean gap;

    Result(final int checkpoint, final int highestSeqNo, final boolean sawGap) {
        this.localCheckpoint = checkpoint;
        this.maxSeqNo = highestSeqNo;
        this.gap = sawGap;
    }
}
2022+
2023+
/**
2024+
* Index on the specified shard while introducing sequence number gaps.
2025+
*
2026+
* @param indexShard the shard
2027+
* @param operations the number of operations
2028+
* @param offset the starting sequence number
2029+
* @return a pair of the maximum sequence number and whether or not a gap was introduced
2030+
* @throws IOException if an I/O exception occurs while indexing on the shard
2031+
*/
2032+
private Result indexOnReplicaWithGaps(
2033+
final IndexShard indexShard,
2034+
final int operations,
2035+
final int offset) throws IOException {
2036+
int localCheckpoint = offset;
2037+
int max = offset;
2038+
boolean gap = false;
2039+
for (int i = offset + 1; i < operations; i++) {
2040+
if (!rarely()) {
2041+
final String id = Integer.toString(i);
2042+
SourceToParse sourceToParse = SourceToParse.source(indexShard.shardId().getIndexName(), "test", id,
2043+
new BytesArray("{}"), XContentType.JSON);
2044+
indexShard.applyIndexOperationOnReplica(i, indexShard.getPrimaryTerm(),
2045+
1, VersionType.EXTERNAL, IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, false, sourceToParse,
2046+
getMappingUpdater(indexShard, sourceToParse.type()));
2047+
if (!gap && i == localCheckpoint + 1) {
2048+
localCheckpoint++;
2049+
}
2050+
max = i;
2051+
} else {
2052+
gap = true;
2053+
}
2054+
}
2055+
assert localCheckpoint == indexShard.getLocalCheckpoint();
2056+
assert !gap || (localCheckpoint != max);
2057+
return new Result(localCheckpoint, max, gap);
2058+
}
2059+
19692060
/** A dummy repository for testing which just needs restore overridden */
19702061
private abstract static class RestoreOnlyRepository extends AbstractLifecycleComponent implements Repository {
19712062
private final String indexName;

0 commit comments

Comments
 (0)