Skip to content

Commit

Permalink
[Segment Replication] Bump segment infos counter before commit during…
Browse files Browse the repository at this point in the history
… replica promotion (#4365)

* [Segment Replication] Bump segment infos counter before commit during replica promotion

Signed-off-by: Suraj Singh <[email protected]>

* Add changelog entry

Signed-off-by: Suraj Singh <[email protected]>

Signed-off-by: Suraj Singh <[email protected]>
  • Loading branch information
dreamer-89 authored Sep 2, 2022
1 parent 70d911c commit c885686
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Add timeout on Mockito.verify to reduce flakiness in testReplicationOnDone test ([#4314](https://github.com/opensearch-project/OpenSearch/pull/4314))
- Commit workflow for dependabot changelog helper ([#4331](https://github.com/opensearch-project/OpenSearch/pull/4331))
- Fixed cancellation of segment replication events ([#4225](https://github.com/opensearch-project/OpenSearch/pull/4225))
- [Segment Replication] Bump segment infos counter before commit during replica promotion ([#4365](https://github.com/opensearch-project/OpenSearch/pull/4365))
- Bugs for dependabot changelog verifier workflow ([#4364](https://github.com/opensearch-project/OpenSearch/pull/4364))
- Fix flaky random test `NRTReplicationEngineTests.testUpdateSegments` ([#4352](https://github.com/opensearch-project/OpenSearch/pull/4352))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ public class NRTReplicationEngine extends Engine {
private final LocalCheckpointTracker localCheckpointTracker;
private final WriteOnlyTranslogManager translogManager;

// Amount added to SegmentInfos.counter by commitSegmentInfos() before the infos are committed.
private static final int SI_COUNTER_INCREMENT = 10;

public NRTReplicationEngine(EngineConfig engineConfig) {
super(engineConfig);
store.incRef();
Expand Down Expand Up @@ -142,6 +144,13 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th
/**
 * Commits the engine's latest {@link SegmentInfos} to the store, recording the local checkpoint
 * tracker's max sequence number and processed checkpoint, and then syncs the translog.
 *
 * <p>Before committing, the infos' {@code counter} is advanced by {@link #SI_COUNTER_INCREMENT}.
 *
 * @throws IOException propagated from the store commit or the translog sync
 */
public void commitSegmentInfos() throws IOException {
    // TODO: This method should wait for replication events to finalize.
    final SegmentInfos infosToCommit = getLatestSegmentInfos();

    // Workaround: the counter is used to generate new segment file names, so moving it forward
    // lowers the chance that a replica receives the same file name from two different primaries
    // during failover. The ideal solution is to identify the counter from the previous primary.
    infosToCommit.counter += SI_COUNTER_INCREMENT;
    infosToCommit.changed();

    store.commitSegmentInfos(
        infosToCommit,
        localCheckpointTracker.getMaxSeqNo(),
        localCheckpointTracker.getProcessedCheckpoint()
    );
    translogManager.syncTranslog();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ public void testCommitSegmentInfos() throws Exception {
// ensure getLatestSegmentInfos returns an updated infos ref with correct userdata.
final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos();
assertEquals(previousInfos.getGeneration(), latestSegmentInfos.getLastGeneration());
assertEquals(previousInfos.getVersion(), latestSegmentInfos.getVersion());
assertEquals(previousInfos.counter, latestSegmentInfos.counter);
Map<String, String> userData = latestSegmentInfos.getUserData();
assertEquals(processedCheckpoint, localCheckpointTracker.getProcessedCheckpoint());
assertEquals(maxSeqNo, Long.parseLong(userData.get(MAX_SEQ_NO)));
Expand Down

0 comments on commit c885686

Please sign in to comment.