Skip to content

Commit 5ded950

Browse files
committed
[Segment Replication] Bump segment infos counter before commit during replica promotion (opensearch-project#4365)
* [Segment Replication] Bump segment infos counter before commit during replica promotion
  Signed-off-by: Suraj Singh <[email protected]>
* Add changelog entry
  Signed-off-by: Suraj Singh <[email protected]>

Signed-off-by: Suraj Singh <[email protected]>
1 parent 1edb733 commit 5ded950

File tree

3 files changed

+12
-0
lines changed

3 files changed

+12
-0
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
4242
- PR reference to checkout code for changelog verifier ([#4296](https://github.com/opensearch-project/OpenSearch/pull/4296))
4343
- Restore using the class ClusterInfoRequest and ClusterInfoRequestBuilder from package 'org.opensearch.action.support.master.info' for subclasses ([#4324](https://github.com/opensearch-project/OpenSearch/pull/4324))
4444
- Fixed cancellation of segment replication events ([#4225](https://github.com/opensearch-project/OpenSearch/pull/4225))
45+
- [Segment Replication] Bump segment infos counter before commit during replica promotion ([#4365](https://github.com/opensearch-project/OpenSearch/pull/4365))
4546

4647
### Security
4748

server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java

+9
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ public class NRTReplicationEngine extends Engine implements LifecycleAware {
5656
private final LocalCheckpointTracker localCheckpointTracker;
5757
private final WriteOnlyTranslogManager translogManager;
5858

59+
private static final int SI_COUNTER_INCREMENT = 10;
60+
5961
public NRTReplicationEngine(EngineConfig engineConfig) {
6062
super(engineConfig);
6163
store.incRef();
@@ -142,6 +144,13 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th
142144
public void commitSegmentInfos() throws IOException {
143145
// TODO: This method should wait for replication events to finalize.
144146
final SegmentInfos latestSegmentInfos = getLatestSegmentInfos();
147+
/*
148+
This is a workaround that reduces the chance of a conflict on replica nodes when the same file is copied
149+
from two different primaries during failover. Increasing the counter helps avoid this conflict because the counter is
150+
used to generate new segment file names. The ideal solution is to identify the counter from the previous primary.
151+
*/
152+
latestSegmentInfos.counter = latestSegmentInfos.counter + SI_COUNTER_INCREMENT;
153+
latestSegmentInfos.changed();
145154
store.commitSegmentInfos(latestSegmentInfos, localCheckpointTracker.getMaxSeqNo(), localCheckpointTracker.getProcessedCheckpoint());
146155
translogManager.syncTranslog();
147156
}

server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java

+2
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,8 @@ public void testCommitSegmentInfos() throws Exception {
245245
// ensure getLatestSegmentInfos returns an updated infos ref with correct userdata.
246246
final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos();
247247
assertEquals(previousInfos.getGeneration(), latestSegmentInfos.getLastGeneration());
248+
assertEquals(previousInfos.getVersion(), latestSegmentInfos.getVersion());
249+
assertEquals(previousInfos.counter, latestSegmentInfos.counter);
248250
Map<String, String> userData = latestSegmentInfos.getUserData();
249251
assertEquals(processedCheckpoint, localCheckpointTracker.getProcessedCheckpoint());
250252
assertEquals(maxSeqNo, Long.parseLong(userData.get(MAX_SEQ_NO)));

0 commit comments

Comments
 (0)