Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Cannot communicate with HTTP/2 when reactor-netty is enabled ([#18599](https://github.com/opensearch-project/OpenSearch/pull/18599))
- Fix the visit of sub queries for HasParentQuery and HasChildQuery ([#18621](https://github.com/opensearch-project/OpenSearch/pull/18621))
- Fix the backward compatibility regression with COMPLEMENT for Regexp queries introduced in OpenSearch 3.0 ([#18640](https://github.com/opensearch-project/OpenSearch/pull/18640))
- Fix replication lag computation ([#18602](https://github.com/opensearch-project/OpenSearch/pull/18602))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.opensearch.OpenSearchCorruptionException;
import org.opensearch.common.Nullable;
import org.opensearch.common.SetOnce;
import org.opensearch.common.time.DateUtils;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.common.util.concurrent.AbstractRunnable;
import org.opensearch.common.util.concurrent.ConcurrentCollections;
Expand All @@ -31,12 +32,13 @@
import org.opensearch.threadpool.ThreadPool;

import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit;

import reactor.util.annotation.NonNull;

Expand All @@ -54,7 +56,7 @@ public class SegmentReplicator {
private final ReplicationCollection<SegmentReplicationTarget> onGoingReplications;
private final ReplicationCollection<MergedSegmentReplicationTarget> onGoingMergedSegmentReplications;
private final Map<ShardId, SegmentReplicationState> completedReplications = ConcurrentCollections.newConcurrentMap();
private final ConcurrentMap<ShardId, ConcurrentNavigableMap<Long, ReplicationCheckpointStats>> replicationCheckpointStats =
protected final ConcurrentMap<ShardId, ConcurrentNavigableMap<Long, ReplicationCheckpointStats>> replicationCheckpointStats =
ConcurrentCollections.newConcurrentMap();
private final ConcurrentMap<ShardId, ReplicationCheckpoint> primaryCheckpoint = ConcurrentCollections.newConcurrentMap();

Expand Down Expand Up @@ -167,9 +169,8 @@ public ReplicationStats getSegmentReplicationStats(final ShardId shardId) {

long bytesBehind = highestEntry.getValue().getBytesBehind();
long replicationLag = bytesBehind > 0L
? TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lowestEntry.getValue().getTimestamp())
? Duration.ofNanos(DateUtils.toLong(Instant.now()) - lowestEntry.getValue().getTimestamp()).toMillis()
: 0;

return new ReplicationStats(bytesBehind, bytesBehind, replicationLag);
}

Expand Down Expand Up @@ -217,7 +218,7 @@ protected void pruneCheckpointsUpToLastSync(final IndexShard indexShard) {
);

if (existingCheckpointStats != null && !existingCheckpointStats.isEmpty()) {
existingCheckpointStats.keySet().removeIf(key -> key < segmentInfoVersion);
existingCheckpointStats.keySet().removeIf(key -> key <= segmentInfoVersion);
Map.Entry<Long, ReplicationCheckpointStats> lastEntry = existingCheckpointStats.lastEntry();
if (lastEntry != null) {
lastEntry.getValue().setBytesBehind(calculateBytesBehind(latestCheckpoint, indexReplicationCheckPoint));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.opensearch.Version;
import org.opensearch.common.Nullable;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.common.time.DateUtils;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
Expand All @@ -19,6 +20,7 @@
import org.opensearch.index.store.StoreFileMetadata;

import java.io.IOException;
import java.time.Instant;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -56,11 +58,11 @@ private ReplicationCheckpoint(ShardId shardId, String codec) {
length = 0L;
this.codec = codec;
this.metadataMap = Collections.emptyMap();
this.createdTimeStamp = System.nanoTime();
this.createdTimeStamp = DateUtils.toLong(Instant.now());
}

public ReplicationCheckpoint(ShardId shardId, long primaryTerm, long segmentsGen, long segmentInfosVersion, String codec) {
this(shardId, primaryTerm, segmentsGen, segmentInfosVersion, 0L, codec, Collections.emptyMap(), System.nanoTime());
this(shardId, primaryTerm, segmentsGen, segmentInfosVersion, 0L, codec, Collections.emptyMap(), DateUtils.toLong(Instant.now()));
}

public ReplicationCheckpoint(
Expand All @@ -79,7 +81,7 @@ public ReplicationCheckpoint(
this.length = length;
this.codec = codec;
this.metadataMap = metadataMap;
this.createdTimeStamp = System.nanoTime();
this.createdTimeStamp = DateUtils.toLong(Instant.now());
}

public ReplicationCheckpoint(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateUtils;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.index.ReplicationStats;
Expand All @@ -38,6 +39,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -215,10 +217,10 @@ public void testGetSegmentReplicationStats_WhenNoReplication() {
assertEquals(0, replicationStats.maxBytesBehind);
}

public void testGetSegmentReplicationStats_WhileOnGoingReplicationAndPrimaryRefreshedToNewCheckPoint() {
public void testGetSegmentReplicationStats_WhileOnGoingReplicationAndPrimaryRefreshedToNewCheckPoint() throws InterruptedException {
ShardId shardId = new ShardId("index", "uuid", 0);
ReplicationCheckpoint firstReplicationCheckpoint = ReplicationCheckpoint.empty(shardId);

long baseTime = DateUtils.toLong(Instant.now());
StoreFileMetadata storeFileMetadata1 = new StoreFileMetadata("test-1", 500, "1", Version.LATEST, new BytesRef(500));
StoreFileMetadata storeFileMetadata2 = new StoreFileMetadata("test-2", 500, "1", Version.LATEST, new BytesRef(500));
Map<String, StoreFileMetadata> stringStoreFileMetadataMapOne = new HashMap<>();
Expand All @@ -232,7 +234,7 @@ public void testGetSegmentReplicationStats_WhileOnGoingReplicationAndPrimaryRefr
1000,
"",
stringStoreFileMetadataMapOne,
System.nanoTime() - TimeUnit.MINUTES.toNanos(1)
baseTime - 5_000_000
);

IndexShard replicaShard = mock(IndexShard.class);
Expand Down Expand Up @@ -260,7 +262,7 @@ public void testGetSegmentReplicationStats_WhileOnGoingReplicationAndPrimaryRefr
200,
"",
stringStoreFileMetadataMapTwo,
System.nanoTime() - TimeUnit.MINUTES.toNanos(1)
baseTime - 1_000_000
);

segmentReplicator.updateReplicationCheckpointStats(thirdReplicationCheckpoint, replicaShard);
Expand All @@ -276,6 +278,16 @@ public void testGetSegmentReplicationStats_WhileOnGoingReplicationAndPrimaryRefr
assertEquals(200, replicationStatsSecond.totalBytesBehind);
assertEquals(200, replicationStatsSecond.maxBytesBehind);
assertTrue(replicationStatsSecond.maxReplicationLag > 0);

// shard finished syncing to last checkpoint (sis 3)
when(replicaShard.getLatestReplicationCheckpoint()).thenReturn(thirdReplicationCheckpoint);
segmentReplicator.pruneCheckpointsUpToLastSync(replicaShard);
ReplicationStats finalStats = segmentReplicator.getSegmentReplicationStats(shardId);
assertEquals(0, finalStats.totalBytesBehind);
assertEquals(0, finalStats.maxBytesBehind);
assertEquals(0, finalStats.maxReplicationLag);
// shard is up to date, should not have any tracked stats
assertTrue(segmentReplicator.replicationCheckpointStats.get(shardId).isEmpty());
}

public void testGetSegmentReplicationStats_WhenCheckPointReceivedOutOfOrder() {
Expand Down
Loading