diff --git a/CHANGELOG.md b/CHANGELOG.md index 73bac28902e21..fa608891d0092 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Unwrap singleton DocValues in global ordinal value source of composite histogram aggregation ([#17740](https://github.com/opensearch-project/OpenSearch/pull/17740)) - Unwrap singleton DocValues in date histogram aggregation. ([#17643](https://github.com/opensearch-project/OpenSearch/pull/17643)) - Introduce 512 byte limit to search and ingest pipeline IDs ([#17786](https://github.com/opensearch-project/OpenSearch/pull/17786)) +- Avoid skewed segment replication lag metric ([#17831](https://github.com/opensearch-project/OpenSearch/pull/17831)) ### Dependencies - Bump `com.nimbusds:nimbus-jose-jwt` from 9.41.1 to 10.0.2 ([#17607](https://github.com/opensearch-project/OpenSearch/pull/17607), [#17669](https://github.com/opensearch-project/OpenSearch/pull/17669)) diff --git a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java index 76ef45158e3d5..36ef322b282f2 100644 --- a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java @@ -1307,7 +1307,8 @@ public synchronized void startReplicationLagTimers(ReplicationCheckpoint checkpo && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false && shouldSkipReplicationTimer(e.getKey()) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) - && cps.checkpointTimers.containsKey(latestReplicationCheckpoint)) { + && cps.checkpointTimers.containsKey(latestReplicationCheckpoint) + && cps.checkpointTimers.get(latestReplicationCheckpoint).startTime() == 0) { cps.checkpointTimers.get(latestReplicationCheckpoint).start(); } }); diff --git a/server/src/test/java/org/opensearch/index/seqno/ReplicationTrackerTests.java b/server/src/test/java/org/opensearch/index/seqno/ReplicationTrackerTests.java index 899e80965e4fd..b987c60dda333 100644 --- a/server/src/test/java/org/opensearch/index/seqno/ReplicationTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/ReplicationTrackerTests.java @@ -1870,6 +1870,8 @@ public void testSegmentReplicationCheckpointTracking() { tracker.setLatestReplicationCheckpoint(initialCheckpoint); tracker.startReplicationLagTimers(initialCheckpoint); + // retry start replication lag timers + tracker.startReplicationLagTimers(initialCheckpoint); tracker.setLatestReplicationCheckpoint(secondCheckpoint); tracker.startReplicationLagTimers(secondCheckpoint); tracker.setLatestReplicationCheckpoint(thirdCheckpoint);