Skip to content

Commit 49cf5c2

Browse files
committed
Reduce logging noise when stepping down as master before state recovery (#39950)
Reduces the logging noise from the state-recovery component when there are duelling (concurrent) master elections. Relates to #32006.
1 parent 57705ec commit 49cf5c2

File tree

1 file changed

+25
-13
lines changed

1 file changed

+25
-13
lines changed

server/src/main/java/org/elasticsearch/gateway/GatewayService.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,19 @@ private void performStateRecovery(final boolean enforceRecoverAfterTime, final S
206206
if (enforceRecoverAfterTime && recoverAfterTime != null) {
207207
if (scheduledRecovery.compareAndSet(false, true)) {
208208
logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
209-
threadPool.schedule(() -> {
210-
if (recovered.compareAndSet(false, true)) {
211-
logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
212-
recoveryRunnable.run();
209+
threadPool.schedule(new AbstractRunnable() {
210+
@Override
211+
public void onFailure(Exception e) {
212+
logger.warn("delayed state recovery failed", e);
213+
resetRecoveredFlags();
214+
}
215+
216+
@Override
217+
protected void doRun() {
218+
if (recovered.compareAndSet(false, true)) {
219+
logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
220+
recoveryRunnable.run();
221+
}
213222
}
214223
}, recoverAfterTime, ThreadPool.Names.GENERIC);
215224
}
@@ -218,10 +227,8 @@ private void performStateRecovery(final boolean enforceRecoverAfterTime, final S
218227
threadPool.generic().execute(new AbstractRunnable() {
219228
@Override
220229
public void onFailure(final Exception e) {
221-
logger.warn("Recovery failed", e);
222-
// we reset `recovered` in the listener don't reset it here otherwise there might be a race
223-
// that resets it to false while a new recover is already running?
224-
GatewayService.this.onFailure("state recovery failed: " + e.getMessage());
230+
logger.warn("state recovery failed", e);
231+
resetRecoveredFlags();
225232
}
226233

227234
@Override
@@ -233,11 +240,9 @@ protected void doRun() {
233240
}
234241
}
235242

236-
private void onFailure(final String message) {
243+
private void resetRecoveredFlags() {
237244
recovered.set(false);
238245
scheduledRecovery.set(false);
239-
// don't remove the block here, we don't want to allow anything in such a case
240-
logger.info("metadata state not restored, reason: {}", message);
241246
}
242247

243248
class RecoverStateUpdateTask extends ClusterStateUpdateTask {
@@ -257,10 +262,16 @@ public void clusterStateProcessed(final String source, final ClusterState oldSta
257262
logger.info("recovered [{}] indices into cluster_state", newState.metaData().indices().size());
258263
}
259264

265+
@Override
266+
public void onNoLongerMaster(String source) {
267+
logger.debug("stepped down as master before recovering state [{}]", source);
268+
resetRecoveredFlags();
269+
}
270+
260271
@Override
261272
public void onFailure(final String source, final Exception e) {
262273
logger.info(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
263-
GatewayService.this.onFailure("failed to update cluster state");
274+
resetRecoveredFlags();
264275
}
265276
}
266277

@@ -280,7 +291,8 @@ public ClusterState execute(final ClusterState currentState) {
280291

281292
@Override
282293
public void onFailure(final String msg) {
283-
GatewayService.this.onFailure(msg);
294+
logger.info("state recovery failed: {}", msg);
295+
resetRecoveredFlags();
284296
}
285297

286298
}

0 commit comments

Comments (0)