@@ -206,10 +206,19 @@ private void performStateRecovery(final boolean enforceRecoverAfterTime, final S
         if (enforceRecoverAfterTime && recoverAfterTime != null) {
             if (scheduledRecovery.compareAndSet(false, true)) {
                 logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
-                threadPool.schedule(() -> {
-                    if (recovered.compareAndSet(false, true)) {
-                        logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
-                        recoveryRunnable.run();
+                threadPool.schedule(new AbstractRunnable() {
+                    @Override
+                    public void onFailure(Exception e) {
+                        logger.warn("delayed state recovery failed", e);
+                        resetRecoveredFlags();
+                    }
+
+                    @Override
+                    protected void doRun() {
+                        if (recovered.compareAndSet(false, true)) {
+                            logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
+                            recoveryRunnable.run();
+                        }
                     }
                 }, recoverAfterTime, ThreadPool.Names.GENERIC);
             }
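For context: the switch from a lambda to AbstractRunnable matters because Elasticsearch's AbstractRunnable catches anything doRun() throws and routes it to onFailure(), so a failure in the delayed recovery now resets the flags instead of escaping silently on the scheduler thread. A minimal sketch of that dispatch, simplified from the real class (which also provides onAfter() and onRejection() hooks):

// Simplified sketch of org.elasticsearch.common.util.concurrent.AbstractRunnable;
// the real class additionally runs onAfter() in a finally block.
public abstract class AbstractRunnable implements Runnable {
    @Override
    public final void run() {
        try {
            doRun();          // the subclass's actual work
        } catch (Exception e) {
            onFailure(e);     // failures are funneled here instead of escaping the thread
        }
    }

    public abstract void onFailure(Exception e);

    protected abstract void doRun() throws Exception;
}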
@@ -218,10 +227,8 @@ private void performStateRecovery(final boolean enforceRecoverAfterTime, final S
             threadPool.generic().execute(new AbstractRunnable() {
                 @Override
                 public void onFailure(final Exception e) {
-                    logger.warn("Recovery failed", e);
-                    // we reset `recovered` in the listener don't reset it here otherwise there might be a race
-                    // that resets it to false while a new recover is already running?
-                    GatewayService.this.onFailure("state recovery failed: " + e.getMessage());
+                    logger.warn("state recovery failed", e);
+                    resetRecoveredFlags();
                 }
 
                 @Override
@@ -233,11 +240,9 @@ protected void doRun() {
         }
     }
 
-    private void onFailure(final String message) {
+    private void resetRecoveredFlags() {
         recovered.set(false);
         scheduledRecovery.set(false);
-        // don't remove the block here, we don't want to allow anything in such a case
-        logger.info("metadata state not restored, reason: {}", message);
     }
 
     class RecoverStateUpdateTask extends ClusterStateUpdateTask {
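The retry behavior above hinges on the two AtomicBoolean guards: compareAndSet(false, true) lets exactly one caller schedule or run recovery, and resetRecoveredFlags() re-arms both so the next cluster-state event can try again. A self-contained sketch of this gate-and-rearm pattern (hypothetical RetryGate, not GatewayService code):

import java.util.concurrent.atomic.AtomicBoolean;

// Hypothetical illustration of the gate-and-rearm pattern used above.
class RetryGate {
    private final AtomicBoolean running = new AtomicBoolean();

    void tryRun(Runnable task) {
        if (running.compareAndSet(false, true)) {   // only the first concurrent caller wins
            try {
                task.run();
            } catch (RuntimeException e) {
                running.set(false);                 // re-arm so a later trigger can retry
                throw e;
            }
        }
    }
}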
@@ -257,10 +262,16 @@ public void clusterStateProcessed(final String source, final ClusterState oldSta
             logger.info("recovered [{}] indices into cluster_state", newState.metaData().indices().size());
         }
 
+        @Override
+        public void onNoLongerMaster(String source) {
+            logger.debug("stepped down as master before recovering state [{}]", source);
+            resetRecoveredFlags();
+        }
+
         @Override
         public void onFailure(final String source, final Exception e) {
             logger.info(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
-            GatewayService.this.onFailure("failed to update cluster state");
+            resetRecoveredFlags();
         }
     }
 
@@ -280,7 +291,8 @@ public ClusterState execute(final ClusterState currentState) {
 
         @Override
         public void onFailure(final String msg) {
-            GatewayService.this.onFailure(msg);
+            logger.info("state recovery failed: {}", msg);
+            resetRecoveredFlags();
         }
 
     }