@@ -1192,11 +1192,9 @@ public Store.MetadataSnapshot snapshotStoreMetadata() throws IOException {
11921192 synchronized (engineMutex ) {
11931193 // if the engine is not running, we can access the store directly, but we need to make sure no one starts
11941194 // the engine on us. If the engine is running, we can get a snapshot via the deletion policy of the engine.
1195- synchronized (mutex ) {
1196- final Engine engine = getEngineOrNull ();
1197- if (engine != null ) {
1198- indexCommit = engine .acquireLastIndexCommit (false );
1199- }
1195+ final Engine engine = getEngineOrNull ();
1196+ if (engine != null ) {
1197+ indexCommit = engine .acquireLastIndexCommit (false );
12001198 }
12011199 if (indexCommit == null ) {
12021200 return store .getMetadata (null , true );
@@ -1320,9 +1318,11 @@ public CacheHelper getReaderCacheHelper() {
13201318 }
13211319
13221320 public void close (String reason , boolean flushEngine ) throws IOException {
1323- synchronized (mutex ) {
1321+ synchronized (engineMutex ) {
13241322 try {
1325- changeState (IndexShardState .CLOSED , reason );
1323+ synchronized (mutex ) {
1324+ changeState (IndexShardState .CLOSED , reason );
1325+ }
13261326 } finally {
13271327 final Engine engine = this .currentEngineReference .getAndSet (null );
13281328 try {
@@ -1377,6 +1377,7 @@ public void prepareForIndexRecovery() {
13771377 * This is the first operation after the local checkpoint of the safe commit if exists.
13781378 */
13791379 public long recoverLocallyUpToGlobalCheckpoint () {
1380+ assert Thread .holdsLock (mutex ) == false : "recover locally under mutex" ;
13801381 if (state != IndexShardState .RECOVERING ) {
13811382 throw new IndexShardNotRecoveringException (shardId , state );
13821383 }
@@ -1428,7 +1429,7 @@ public long recoverLocallyUpToGlobalCheckpoint() {
14281429 getEngine ().recoverFromTranslog (translogRecoveryRunner , globalCheckpoint );
14291430 logger .trace ("shard locally recovered up to {}" , getEngine ().getSeqNoStats (globalCheckpoint ));
14301431 } finally {
1431- synchronized (mutex ) {
1432+ synchronized (engineMutex ) {
14321433 IOUtils .close (currentEngineReference .getAndSet (null ));
14331434 }
14341435 }
@@ -1603,23 +1604,15 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) t
16031604 : "expected empty set of retention leases with recovery source [" + recoveryState .getRecoverySource ()
16041605 + "] but got " + getRetentionLeases ();
16051606 synchronized (engineMutex ) {
1607+ assert currentEngineReference .get () == null : "engine is running" ;
1608+ verifyNotClosed ();
16061609 // we must create a new engine under mutex (see IndexShard#snapshotStoreMetadata).
16071610 final Engine newEngine = engineFactory .newReadWriteEngine (config );
1608- synchronized (mutex ) {
1609- try {
1610- verifyNotClosed ();
1611- assert currentEngineReference .get () == null : "engine is running" ;
1612- onNewEngine (newEngine );
1613- currentEngineReference .set (newEngine );
1614- // We set active because we are now writing operations to the engine; this way,
1615- // if we go idle after some time and become inactive, we still give sync'd flush a chance to run.
1616- active .set (true );
1617- } finally {
1618- if (currentEngineReference .get () != newEngine ) {
1619- newEngine .close ();
1620- }
1621- }
1622- }
1611+ onNewEngine (newEngine );
1612+ currentEngineReference .set (newEngine );
1613+ // We set active because we are now writing operations to the engine; this way,
1614+ // if we go idle after some time and become inactive, we still give sync'd flush a chance to run.
1615+ active .set (true );
16231616 }
16241617 // time elapses after the engine is created above (pulling the config settings) until we set the engine reference, during
16251618 // which settings changes could possibly have happened, so here we forcefully push any config changes to the new engine.
@@ -1650,7 +1643,8 @@ private void onNewEngine(Engine newEngine) {
16501643 * called if recovery has to be restarted after network error / delay **
16511644 */
16521645 public void performRecoveryRestart () throws IOException {
1653- synchronized (mutex ) {
1646+ assert Thread .holdsLock (mutex ) == false : "restart recovery under mutex" ;
1647+ synchronized (engineMutex ) {
16541648 assert refreshListeners .pendingCount () == 0 : "we can't restart with pending listeners" ;
16551649 IOUtils .close (currentEngineReference .getAndSet (null ));
16561650 resetRecoveryStage ();
@@ -3325,7 +3319,7 @@ public ParsedDocument newNoopTombstoneDoc(String reason) {
33253319 * Rollback the current engine to the safe commit, then replay local translog up to the global checkpoint.
33263320 */
33273321 void resetEngineToGlobalCheckpoint () throws IOException {
3328- assert Thread .holdsLock (engineMutex ) == false : "resetting engine under mutex" ;
3322+ assert Thread .holdsLock (mutex ) == false : "resetting engine under mutex" ;
33293323 assert getActiveOperationsCount () == OPERATIONS_BLOCKED
33303324 : "resetting engine without blocking operations; active operations are [" + getActiveOperations () + ']' ;
33313325 sync (); // persist the global checkpoint to disk
@@ -3338,6 +3332,7 @@ assert getActiveOperationsCount() == OPERATIONS_BLOCKED
33383332 final long globalCheckpoint = getLastKnownGlobalCheckpoint ();
33393333 assert globalCheckpoint == getLastSyncedGlobalCheckpoint ();
33403334 synchronized (engineMutex ) {
3335+ verifyNotClosed ();
33413336 // we must create both new read-only engine and new read-write engine under engineMutex to ensure snapshotStoreMetadata,
33423337 // acquireXXXCommit and close works.
33433338 final Engine readOnlyEngine =
@@ -3365,7 +3360,7 @@ public IndexCommitRef acquireSafeIndexCommit() {
33653360
33663361 @ Override
33673362 public void close () throws IOException {
3368- assert Thread .holdsLock (mutex );
3363+ assert Thread .holdsLock (engineMutex );
33693364
33703365 Engine newEngine = newEngineReference .get ();
33713366 if (newEngine == currentEngineReference .get ()) {
@@ -3375,36 +3370,17 @@ public void close() throws IOException {
33753370 IOUtils .close (super ::close , newEngine );
33763371 }
33773372 };
3378- synchronized (mutex ) {
3379- try {
3380- verifyNotClosed ();
3381- IOUtils .close (currentEngineReference .getAndSet (readOnlyEngine ));
3382- } finally {
3383- if (currentEngineReference .get () != readOnlyEngine ) {
3384- readOnlyEngine .close ();
3385- }
3386- }
3387- }
3388- final Engine newReadWriteEngine = engineFactory .newReadWriteEngine (newEngineConfig (replicationTracker ));
3389- synchronized (mutex ) {
3390- try {
3391- verifyNotClosed ();
3392- newEngineReference .set (newReadWriteEngine );
3393- onNewEngine (newReadWriteEngine );
3394- } finally {
3395- if (newEngineReference .get () != newReadWriteEngine ) {
3396- newReadWriteEngine .close (); // shard was closed
3397- }
3398- }
3399- }
3373+ IOUtils .close (currentEngineReference .getAndSet (readOnlyEngine ));
3374+ newEngineReference .set (engineFactory .newReadWriteEngine (newEngineConfig (replicationTracker )));
3375+ onNewEngine (newEngineReference .get ());
34003376 }
34013377 final Engine .TranslogRecoveryRunner translogRunner = (engine , snapshot ) -> runTranslogRecovery (
34023378 engine , snapshot , Engine .Operation .Origin .LOCAL_RESET , () -> {
34033379 // TODO: add a dedicate recovery stats for the reset translog
34043380 });
34053381 newEngineReference .get ().recoverFromTranslog (translogRunner , globalCheckpoint );
34063382 newEngineReference .get ().refresh ("reset_engine" );
3407- synchronized (mutex ) {
3383+ synchronized (engineMutex ) {
34083384 verifyNotClosed ();
34093385 IOUtils .close (currentEngineReference .getAndSet (newEngineReference .get ()));
34103386 // We set active because we are now writing operations to the engine; this way,
0 commit comments