Skip to content

Commit edfa48a

Browse files
author
Andrey Ershov
committed
SnapshotDisruptionIT
Without these changes the test fails, because Zen2 retries snapshot creation as soon as the network partition heals. This results in a race between creating the snapshot and the test cleanup logic (deleting the index). Zen1, on the other hand, also schedules a retry, but it takes some time after the network partition heals, so the cleanup logic executes later and the test passes. A check that the snapshot is eventually created is added to the end of the test.
1 parent 3395e2e commit edfa48a

File tree

1 file changed

+18
-10
lines changed

1 file changed

+18
-10
lines changed

server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ protected Settings nodeSettings(int nodeOrdinal) {
6868
return Settings.builder().put(super.nodeSettings(nodeOrdinal))
6969
.put(AbstractDisruptionTestCase.DEFAULT_SETTINGS)
7070
.put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false)
71-
.put(TestZenDiscovery.USE_ZEN2.getKey(), false) // requires more work
7271
.put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s")
7372
.build();
7473
}
@@ -133,7 +132,7 @@ public void clusterChanged(ClusterChangedEvent event) {
133132

134133
logger.info("--> wait until the snapshot is done");
135134
assertBusy(() -> {
136-
SnapshotsInProgress snapshots = dataNodeClient().admin().cluster().prepareState().setLocal(true).get().getState()
135+
SnapshotsInProgress snapshots = dataNodeClient().admin().cluster().prepareState().setLocal(false).get().getState()
137136
.custom(SnapshotsInProgress.TYPE);
138137
if (snapshots != null && snapshots.entries().size() > 0) {
139138
logger.info("Current snapshot state [{}]", snapshots.entries().get(0).state());
@@ -146,15 +145,9 @@ public void clusterChanged(ClusterChangedEvent event) {
146145
logger.info("--> verify that snapshot was successful or no longer exist");
147146
assertBusy(() -> {
148147
try {
149-
GetSnapshotsResponse snapshotsStatusResponse = dataNodeClient().admin().cluster().prepareGetSnapshots("test-repo")
150-
.setSnapshots("test-snap-2").get();
151-
SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
152-
assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
153-
assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards());
154-
assertEquals(0, snapshotInfo.failedShards());
155-
logger.info("--> done verifying");
148+
assertSnapshotExists("test-repo", "test-snap-2");
156149
} catch (SnapshotMissingException exception) {
157-
logger.info("--> snapshot doesn't exist");
150+
logger.info("--> done verifying, snapshot doesn't exist");
158151
}
159152
}, 1, TimeUnit.MINUTES);
160153

@@ -172,6 +165,21 @@ public void clusterChanged(ClusterChangedEvent event) {
172165
cause = cause.getCause();
173166
assertThat(cause, instanceOf(FailedToCommitClusterStateException.class));
174167
}
168+
169+
logger.info("--> verify that snapshot eventually will be created due to retries");
170+
assertBusy(() -> {
171+
assertSnapshotExists("test-repo", "test-snap-2");
172+
}, 1, TimeUnit.MINUTES);
173+
}
174+
175+
private void assertSnapshotExists(String repository, String snapshot) {
176+
GetSnapshotsResponse snapshotsStatusResponse = dataNodeClient().admin().cluster().prepareGetSnapshots(repository)
177+
.setSnapshots(snapshot).get();
178+
SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
179+
assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
180+
assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards());
181+
assertEquals(0, snapshotInfo.failedShards());
182+
logger.info("--> done verifying, snapshot exists");
175183
}
176184

177185
private void createRandomIndex(String idxName) throws InterruptedException {

0 commit comments

Comments
 (0)