4646import org .opensearch .test .MockLogAppender ;
4747import org .opensearch .test .OpenSearchTestCase ;
4848import org .opensearch .test .junit .annotations .TestLogging ;
49+ import org .opensearch .test .telemetry .TestInMemoryMetricsRegistry ;
4950import org .opensearch .threadpool .TestThreadPool ;
5051import org .opensearch .threadpool .ThreadPool ;
5152import org .junit .Before ;
7172public class FsHealthServiceTests extends OpenSearchTestCase {
7273
7374 private DeterministicTaskQueue deterministicTaskQueue ;
75+ private TestInMemoryMetricsRegistry metricsRegistry ;
7476
7577 @ Before
7678 public void createObjects () {
7779 Settings settings = Settings .builder ().put (NODE_NAME_SETTING .getKey (), "node" ).build ();
7880 deterministicTaskQueue = new DeterministicTaskQueue (settings , random ());
81+ metricsRegistry = new TestInMemoryMetricsRegistry ();
7982 }
8083
8184 public void testSchedulesHealthCheckAtRefreshIntervals () throws Exception {
8285 long refreshInterval = randomLongBetween (1000 , 12000 );
8386 final Settings settings = Settings .builder ().put (FsHealthService .REFRESH_INTERVAL_SETTING .getKey (), refreshInterval + "ms" ).build ();
8487 final ClusterSettings clusterSettings = new ClusterSettings (Settings .EMPTY , ClusterSettings .BUILT_IN_CLUSTER_SETTINGS );
8588 try (NodeEnvironment env = newNodeEnvironment ()) {
86- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , deterministicTaskQueue .getThreadPool (), env );
89+ FsHealthService fsHealthService = new FsHealthService (
90+ settings ,
91+ clusterSettings ,
92+ deterministicTaskQueue .getThreadPool (),
93+ env ,
94+ metricsRegistry
95+ );
8796 final long startTimeMillis = deterministicTaskQueue .getCurrentTimeMillis ();
8897 fsHealthService .doStart ();
8998 assertFalse (deterministicTaskQueue .hasRunnableTasks ());
@@ -117,17 +126,17 @@ public void testFailsHealthOnIOException() throws IOException {
117126 final ClusterSettings clusterSettings = new ClusterSettings (Settings .EMPTY , ClusterSettings .BUILT_IN_CLUSTER_SETTINGS );
118127 TestThreadPool testThreadPool = new TestThreadPool (getClass ().getName (), settings );
119128 try (NodeEnvironment env = newNodeEnvironment ()) {
120- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
129+ FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env , metricsRegistry );
121130 fsHealthService .new FsHealthMonitor ().run ();
122131 assertEquals (HEALTHY , fsHealthService .getHealth ().getStatus ());
123132 assertEquals ("health check passed" , fsHealthService .getHealth ().getInfo ());
124133
125134 // disrupt file system
126135 disruptFileSystemProvider .restrictPathPrefix ("" ); // disrupt all paths
127136 disruptFileSystemProvider .injectIOException .set (true );
128- fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
129137 fsHealthService .new FsHealthMonitor ().run ();
130138 assertEquals (UNHEALTHY , fsHealthService .getHealth ().getStatus ());
139+ assertEquals (Integer .valueOf (1 ), metricsRegistry .getCounterStore ().get ("fsHealth.failure.count" ).getCounterValue ());
131140 for (Path path : env .nodeDataPaths ()) {
132141 assertTrue (fsHealthService .getHealth ().getInfo ().contains (path .toString ()));
133142 }
@@ -160,7 +169,7 @@ public void testLoggingOnHungIO() throws Exception {
160169 MockLogAppender mockAppender = MockLogAppender .createForLoggers (LogManager .getLogger (FsHealthService .class ));
161170 NodeEnvironment env = newNodeEnvironment ()
162171 ) {
163- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
172+ FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env , metricsRegistry );
164173 int counter = 0 ;
165174 for (Path path : env .nodeDataPaths ()) {
166175 mockAppender .addExpectation (
@@ -202,7 +211,7 @@ public void testFailsHealthOnHungIOBeyondHealthyTimeout() throws Exception {
202211 PathUtilsForTesting .installMock (fileSystem );
203212 final ClusterSettings clusterSettings = new ClusterSettings (Settings .EMPTY , ClusterSettings .BUILT_IN_CLUSTER_SETTINGS );
204213 try (NodeEnvironment env = newNodeEnvironment ()) {
205- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
214+ FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env , metricsRegistry );
206215 logger .info ("--> Initial health status prior to the first monitor run" );
207216 StatusInfo fsHealth = fsHealthService .getHealth ();
208217 assertEquals (HEALTHY , fsHealth .getStatus ());
@@ -214,30 +223,29 @@ public void testFailsHealthOnHungIOBeyondHealthyTimeout() throws Exception {
214223 assertEquals ("health check passed" , fsHealth .getInfo ());
215224 logger .info ("--> Disrupt file system" );
216225 disruptFileSystemProvider .injectIODelay .set (true );
217- final FsHealthService fsHealthSrvc = new FsHealthService (settings , clusterSettings , testThreadPool , env );
218- fsHealthSrvc .doStart ();
226+ fsHealthService .doStart ();
219227 waitUntil (
220- () -> fsHealthSrvc .getHealth ().getStatus () == UNHEALTHY ,
228+ () -> fsHealthService .getHealth ().getStatus () == UNHEALTHY ,
221229 healthyTimeoutThreshold + (2 * refreshInterval ),
222230 TimeUnit .MILLISECONDS
223231 );
224- fsHealth = fsHealthSrvc .getHealth ();
232+ fsHealth = fsHealthService .getHealth ();
225233 assertEquals (UNHEALTHY , fsHealth .getStatus ());
226234 assertEquals ("healthy threshold breached" , fsHealth .getInfo ());
227235 int disruptedPathCount = disruptFileSystemProvider .getInjectedPathCount ();
228236 assertThat (disruptedPathCount , equalTo (1 ));
229237 logger .info ("--> Fix file system disruption" );
230238 disruptFileSystemProvider .injectIODelay .set (false );
231239 waitUntil (
232- () -> fsHealthSrvc .getHealth ().getStatus () == HEALTHY ,
240+ () -> fsHealthService .getHealth ().getStatus () == HEALTHY ,
233241 delayBetweenChecks + (4 * refreshInterval ),
234242 TimeUnit .MILLISECONDS
235243 );
236- fsHealth = fsHealthSrvc .getHealth ();
244+ fsHealth = fsHealthService .getHealth ();
237245 assertEquals (HEALTHY , fsHealth .getStatus ());
238246 assertEquals ("health check passed" , fsHealth .getInfo ());
239247 assertEquals (disruptedPathCount , disruptFileSystemProvider .getInjectedPathCount ());
240- fsHealthSrvc .doStop ();
248+ fsHealthService .doStop ();
241249 } finally {
242250 PathUtilsForTesting .teardown ();
243251 ThreadPool .terminate (testThreadPool , 500 , TimeUnit .MILLISECONDS );
@@ -254,7 +262,7 @@ public void testFailsHealthOnSinglePathFsyncFailure() throws IOException {
254262 TestThreadPool testThreadPool = new TestThreadPool (getClass ().getName (), settings );
255263 try (NodeEnvironment env = newNodeEnvironment ()) {
256264 Path [] paths = env .nodeDataPaths ();
257- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
265+ FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env , metricsRegistry );
258266 fsHealthService .new FsHealthMonitor ().run ();
259267 assertEquals (HEALTHY , fsHealthService .getHealth ().getStatus ());
260268 assertEquals ("health check passed" , fsHealthService .getHealth ().getInfo ());
@@ -263,9 +271,9 @@ public void testFailsHealthOnSinglePathFsyncFailure() throws IOException {
263271 disruptFsyncFileSystemProvider .injectIOException .set (true );
264272 String disruptedPath = randomFrom (paths ).toString ();
265273 disruptFsyncFileSystemProvider .restrictPathPrefix (disruptedPath );
266- fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
267274 fsHealthService .new FsHealthMonitor ().run ();
268275 assertEquals (UNHEALTHY , fsHealthService .getHealth ().getStatus ());
276+ assertEquals (Integer .valueOf (1 ), metricsRegistry .getCounterStore ().get ("fsHealth.failure.count" ).getCounterValue ());
269277 assertThat (fsHealthService .getHealth ().getInfo (), is ("health check failed on [" + disruptedPath + "]" ));
270278 assertEquals (1 , disruptFsyncFileSystemProvider .getInjectedPathCount ());
271279 } finally {
@@ -285,7 +293,7 @@ public void testFailsHealthOnSinglePathWriteFailure() throws IOException {
285293 TestThreadPool testThreadPool = new TestThreadPool (getClass ().getName (), settings );
286294 try (NodeEnvironment env = newNodeEnvironment ()) {
287295 Path [] paths = env .nodeDataPaths ();
288- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
296+ FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env , metricsRegistry );
289297 fsHealthService .new FsHealthMonitor ().run ();
290298 assertEquals (HEALTHY , fsHealthService .getHealth ().getStatus ());
291299 assertEquals ("health check passed" , fsHealthService .getHealth ().getInfo ());
@@ -294,9 +302,9 @@ public void testFailsHealthOnSinglePathWriteFailure() throws IOException {
294302 String disruptedPath = randomFrom (paths ).toString ();
295303 disruptWritesFileSystemProvider .restrictPathPrefix (disruptedPath );
296304 disruptWritesFileSystemProvider .injectIOException .set (true );
297- fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
298305 fsHealthService .new FsHealthMonitor ().run ();
299306 assertEquals (UNHEALTHY , fsHealthService .getHealth ().getStatus ());
307+ assertEquals (Integer .valueOf (1 ), metricsRegistry .getCounterStore ().get ("fsHealth.failure.count" ).getCounterValue ());
300308 assertThat (fsHealthService .getHealth ().getInfo (), is ("health check failed on [" + disruptedPath + "]" ));
301309 assertEquals (1 , disruptWritesFileSystemProvider .getInjectedPathCount ());
302310 } finally {
@@ -319,17 +327,17 @@ public void testFailsHealthOnUnexpectedLockFileSize() throws IOException {
319327 PathUtilsForTesting .installMock (fileSystem );
320328 final ClusterSettings clusterSettings = new ClusterSettings (Settings .EMPTY , ClusterSettings .BUILT_IN_CLUSTER_SETTINGS );
321329 try (NodeEnvironment env = newNodeEnvironment ()) {
322- FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
330+ FsHealthService fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env , metricsRegistry );
323331 fsHealthService .new FsHealthMonitor ().run ();
324332 assertEquals (HEALTHY , fsHealthService .getHealth ().getStatus ());
325333 assertEquals ("health check passed" , fsHealthService .getHealth ().getInfo ());
326334
327335 // enabling unexpected file size injection
328336 unexpectedLockFileSizeFileSystemProvider .injectUnexpectedFileSize .set (true );
329337
330- fsHealthService = new FsHealthService (settings , clusterSettings , testThreadPool , env );
331338 fsHealthService .new FsHealthMonitor ().run ();
332339 assertEquals (UNHEALTHY , fsHealthService .getHealth ().getStatus ());
340+ assertEquals (Integer .valueOf (1 ), metricsRegistry .getCounterStore ().get ("fsHealth.failure.count" ).getCounterValue ());
333341 assertThat (fsHealthService .getHealth ().getInfo (), is ("health check failed due to broken node lock" ));
334342 assertEquals (1 , unexpectedLockFileSizeFileSystemProvider .getInjectedPathCount ());
335343 } finally {
0 commit comments