Skip to content

Commit 728fd85

Browse files
authored
[Merge-on-Red] - Implement Test Process Watcher (#78742)
Initial implementation of the test watcher that looks out for hangs and freezes during test runs.
1 parent f52e277 commit 728fd85

File tree

6 files changed

+186
-9
lines changed

6 files changed

+186
-9
lines changed

src/coreclr/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ else()
119119
endif()
120120
endif()
121121

122+
#----------------------------------------------------
123+
# Build the test watchdog alongside the CLR
124+
#----------------------------------------------------
125+
add_subdirectory("${CLR_SRC_NATIVE_DIR}/watchdog" test-watchdog)
126+
122127
# Add this subdir. We install the headers for the jit.
123128
add_subdirectory(pal/prebuilt/inc)
124129

@@ -275,3 +280,4 @@ endif(NOT CLR_CMAKE_HOST_MACCATALYST AND NOT CLR_CMAKE_HOST_IOS AND NOT CLR_CMAK
275280
if(CLR_CROSS_COMPONENTS_BUILD)
276281
include(crosscomponents.cmake)
277282
endif(CLR_CROSS_COMPONENTS_BUILD)
283+

src/native/watchdog/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
add_executable_clr(watchdog ${CMAKE_CURRENT_LIST_DIR}/watchdog.cpp)
2+
install_clr(TARGETS watchdog DESTINATIONS . COMPONENT hosts)
3+
install_clr(TARGETS watchdog DESTINATIONS . COMPONENT nativeaot)
4+

src/native/watchdog/watchdog.cpp

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#include <cstdio>
5+
#include <cstdlib>
6+
#include <errno.h>
7+
#include <signal.h>
8+
9+
#ifdef TARGET_WINDOWS
10+
11+
#include <windows.h>
12+
#include <string>
13+
14+
#else // !TARGET_WINDOWS
15+
16+
#include <chrono>
17+
#include <sys/wait.h>
18+
#include <thread>
19+
#include <unistd.h>
20+
#include <vector>
21+
22+
#endif // TARGET_WINDOWS
23+
24+
int run_timed_process(const long, const int, const char *[]);

// Entry point of the test watchdog.
//
// Usage: watchdog <timeout-in-seconds> <executable> [args...]
//
// Runs <executable> with the given arguments under a time limit and returns
// the exit code reported by run_timed_process. Returns EXIT_FAILURE when the
// command line is incomplete or the timeout is not a valid number of seconds.
#ifdef TARGET_X86
int __cdecl main(const int argc, const char *argv[])
#else
int main(const int argc, const char *argv[])
#endif
{
    if (argc < 3)
    {
        printf("There are missing arguments. Got %d instead of 3+ :(\n", argc);
        return EXIT_FAILURE;
    }

    // Largest number of seconds whose millisecond value still fits in a
    // 32-bit long (long is 32 bits wide on Windows): 2147483 s is ~24.8 days.
    const long max_timeout_sec = 2147483L;

    // strtol silently yields 0 for garbage input, so verify that the whole
    // argument was consumed and that the value is in the supported range.
    errno = 0;
    char *parse_end = nullptr;
    const long timeout_sec = strtol(argv[1], &parse_end, 10);

    if (parse_end == argv[1] || *parse_end != '\0' || errno == ERANGE
        || timeout_sec < 0 || timeout_sec > max_timeout_sec)
    {
        printf("Invalid timeout '%s'. Expected a number of seconds between 0 and %ld.\n",
               argv[1], max_timeout_sec);
        return EXIT_FAILURE;
    }

    // Everything after the timeout is the command line of the watched process.
    const int exit_code = run_timed_process(timeout_sec * 1000L, argc - 2, &argv[2]);

    printf("App Exit Code: %d\n", exit_code);
    return exit_code;
}
44+
45+
#ifdef TARGET_WINDOWS
// Appends `arg` to `cmdline` as a single command-line token, adding quotes and
// backslash escapes where needed so that the child's C runtime parses it back
// into exactly `arg` (arguments with spaces, quotes or trailing backslashes).
static void append_quoted_arg(std::string& cmdline, const char* arg)
{
    const std::string text(arg);

    if (!text.empty() && text.find_first_of(" \t\n\v\"") == std::string::npos)
    {
        // Nothing in the argument needs protection.
        cmdline.append(text);
        return;
    }

    cmdline.push_back('"');
    for (size_t i = 0; ; i++)
    {
        size_t backslashes = 0;
        while (i < text.size() && text[i] == '\\')
        {
            i++;
            backslashes++;
        }

        if (i == text.size())
        {
            // Backslashes right before the closing quote must be doubled so
            // that the quote is not treated as escaped.
            cmdline.append(backslashes * 2, '\\');
            break;
        }

        if (text[i] == '"')
        {
            // Double the backslashes and escape the quote itself.
            cmdline.append(backslashes * 2 + 1, '\\');
        }
        else
        {
            // Backslashes not followed by a quote are literal.
            cmdline.append(backslashes, '\\');
        }
        cmdline.push_back(text[i]);
    }
    cmdline.push_back('"');
}
#endif // TARGET_WINDOWS

// Launches a child process and waits for it to finish, up to a time limit.
//
// Parameters:
//   timeout_ms - maximum time to wait for the child, in milliseconds.
//   proc_argc  - number of entries in proc_argv (must be at least 1).
//   proc_argv  - path of the executable followed by its arguments.
//
// Returns:
//   - the child's exit code when it exits on its own within the time limit;
//   - 128 + signal number when the child is terminated by a signal (non-Windows);
//   - 127 when the executable could not be launched (non-Windows);
//   - the OS error code when the process could not be created or waited on;
//   - EINVAL when no executable was specified;
//   - ETIMEDOUT when the time limit elapsed, in which case the child is killed.
int run_timed_process(const long timeout_ms, const int proc_argc, const char *proc_argv[])
{
    if (proc_argc < 1 || proc_argv == nullptr || proc_argv[0] == nullptr)
    {
        printf("No process to run was specified.\n");
        return EINVAL;
    }

#ifdef TARGET_WINDOWS
    // CreateProcess takes one flat command line, so rebuild it from the
    // already-split arguments, re-quoting each of them.
    std::string cmdline;
    for (int i = 0; i < proc_argc; i++)
    {
        if (i > 0)
            cmdline.push_back(' ');
        append_quoted_arg(cmdline, proc_argv[i]);
    }

    STARTUPINFOA startup_info;
    PROCESS_INFORMATION proc_info;

    ZeroMemory(&startup_info, sizeof(startup_info));
    startup_info.cb = sizeof(startup_info);
    ZeroMemory(&proc_info, sizeof(proc_info));

    if (!CreateProcessA(NULL, &cmdline[0], NULL, NULL, FALSE, 0, NULL, NULL,
                        &startup_info, &proc_info))
    {
        const int error_code = static_cast<int>(GetLastError());
        printf("Process creation failed... Code %d.\n", error_code);
        return error_code;
    }

    int result;
    const DWORD wait_result = WaitForSingleObject(proc_info.hProcess,
                                                  static_cast<DWORD>(timeout_ms));

    if (wait_result == WAIT_OBJECT_0)
    {
        // The child finished in time: report its exit code.
        DWORD exit_code = 0;
        if (GetExitCodeProcess(proc_info.hProcess, &exit_code))
            result = static_cast<int>(exit_code);
        else
            result = static_cast<int>(GetLastError());
    }
    else if (wait_result == WAIT_TIMEOUT)
    {
        // Without this the child would keep running and its exit code would
        // read back as STILL_ACTIVE (259).
        printf("Child process took too long. Timed out... Exiting...\n");
        TerminateProcess(proc_info.hProcess, ETIMEDOUT);
        result = ETIMEDOUT;
    }
    else
    {
        result = static_cast<int>(GetLastError());
        printf("Waiting for the child process failed... Code %d.\n", result);
        TerminateProcess(proc_info.hProcess, result);
    }

    CloseHandle(proc_info.hProcess);
    CloseHandle(proc_info.hThread);
    return result;

#else // !TARGET_WINDOWS

    const int check_interval_ms = 25;
    const long max_checks = timeout_ms / check_interval_ms;

    // execv expects a NULL-terminated argument vector.
    std::vector<const char*> args(proc_argv, proc_argv + proc_argc);
    args.push_back(nullptr);

    const pid_t child_pid = fork();

    if (child_pid < 0)
    {
        const int fork_error = errno;
        printf("Fork failed... Code %d.\n", fork_error);
        return fork_error;
    }

    if (child_pid == 0)
    {
        // Child process: replace ourselves with the requested program.
        execv(args[0], const_cast<char* const*>(args.data()));

        // Only reached when execv failed. The child must stop here: falling
        // through would run the parent's timeout path with child_pid == 0,
        // and kill(0, SIGKILL) signals the entire process group.
        fprintf(stderr, "Failed to launch %s... Code %d.\n", args[0], errno);
        _exit(127);
    }

    // Parent process: poll the child until it terminates or time runs out.
    for (long check_count = 0; ; check_count++)
    {
        int child_status = 0;
        const pid_t wait_code = waitpid(child_pid, &child_status, WNOHANG);

        if (wait_code == -1)
            return EINVAL;

        if (wait_code == child_pid)
        {
            if (WIFEXITED(child_status))
                return WEXITSTATUS(child_status);

            // Crashed or killed child: use the shell convention of
            // 128 + signal number so the failure is visible to the caller.
            if (WIFSIGNALED(child_status))
                return 128 + WTERMSIG(child_status);
        }

        if (check_count >= max_checks)
            break;

        std::this_thread::sleep_for(std::chrono::milliseconds(check_interval_ms));
    }

    printf("Child process took too long. Timed out... Exiting...\n");
    kill(child_pid, SIGKILL);

    // Reap the killed child so it does not linger as a zombie.
    waitpid(child_pid, nullptr, 0);
    return ETIMEDOUT;

#endif // TARGET_WINDOWS
}
136+

src/tests/Common/CLRTest.Execute.Bash.targets

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,18 @@ fi
187187
<Command><![CDATA[ export __DotEnv="${i#*=}"
188188
if [ ! -f "$__DotEnv" ]
189189
then
190-
echo "The Debugger FullPath %5C%22${__DotEnv}%5C%22 does not exist"
190+
echo "The dotenv file FullPath %5C%22${__DotEnv}%5C%22 does not exist"
191191
usage
192192
fi
193193
export __DotEnvArg=-e ${__DotEnv}]]></Command>
194194
<Description>A dotenv file to pass to corerun to set environment variables for the test run.</Description>
195195
</BashCLRTestExecutionScriptArgument>
196+
197+
<BashCLRTestExecutionScriptArgument Include="usewatcher">
198+
<HasParam>false</HasParam>
199+
<Command><![CDATA[ _RunWithWatcher=1]]></Command>
200+
<Description>Run the tests using the test watcher.</Description>
201+
</BashCLRTestExecutionScriptArgument>
196202
</ItemGroup>
197203

198204
<PropertyGroup>
@@ -250,10 +256,11 @@ then
250256
exit 1
251257
fi
252258
253-
# Copy CORECLR native binaries to $LinkBin,
259+
# Copy CORECLR native binaries and the test watcher to $LinkBin,
254260
# so that we can run the test based on that directory
255261
cp $CORE_ROOT/*.so $LinkBin/
256262
cp $CORE_ROOT/corerun $LinkBin/
263+
cp $CORE_ROOT/watchdog $LinkBin/
257264
258265
# Copy some files that may be arguments
259266
for f in *.txt;
@@ -283,6 +290,7 @@ fi
283290
</PropertyGroup>
284291
<PropertyGroup>
285292
<CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"$CORE_ROOT/corerun" $(CoreRunArgs) ${__DotEnvArg}</CLRTestRunFile>
293+
<WatcherRunFile>"$CORE_ROOT/watchdog" 300</WatcherRunFile>
286294

287295
<!-- Note that this overwrites CLRTestBashPreCommands rather than adding to it. -->
288296
<CLRTestBashPreCommands Condition="'$(CLRTestKind)' == 'BuildAndRun' and '$(TargetArchitecture)' == 'wasm'"><![CDATA[
@@ -318,6 +326,9 @@ fi
318326
if [ ! -z "$CLRCustomTestLauncher" ];
319327
then
320328
LAUNCHER="$CLRCustomTestLauncher $PWD/"
329+
elif [ "$_RunWithWatcher" == 1 ];
330+
then
331+
LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
321332
else
322333
LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
323334
fi
@@ -346,8 +357,11 @@ $(BashLinkerTestLaunchCmds)
346357
if [ ! -z "$CLRCustomTestLauncher" ];
347358
then
348359
LAUNCHER="$CLRCustomTestLauncher $PWD/"
360+
elif [ "$_RunWithWatcher" == 1 ];
361+
then
362+
LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
349363
else
350-
LAUNCHER="$_DebuggerFullPath $(CLRTestRunFile)"
364+
LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
351365
fi
352366
353367
$(BashIlrtTestLaunchCmds)
@@ -484,7 +498,7 @@ usage()
484498
for i in "$@"
485499
do
486500
case $i in
487-
-?|-h|--help)
501+
-?|-h|--help|/?|/h|/help)
488502
usage
489503
%3B%3B
490504
@(BashCLRTestExecutionScriptArgument -> ' -%(Identity)%(ParamText)|/%(Identity)%(ParamText))
@@ -534,6 +548,7 @@ ReleaseLock()
534548
}
535549
cd "$%28dirname "${BASH_SOURCE[0]}")"
536550
LockFile="lock"
551+
_RunWithWatcher=0
537552
538553
539554
# The __TestEnv variable may be used to specify a script to source before the test.

src/tests/Common/CLRTest.Execute.Batch.targets

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,14 @@ Exit /b 0
216216
]]></Command>
217217
<Description>Set CORE_ROOT to the specified value before running the test.</Description>
218218
</BatchCLRTestExecutionScriptArgument>
219+
220+
<BatchCLRTestExecutionScriptArgument Include="usewatcher">
221+
<HasParam>false</HasParam>
222+
<Command><![CDATA[
223+
set /A _RunWithWatcher=1
224+
]]></Command>
225+
<Description>Run the tests using the test watcher.</Description>
226+
</BatchCLRTestExecutionScriptArgument>
219227
</ItemGroup>
220228

221229
<PropertyGroup>
@@ -260,17 +268,18 @@ IF defined DoLink (
260268
Exit /b 1
261269
)
262270
263-
REM Copy CORECLR native binaries to %LinkBin%, so that we can run the test based on that directory
271+
REM Copy CORECLR native binaries and the test watcher to %LinkBin%, so that we can run the test based on that directory
264272
copy %CORE_ROOT%\clrjit.dll %LinkBin% > nul 2> nul
265273
copy %CORE_ROOT%\coreclr.dll %LinkBin% > nul 2> nul
266274
copy %CORE_ROOT%\mscorrc.dll %LinkBin% > nul 2> nul
267275
copy %CORE_ROOT%\CoreRun.exe %LinkBin% > nul 2> nul
276+
copy %CORE_ROOT%\watchdog.exe %LinkBin% > nul 2> nul
268277
269278
REM Copy some files that may be arguments
270279
copy *.txt %LinkBin% > nul 2> nul
271280
272281
set ExePath=%LinkBin%\$(InputAssemblyName)
273-
set CORE_ROOT=%scriptPath%LinkBin%
282+
set CORE_ROOT=%scriptPath%\%LinkBin%
274283
)
275284
]]>
276285
</BatchLinkerTestLaunchCmds>
@@ -289,6 +298,8 @@ if defined DoLink (
289298
</PropertyGroup>
290299
<PropertyGroup>
291300
<CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"%CORE_ROOT%\corerun.exe" $(CoreRunArgs) %__DotEnvArg%</CLRTestRunFile>
301+
<WatcherRunFile>"%CORE_ROOT%\watchdog.exe" 300</WatcherRunFile>
302+
292303
<BatchCopyCoreShimLocalCmds Condition="'$(CLRTestScriptLocalCoreShim)' == 'true'"><![CDATA[
293304
REM Local CoreShim requested - see MSBuild property 'CLRTestScriptLocalCoreShim'
294305
ECHO Copying '%CORE_ROOT%\CoreShim.dll'...
@@ -301,6 +312,8 @@ $(BatchCopyCoreShimLocalCmds)
301312
302313
IF NOT "%CLRCustomTestLauncher%"=="" (
303314
set LAUNCHER=call %CLRCustomTestLauncher% %scriptPath%
315+
) ELSE IF %_RunWithWatcher% EQU 1 (
316+
set LAUNCHER=$(WatcherRunFile) $(CLRTestRunFile)
304317
) ELSE (
305318
set LAUNCHER=%_DebuggerFullPath% $(CLRTestRunFile)
306319
)
@@ -425,6 +438,7 @@ setlocal ENABLEDELAYEDEXPANSION
425438
set "lockFolder=%~dp0\lock"
426439
pushd %~dp0
427440
set "scriptPath=%~dp0"
441+
set /A _RunWithWatcher=0
428442
429443
$(BatchCLRTestArgPrep)
430444
$(BatchCLRTestExitCodePrep)

src/tests/Common/helixpublishwitharcade.proj

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,8 @@
411411
<HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="export TEST_HARNESS_STRIPE_TO_EXECUTE=.0.1" />
412412
<HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="chmod +x $(_MergedWrapperRunScriptRelative)" />
413413
<!-- Force assemblies to lazy-load for LLVM AOT test runs to enable using tests that fail at AOT time (and as a result can't be AOTd) -->
414-
<HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) --aot-lazy-assembly-load" />
415-
<HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative)" />
414+
<HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher --aot-lazy-assembly-load" />
415+
<HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher" />
416416
<HelixCommandLines Include="$(XUnitLogCheckerCommand)" />
417417
</ItemGroup>
418418

@@ -722,7 +722,9 @@
722722

723723
<ItemGroup Condition=" '$(UsesHelixSdk)' == 'true' ">
724724
<HelixCorrelationPayload Include="$(CoreRootDirectory)" />
725-
<HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" />
725+
726+
<!-- Browser-Wasm follows a very different workflow, which is currently out of scope of the Log Checker. -->
727+
<HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" Condition="'$(TargetsBrowser)' != 'true'" />
726728

727729
<LegacyPayloads Include="$([System.IO.Directory]::GetDirectories($(LegacyPayloadsRootDirectory)))" Condition="Exists('$(LegacyPayloadsRootDirectory)')" />
728730
<LegacyPayloads Update="@(LegacyPayloads)">

0 commit comments

Comments
 (0)