Skip to content

Commit 4c3edd5

Browse files
authored
Fix shutdown with multiple unhandled HW exceptions (#105578)
When multiple threads crash with hardware unhandled exceptions at the same time, the fact that we were uninstalling async signal handlers at process exit caused crashes when some thread reached the signal handler after .NET handler was removed. This change fixes it by not restoring the signal handlers during process exit. It actually stops restoring any signal handlers except for SIGABRT that has to be restored to actually enable the process exit with abort(). Close #46175
1 parent b2b0db0 commit 4c3edd5

File tree

6 files changed

+68
-27
lines changed

6 files changed

+68
-27
lines changed

src/coreclr/pal/src/exception/seh.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ SEHCleanup()
109109
{
110110
TRACE("Cleaning up SEH\n");
111111

112-
SEHCleanupSignals();
112+
SEHCleanupSignals(false /* isChildProcess */);
113113
}
114114

115115
/*++

src/coreclr/pal/src/exception/signal.cpp

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ Function :
279279
Restore default signal handlers
280280
281281
Parameters :
282-
None
282+
isChildProcess - indicates that it is called from a child process fork
283283
284284
(no return value)
285285
@@ -288,34 +288,42 @@ reason for this function is that during PAL_Terminate, we reach a point where
288288
SEH isn't possible anymore (handle manager is off, etc). Past that point,
289289
we can't avoid crashing on a signal.
290290
--*/
291-
void SEHCleanupSignals()
291+
void SEHCleanupSignals(bool isChildProcess)
292292
{
293293
TRACE("Restoring default signal handlers\n");
294294

295-
if (g_registered_signal_handlers)
295+
if (isChildProcess)
296296
{
297-
restore_signal(SIGILL, &g_previous_sigill);
297+
if (g_registered_signal_handlers)
298+
{
299+
restore_signal(SIGILL, &g_previous_sigill);
298300
#if !HAVE_MACH_EXCEPTIONS
299-
restore_signal(SIGTRAP, &g_previous_sigtrap);
301+
restore_signal(SIGTRAP, &g_previous_sigtrap);
300302
#endif
301-
restore_signal(SIGFPE, &g_previous_sigfpe);
302-
restore_signal(SIGBUS, &g_previous_sigbus);
303-
restore_signal(SIGABRT, &g_previous_sigabrt);
304-
restore_signal(SIGSEGV, &g_previous_sigsegv);
305-
restore_signal(SIGINT, &g_previous_sigint);
306-
restore_signal(SIGQUIT, &g_previous_sigquit);
307-
}
303+
restore_signal(SIGFPE, &g_previous_sigfpe);
304+
restore_signal(SIGBUS, &g_previous_sigbus);
305+
restore_signal(SIGSEGV, &g_previous_sigsegv);
306+
restore_signal(SIGINT, &g_previous_sigint);
307+
restore_signal(SIGQUIT, &g_previous_sigquit);
308+
}
308309

309310
#ifdef INJECT_ACTIVATION_SIGNAL
310-
if (g_registered_activation_handler)
311-
{
312-
restore_signal(INJECT_ACTIVATION_SIGNAL, &g_previous_activation);
313-
}
311+
if (g_registered_activation_handler)
312+
{
313+
restore_signal(INJECT_ACTIVATION_SIGNAL, &g_previous_activation);
314+
}
314315
#endif
315316

316-
if (g_registered_sigterm_handler)
317+
if (g_registered_sigterm_handler)
318+
{
319+
restore_signal(SIGTERM, &g_previous_sigterm);
320+
}
321+
}
322+
323+
// Restore only the SIGABRT so that abort that ends up the process can actually end it
324+
if (g_registered_signal_handlers)
317325
{
318-
restore_signal(SIGTERM, &g_previous_sigterm);
326+
restore_signal(SIGABRT, &g_previous_sigabrt);
319327
}
320328
}
321329

src/coreclr/pal/src/include/pal/signal.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,12 @@ Function :
112112
SEHCleanupSignals
113113
114114
Restore default signal handlers
115+
Parameters :
116+
isChildProcess - indicates that it is called from a child process fork
115117
116118
(no parameters, no return value)
117119
--*/
118-
void SEHCleanupSignals();
120+
void SEHCleanupSignals(bool isChildProcess);
119121

120122
/*++
121123
Function :

src/coreclr/pal/src/thread/process.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2257,7 +2257,7 @@ PROCCreateCrashDump(
22572257
if (g_createdumpCallback != nullptr)
22582258
{
22592259
// Remove the signal handlers inherited from the runtime process
2260-
SEHCleanupSignals();
2260+
SEHCleanupSignals(true /* isChildProcess */);
22612261

22622262
// Call the statically linked createdump code
22632263
g_createdumpCallback(argv.size(), argv.data());
@@ -2556,7 +2556,7 @@ PROCAbort(int signal, siginfo_t* siginfo)
25562556

25572557
// Restore all signals; the SIGABORT handler to prevent recursion and
25582558
// the others to prevent multiple core dumps from being generated.
2559-
SEHCleanupSignals();
2559+
SEHCleanupSignals(false /* isChildProcess */);
25602560

25612561
// Abort the process after waiting for the core dump to complete
25622562
abort();

src/tests/baseservices/exceptions/stackoverflow/stackoverflowtester.cs

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Collections.Generic;
55
using System.Diagnostics;
66
using System.IO;
7+
using System.Runtime.InteropServices;
78
using System.Text;
89
using Xunit;
910

@@ -22,12 +23,27 @@ static void TestStackOverflow(string testName, string testArgs, out List<string>
2223
testProcess.StartInfo.Arguments = $"{Path.Combine(Directory.GetCurrentDirectory(), "..", testName, $"{testName}.dll")} {testArgs}";
2324
testProcess.StartInfo.UseShellExecute = false;
2425
testProcess.StartInfo.RedirectStandardError = true;
26+
testProcess.StartInfo.Environment.Add("DOTNET_DbgEnableMiniDump", "0");
27+
bool endOfStackTrace = false;
28+
2529
testProcess.ErrorDataReceived += (sender, line) =>
2630
{
2731
Console.WriteLine($"\"{line.Data}\"");
28-
if (!string.IsNullOrEmpty(line.Data))
32+
if (!endOfStackTrace && !string.IsNullOrEmpty(line.Data))
2933
{
30-
lines.Add(line.Data);
34+
// Store lines only till the end of the stack trace.
35+
// In the CI it can also contain lines with createdump info.
36+
if (line.Data.StartsWith("Stack overflow.") ||
37+
line.Data.StartsWith("Repeated ") ||
38+
line.Data.StartsWith("------") ||
39+
line.Data.StartsWith(" at "))
40+
{
41+
lines.Add(line.Data);
42+
}
43+
else
44+
{
45+
endOfStackTrace = true;
46+
}
3147
}
3248
};
3349

@@ -99,6 +115,15 @@ public static void TestStackOverflowSmallFrameMainThread()
99115
[Fact]
100116
public static void TestStackOverflowLargeFrameMainThread()
101117
{
118+
if ((RuntimeInformation.ProcessArchitecture == Architecture.Arm64) &&
119+
((Environment.OSVersion.Platform == PlatformID.Unix) || (Environment.OSVersion.Platform == PlatformID.MacOSX)))
120+
{
121+
// Disabled on Unix ARM64 due to https://github.com/dotnet/runtime/issues/13519
122+
// The current stack probing doesn't move the stack pointer and so the runtime sometimes cannot
123+
// recognize the underlying sigsegv as stack overflow when it probes too far from SP.
124+
return;
125+
}
126+
102127
TestStackOverflow("stackoverflow", "largeframe main", out List<string> lines);
103128

104129
if (!lines[lines.Count - 1].EndsWith("at TestStackOverflow.Program.Main(System.String[])"))
@@ -156,6 +181,15 @@ public static void TestStackOverflowSmallFrameSecondaryThread()
156181
[Fact]
157182
public static void TestStackOverflowLargeFrameSecondaryThread()
158183
{
184+
if ((RuntimeInformation.ProcessArchitecture == Architecture.Arm64) &&
185+
((Environment.OSVersion.Platform == PlatformID.Unix) || (Environment.OSVersion.Platform == PlatformID.MacOSX)))
186+
{
187+
// Disabled on Unix ARM64 due to https://github.com/dotnet/runtime/issues/13519
188+
// The current stack probing doesn't move the stack pointer and so the runtime sometimes cannot
189+
// recognize the underlying sigsegv as stack overflow when it probes too far from SP.
190+
return;
191+
}
192+
159193
TestStackOverflow("stackoverflow", "largeframe secondary", out List<string> lines);
160194

161195
if (!lines.Exists(elem => elem.EndsWith("at TestStackOverflow.Program.Test(Boolean)")))

src/tests/issues.targets

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,6 @@
8989

9090
<!-- All Unix targets on CoreCLR Runtime -->
9191
<ItemGroup Condition="'$(XunitTestBinBase)' != '' and '$(TargetsWindows)' != 'true' and '$(RuntimeFlavor)' == 'coreclr' ">
92-
<ExcludeList Include="$(XunitTestBinBase)/baseservices/exceptions/stackoverflow/stackoverflowtester/*">
93-
<Issue>https://github.com/dotnet/runtime/issues/46175</Issue>
94-
</ExcludeList>
9592
<ExcludeList Include="$(XunitTestBinBase)/baseservices/typeequivalence/signatures/nopiatestil/*">
9693
<Issue>CoreCLR doesn't support type equivalence on Unix</Issue>
9794
</ExcludeList>

0 commit comments

Comments
 (0)