Skip to content

Commit 44b0543

Browse files
authored
[Mono] Fix deadlock during gcdump when using interp full AOT fallback. (#89726)
The GC thread doing a gcdump will dump the EventPipe events after the world has restarted but before releasing the GC lock. There was one case where we logged a bulk type during that phase for a type that didn't have its finalizer data initialized, while at the same time the main thread running the interpreter held the loader lock and tried to acquire the GC lock. That triggers a deadlock, since the GC thread (still holding the GC lock) would trigger logic to initialize the finalizer data, but that in turn requires the GC lock. The fix delays firing the GC dump events until after the GC has completed. All GC dump events have already been cached into a temp file and will then be written into EventPipe. The only potential issue with this is that we keep vtable pointers in the cache that will be resolved when emitting the EventPipe events, after releasing the GC lock. Since we currently don't unload vtables, that is not an issue, but it needs to be addressed if/when we implement the ability to unload vtables; we would then need to root the vtables while they are stored in the temporary cache. This commit also enables the GC dump test on Mono platforms.
1 parent b2e18f8 commit 44b0543

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

src/mono/mono/eventpipe/ep-rt-mono-runtime-provider.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4165,6 +4165,9 @@ calculate_live_keywords (
41654165
ep_requires_lock_held ();
41664166
}
41674167

4168+
// TODO: If/when we can unload vtables, we would need to temporarily
4169+
// root the vtable pointers currently stored in buffered gc events.
4170+
// Once all events are fired, we can remove root from GC.
41684171
static
41694172
void
41704173
gc_event_callback (
@@ -4199,7 +4202,6 @@ gc_event_callback (
41994202
if (is_gc_heap_dump_enabled (context)) {
42004203
EP_ASSERT (context->state == GC_HEAP_DUMP_CONTEXT_STATE_DUMP);
42014204
gc_heap_dump_context_build_roots (context);
4202-
fire_buffered_gc_events (context);
42034205
}
42044206
break;
42054207
}
@@ -4289,6 +4291,7 @@ gc_heap_dump_trigger_callback (MonoProfiler *prof)
42894291
mono_profiler_set_gc_event_callback (_ep_rt_mono_default_profiler_provider, gc_event_callback);
42904292
mono_gc_collect (mono_gc_max_generation ());
42914293
mono_profiler_set_gc_event_callback (_ep_rt_mono_default_profiler_provider, NULL);
4294+
fire_buffered_gc_events (heap_dump_context);
42924295
}
42934296

42944297
heap_dump_context->state = GC_HEAP_DUMP_CONTEXT_STATE_END;

src/tests/issues.targets

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,9 +1895,6 @@
18951895
<ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/22021/consumer/**">
18961896
<Issue>needs triage</Issue>
18971897
</ExcludeList>
1898-
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/gcdump/gcdump/**">
1899-
<Issue>needs triage</Issue>
1900-
</ExcludeList>
19011898
<ExcludeList Include="$(XunitTestBinBase)/Interop/DllImportAttribute/DllImportPath/**">
19021899
<Issue>needs triage</Issue>
19031900
</ExcludeList>
@@ -3523,6 +3520,9 @@
35233520
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/simpleruntimeeventvalidation/**">
35243521
<Issue>System.Diagnostics.Process is not supported on wasm</Issue>
35253522
</ExcludeList>
3523+
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/gcdump/**">
3524+
<Issue>System.Diagnostics.Process is not supported on wasm</Issue>
3525+
</ExcludeList>
35263526
</ItemGroup>
35273527

35283528
<ItemGroup Condition="'$(TargetOS)' == 'android'" >

src/tests/tracing/eventpipe/gcdump/gcdump.cs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,16 @@ namespace Tracing.Tests.EventSourceError
2020
// Regression test for https://github.com/dotnet/runtime/issues/38639
2121
public class GCDumpTest
2222
{
23+
private static bool _seenGCStart = false;
24+
private static bool _seenGCStop = false;
2325
private static int _bulkTypeCount = 0;
2426
private static int _bulkNodeCount = 0;
2527
private static int _bulkEdgeCount = 0;
2628
private static int _bulkRootEdgeCount = 0;
2729
private static int _bulkRootStaticVarCount = 0;
2830

2931
private static readonly ulong GC_HeapDump_Keyword = 0x100000UL;
32+
private static ManualResetEvent _gcStopReceived = new ManualResetEvent(false);
3033

3134
public static int Main()
3235
{
@@ -48,11 +51,17 @@ public static int Main()
4851

4952
private static Action _eventGeneratingAction = () =>
5053
{
51-
// This space intentionally left blank
54+
// Wait up to 10 seconds to receive GCStop event.
55+
_gcStopReceived.WaitOne(10000);
5256
};
5357

5458
private static Func<EventPipeEventSource, Func<int>> _DoesRundownContainMethodEvents = (source) =>
5559
{
60+
source.Clr.GCStart += (GCStartTraceData data) =>
61+
{
62+
_seenGCStart = true;
63+
};
64+
5665
source.Clr.TypeBulkType += (GCBulkTypeTraceData data) =>
5766
{
5867
_bulkTypeCount += data.Count;
@@ -78,13 +87,21 @@ public static int Main()
7887
_bulkRootStaticVarCount += data.Count;
7988
};
8089

90+
source.Clr.GCStop += (GCEndTraceData data) =>
91+
{
92+
_seenGCStop = true;
93+
_gcStopReceived.Set();
94+
};
95+
8196
return () =>
8297
{
8398
// Hopefully it is low enough to be resilient to changes in the runtime
8499
// and high enough to catch issues. There should be between hundreds and thousands
85100
// for each, but the number is variable and the point of the test is to verify
86101
// that we get any events at all.
87-
if (_bulkTypeCount > 50
102+
if (_seenGCStart
103+
&& _seenGCStop
104+
&& _bulkTypeCount > 50
88105
&& _bulkNodeCount > 50
89106
&& _bulkEdgeCount > 50
90107
&& _bulkRootEdgeCount > 50
@@ -95,6 +112,8 @@ public static int Main()
95112

96113

97114
Console.WriteLine($"Test failed due to missing GC heap events.");
115+
Console.WriteLine($"_seenGCStart = {_seenGCStart}");
116+
Console.WriteLine($"_seenGCStop = {_seenGCStop}");
98117
Console.WriteLine($"_bulkTypeCount = {_bulkTypeCount}");
99118
Console.WriteLine($"_bulkNodeCount = {_bulkNodeCount}");
100119
Console.WriteLine($"_bulkEdgeCount = {_bulkEdgeCount}");

0 commit comments

Comments
 (0)