-
Notifications
You must be signed in to change notification settings - Fork 5.2k
[EventPipe] Add EventPipeBuffer corruption diagnostics #118874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fb9b360
6df06bc
a16ece5
6a74500
598421c
af7729e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -507,6 +507,23 @@ ep_rt_config_value_get_enable_stackwalk (void) | |
| return false; | ||
| } | ||
|
|
||
| static | ||
| inline | ||
| uint32_t | ||
| ep_rt_config_value_get_buffer_guard_level (void) | ||
| { | ||
| STATIC_CONTRACT_NOTHROW; | ||
|
|
||
| uint64_t value; | ||
| if (RhConfig::Environment::TryGetIntegerValue("EventPipeBufferGuardLevel", &value)) | ||
| { | ||
| EP_ASSERT(value <= UINT32_MAX); | ||
| return static_cast<uint32_t>(value); | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| /* | ||
| * EventPipeSampleProfiler. | ||
| */ | ||
|
|
@@ -1803,5 +1820,32 @@ ep_rt_volatile_store_ptr_without_barrier ( | |
| ep_rt_aot_volatile_store_ptr_without_barrier(ptr, value); | ||
| } | ||
|
|
||
| /* | ||
| * Memory Protection | ||
| */ | ||
|
|
||
| static | ||
| inline | ||
| bool | ||
| ep_rt_vprotect ( | ||
| void *addr, | ||
| size_t length, | ||
| EventPipePageProtection protection) | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| /* | ||
| * Fail fast | ||
| */ | ||
|
|
||
| static | ||
| inline | ||
| void | ||
| ep_rt_fatal_error_with_message (const ep_char8_t *message) | ||
| { | ||
| /* Not implemented, no-op */ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should make sure NativeAOT has an implementation of this too. Ideally similar to above we could have a shared minipal_raise_fatal_error() function that no longer needs runtime-specific callouts. Doing that depends on the fatal error handling staying simple, runtime agnostic, and directly aligning with underlying OS APIs.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Fatal error handling includes runtime-specific crash dump and watson logic currently... |
||
| } | ||
|
|
||
| #endif /* ENABLE_PERFTRACING */ | ||
| #endif /* EVENTPIPE_RT_AOT_H */ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,11 +15,13 @@ | |
| #include <eventpipe/ep-string.h> | ||
| #include "fstream.h" | ||
| #include "typestring.h" | ||
| #include "clrhost.h" | ||
| #include "clrversion.h" | ||
| #include "hostinformation.h" | ||
| #include <minipal/guid.h> | ||
| #include <minipal/strings.h> | ||
| #include <minipal/time.h> | ||
| #include <stdio.h> | ||
|
|
||
| #undef EP_INFINITE_WAIT | ||
| #define EP_INFINITE_WAIT INFINITE | ||
|
|
@@ -560,6 +562,15 @@ ep_rt_config_value_get_enable_stackwalk (void) | |
| return CLRConfig::GetConfigValue(CLRConfig::INTERNAL_EventPipeEnableStackwalk) != 0; | ||
| } | ||
|
|
||
| static | ||
| inline | ||
| uint32_t | ||
| ep_rt_config_value_get_buffer_guard_level (void) | ||
| { | ||
| STATIC_CONTRACT_NOTHROW; | ||
| return CLRConfig::GetConfigValue (CLRConfig::INTERNAL_EventPipeBufferGuardLevel); | ||
| } | ||
|
|
||
| /* | ||
| * EventPipeSampleProfiler. | ||
| */ | ||
|
|
@@ -1920,5 +1931,48 @@ ep_rt_volatile_store_ptr_without_barrier ( | |
| VolatileStoreWithoutBarrier<void *> ((void **)ptr, value); | ||
| } | ||
|
|
||
| /* | ||
| * Memory Protection | ||
| */ | ||
|
|
||
| static | ||
| inline | ||
| bool | ||
| ep_rt_vprotect ( | ||
| void *addr, | ||
| size_t length, | ||
| EventPipePageProtection protection) | ||
| { | ||
| STATIC_CONTRACT_NOTHROW; | ||
| DWORD oldProtect; | ||
| bool result = false; | ||
|
|
||
| if (protection == EP_PAGE_PROTECTION_READONLY) | ||
| result = ClrVirtualProtect (addr, length, PAGE_READONLY, &oldProtect); | ||
| else if (protection == EP_PAGE_PROTECTION_READWRITE) | ||
| result = ClrVirtualProtect (addr, length, PAGE_READWRITE, &oldProtect); | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| /* | ||
| * Fail fast | ||
| * Uses COR_E_EXECUTIONENGINE to classify as runtime/internal corruption and routes through EEPOLICY | ||
| * to ensure proper crash dump and diagnostic reporting. | ||
| */ | ||
| static | ||
| EP_ALWAYS_INLINE | ||
| void | ||
| ep_rt_fatal_error_with_message (const ep_char8_t *message) | ||
| { | ||
| if (message != nullptr) { | ||
|
||
| fputs(reinterpret_cast<const char*>(message), stderr); | ||
| fputc('\n', stderr); | ||
| fflush(stderr); | ||
| } | ||
|
|
||
| RaiseFailFastException (NULL, NULL, 0); | ||
| } | ||
|
|
||
| #endif /* ENABLE_PERFTRACING */ | ||
| #endif /* __EVENTPIPE_RT_CORECLR_H__ */ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -637,6 +637,21 @@ ep_rt_config_value_get_enable_stackwalk (void) | |
| return value_uint32_t != 0; | ||
| } | ||
|
|
||
| static | ||
| inline | ||
| uint32_t | ||
| ep_rt_config_value_get_buffer_guard_level (void) | ||
| { | ||
| uint32_t buffer_guard_level = 0; | ||
| gchar *value = g_getenv ("DOTNET_EventPipeBufferGuardLevel"); | ||
| if (!value) | ||
| value = g_getenv ("COMPlus_EventPipeBufferGuardLevel"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should not need to support for COMPlus_ for any new variables. (We are keeping COMPlus_ path for now for existing variables. Ideally, we will delete it at some point.) |
||
| if (value) | ||
| buffer_guard_level = (uint32_t)atoi (value); | ||
| g_free (value); | ||
| return buffer_guard_level; | ||
| } | ||
|
|
||
| /* | ||
| * EventPipeSampleProfiler. | ||
| */ | ||
|
|
@@ -1901,6 +1916,34 @@ ep_rt_volatile_store_ptr_without_barrier ( | |
| *ptr = value; | ||
| } | ||
|
|
||
|
|
||
| /* | ||
| * Memory Protection | ||
| */ | ||
|
|
||
| static | ||
| inline | ||
| bool | ||
| ep_rt_vprotect ( | ||
| void *addr, | ||
| size_t length, | ||
| EventPipePageProtection protection) | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| /* | ||
| * Fail fast | ||
| */ | ||
|
|
||
| static | ||
| inline | ||
| void | ||
| ep_rt_fatal_error_with_message (const ep_char8_t *message) | ||
| { | ||
| /* Not implemented, no-op */ | ||
| } | ||
|
|
||
| /* | ||
| * EventPipe Native Events. | ||
| */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -472,17 +472,17 @@ buffer_manager_allocate_buffer_for_thread ( | |
| uint32_t buffer_size = base_buffer_size * size_multiplier; | ||
| EP_ASSERT(buffer_size > 0); | ||
|
|
||
|
|
||
| buffer_size = EP_MAX (request_size, buffer_size); | ||
| uint32_t guard_overhead = (buffer_manager->buffer_guard_level > EP_BUFFER_GUARD_LEVEL_NONE) ? EP_BUFFER_HEADER_GUARD_SIZE + EP_BUFFER_FOOTER_GUARD_SIZE : 0; | ||
| buffer_size = EP_MAX (request_size + guard_overhead, buffer_size); | ||
|
|
||
| // Don't allow the buffer size to exceed 1MB. | ||
| const uint32_t max_buffer_size = 1024 * 1024; | ||
| buffer_size = EP_MIN (buffer_size, max_buffer_size); | ||
|
|
||
|
|
||
| // Make sure that buffer size >= request size so that the buffer size does not | ||
| // Make sure that buffer size >= request size + guard overhead so that the buffer size does not | ||
| // determine the max event size. | ||
| EP_ASSERT (request_size <= buffer_size); | ||
| EP_ASSERT (request_size + guard_overhead <= buffer_size); | ||
|
|
||
| // Make the buffer size fit into with pagesize-aligned block, since ep_rt_valloc0 expects page-aligned sizes to be passed as arguments | ||
| buffer_size = (buffer_size + ep_rt_system_get_alloc_granularity () - 1) & ~(uint32_t)(ep_rt_system_get_alloc_granularity () - 1); | ||
|
|
@@ -493,7 +493,7 @@ buffer_manager_allocate_buffer_for_thread ( | |
|
|
||
| // The sequence counter is exclusively mutated on this thread so this is a thread-local read. | ||
| sequence_number = ep_thread_session_state_get_volatile_sequence_number (thread_session_state); | ||
| new_buffer = ep_buffer_alloc (buffer_size, ep_thread_session_state_get_thread (thread_session_state), sequence_number); | ||
| new_buffer = ep_buffer_alloc (buffer_size, ep_thread_session_state_get_thread (thread_session_state), sequence_number, buffer_manager->buffer_guard_level); | ||
| ep_raise_error_if_nok (new_buffer != NULL); | ||
|
|
||
| // Adding a buffer to the buffer list requires us to take the lock. | ||
|
|
@@ -808,6 +808,7 @@ ep_buffer_manager_alloc ( | |
| size_t max_size_of_all_buffers, | ||
| size_t sequence_point_allocation_budget) | ||
| { | ||
| uint32_t buffer_guard_level; | ||
| EventPipeBufferManager *instance = ep_rt_object_alloc (EventPipeBufferManager); | ||
| ep_raise_error_if_nok (instance != NULL); | ||
|
|
||
|
|
@@ -851,6 +852,9 @@ ep_buffer_manager_alloc ( | |
| instance->remaining_sequence_point_alloc_budget = sequence_point_allocation_budget; | ||
| } | ||
|
|
||
| buffer_guard_level = EP_MIN (ep_rt_config_value_get_buffer_guard_level (), EP_BUFFER_GUARD_LEVEL_PROTECT_OUTSIDE_WRITES); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this is trying to clamp the range into valid enum values. I'd suggest define EP_BUFFER_GUARD_LEVEL_MAX = EP_BUFFER_GUARD_LEVEL_PROTECT_OUTSIDE_WRITES in the enum definition, then use that MAX value here. It better conveys the intent and its more likely to get updated correctly if we ever change the levels in the future. |
||
| instance->buffer_guard_level = (EventPipeBufferGuardLevel)buffer_guard_level; | ||
|
|
||
| ep_on_exit: | ||
| return instance; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jkotas - is there some guiding principle on what we add to minipal and what should stay out?
Today EventPipe code relies on runtime-specific callouts for various OS functionality but I'm hoping we can shift that trajectory to depend more on minipal directly. This spot seems like an appealing example where we'd like to invoke an OS API but the existing pattern takes us through runtime-specific wrappers first. Adding VirtualProtect to minipal and using it feels like a good approach to me but I want to make sure I'm not abusing the intent of minipal.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good candidates for minipal are unambiguous trivial methods used from number of different places. "get current time stamp" is a perfect example.
minipal is not meant to wrap everything with platform specific implementation.
I am not sure whether mprotect is a good candidate for minimap. Most places that call mprotect tend to come with their own unique requirements.
This was setup years ago to work around build system limitations. It does not make sense. It would make a lot more sense for eventpipe to have OS-specific PALs and avoid trying to fit into runtime-specific wrappers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Jan! We'll create our own EventPipe PAL as it sounds like these aren't appropriate for minipal.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The EventPipe runtime shim was originally put into place to make majority of EventPipe code runtime agnostic, but still reuse runtime unique implementation of low-level artifacts like IO, lock, threading, atomics, mainly keeping the EventPipe<->runtime interaction stable for CoreCLR while porting code from C++ to C and integrated with Mono, originally it even reused each runtime container implementation, but since then, I broken out that part into native/containers and if we don't think there is much value continue using runtime specific implementations of these low level artifacts shimmed by EventPipe runtime layer, then we should probably move towards one EventPipe OS PAL source file shared by Mono/CoreCLR/NAOT.
There will still be some runtime specific things that will stay in the runtime shim layer, but it will be smaller, and it will be simpler to run EventPipe standalone, making it simpler to get our low-level native runtime tests running outside of runtime. I believe some of these things would potentially end up in minipal, we already have some artifacts that could be used by EventPipe in minipal like, mutex, hig-res timers, utf8-ucs2 conversions and I had some volatile/atomics functions in another PR that would suite minipal and EventPipe as well.
I had an ambition for a long time to get our low-level native EventPipe and container tests currently under Mono, https://github.com/dotnet/runtime/tree/main/src/mono/mono/eventpipe/test, running as a runtime test (as a shared native library or separate binary executed from test) running on all runtimes, part of that was to make EventPipe less dependent on runtime specific artifacts, moving towards one EventPipe OS PAL shared by all runtimes. If we believe that is a good direction, then doing that work could start moving us towards an EventPipe OS PAL.