Skip to content

Commit 8550883

Browse files
committed
Implement custom exception approach for errors from C code
1 parent 8984258 commit 8550883

16 files changed

+79
-42
lines changed

ext/datadog_profiling_native_extension/clock_id_from_pthread.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ void self_test_clock_id(void) {
1818
rb_nativethread_id_t expected_pthread_id = pthread_self();
1919
rb_nativethread_id_t actual_pthread_id = pthread_id_for(rb_thread_current());
2020

21-
if (expected_pthread_id != actual_pthread_id) rb_raise(rb_eRuntimeError, "pthread_id_for() self-test failed");
21+
if (expected_pthread_id != actual_pthread_id) rb_raise(datadog_profiling_error_class, "pthread_id_for() self-test failed");
2222
}
2323

2424
// Safety: This function is assumed never to raise exceptions by callers

ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
289289
after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ||
290290
after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID
291291
) {
292-
rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
292+
rb_raise(datadog_profiling_error_class, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
293293
}
294294
#else
295295
gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
@@ -473,7 +473,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
473473
if (old_state != NULL) {
474474
if (is_thread_alive(old_state->owner_thread)) {
475475
rb_raise(
476-
rb_eRuntimeError,
476+
datadog_profiling_error_class,
477477
"Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"
478478
);
479479
} else {
@@ -1284,7 +1284,7 @@ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) {
12841284

12851285
static void delayed_error(cpu_and_wall_time_worker_state *state, const char *error) {
12861286
// If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
1287-
stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
1287+
stop_state(state, rb_exc_new_cstr(datadog_profiling_error_class, error));
12881288
}
12891289

12901290
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {

ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ static VALUE _native_new(VALUE klass) {
369369

370370
long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
371371
if (now_ns == 0) {
372-
rb_raise(rb_eRuntimeError, "failed to get clock time");
372+
rb_raise(datadog_profiling_error_class, "failed to get clock time");
373373
}
374374
discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns);
375375

ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) {
119119
};
120120

121121
if (label_pos > max_label_count) {
122-
rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
122+
rb_raise(datadog_profiling_error_class, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
123123
}
124124

125125
return label_pos;

ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ static void *run_idle_sampling_loop(void *state_ptr) {
153153
// Process pending action
154154
if (next_action == ACTION_RUN) {
155155
if (run_action_function == NULL) {
156-
grab_gvl_and_raise(rb_eRuntimeError, "Unexpected NULL run_action_function in run_idle_sampling_loop");
156+
grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected NULL run_action_function in run_idle_sampling_loop");
157157
}
158158

159159
run_action_function();

ext/datadog_profiling_native_extension/collectors_stack.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,11 +284,11 @@ void sample_thread(
284284
// here, but >= 0 makes this easier to understand/debug.
285285
bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns >= 0;
286286

287-
if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label");
287+
if (cpu_or_wall_sample && state_label == NULL) rb_raise(datadog_profiling_error_class, "BUG: Unexpected missing state_label");
288288

289289
if (has_cpu_time) {
290290
state_label->str = DDOG_CHARSLICE_C("had cpu");
291-
if (labels.is_gvl_waiting_state) rb_raise(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting");
291+
if (labels.is_gvl_waiting_state) rb_raise(datadog_profiling_error_class, "BUG: Unexpected combination of cpu-time with is_gvl_waiting");
292292
}
293293

294294
int top_of_stack_position = captured_frames - 1;

ext/datadog_profiling_native_extension/collectors_thread_context.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
831831
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
832832

833833
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
834-
rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
834+
rb_raise(datadog_profiling_error_class, "BUG: Unexpected call to sample_after_gc without valid GC information available");
835835
}
836836

837837
int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
@@ -998,7 +998,7 @@ static void trigger_sample_for_thread(
998998
// @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
999999
// changes the code erroneously and remove it entirely?
10001000
if (label_pos > max_label_count) {
1001-
rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
1001+
rb_raise(datadog_profiling_error_class, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
10021002
}
10031003

10041004
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
@@ -1295,7 +1295,7 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
12951295
elapsed_time_ns = 0;
12961296
} else {
12971297
// We don't expect non-wall time to go backwards, so let's flag this as a bug
1298-
rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
1298+
rb_raise(datadog_profiling_error_class, "BUG: Unexpected negative elapsed_time_ns between samples");
12991299
}
13001300
}
13011301

@@ -1961,7 +1961,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
19611961
thread_context_collector_state *state;
19621962
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);
19631963

1964-
if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled");
1964+
if (!state->timeline_enabled) rb_raise(datadog_profiling_error_class, "GVL profiling requires timeline to be enabled");
19651965

19661966
intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread);
19671967

ext/datadog_profiling_native_extension/heap_recorder.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
300300
}
301301

302302
if (heap_recorder->active_recording != NULL) {
303-
rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end.");
303+
rb_raise(datadog_profiling_error_class, "Detected consecutive heap allocation recording starts without end.");
304304
}
305305

306306
if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate ||
@@ -323,7 +323,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
323323

324324
VALUE ruby_obj_id = rb_obj_id(new_obj);
325325
if (!FIXNUM_P(ruby_obj_id)) {
326-
rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
326+
rb_raise(datadog_profiling_error_class, "Detected a bignum object id. These are not supported by heap profiling.");
327327
}
328328

329329
heap_recorder->active_recording = object_record_new(
@@ -371,7 +371,7 @@ static VALUE end_heap_allocation_recording(VALUE protect_args) {
371371

372372
if (active_recording == NULL) {
373373
// Recording ended without having been started?
374-
rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started");
374+
rb_raise(datadog_profiling_error_class, "Ended a heap recording that was not started");
375375
}
376376
// From now on, mark the global active recording as invalid so we can short-circuit at any point
377377
// and not end up with a still active recording. the local active_recording still holds the
@@ -487,14 +487,14 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
487487

488488
if (heap_recorder->object_records_snapshot != NULL) {
489489
// we could trivially handle this but we raise to highlight and catch unexpected usages.
490-
rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
490+
rb_raise(datadog_profiling_error_class, "New heap recorder iteration prepared without the previous one having been finished.");
491491
}
492492

493493
heap_recorder_update(heap_recorder, /* full_update: */ true);
494494

495495
heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
496496
if (heap_recorder->object_records_snapshot == NULL) {
497-
rb_raise(rb_eRuntimeError, "Failed to create heap snapshot.");
497+
rb_raise(datadog_profiling_error_class, "Failed to create heap snapshot.");
498498
}
499499
}
500500

@@ -505,7 +505,7 @@ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) {
505505

506506
if (heap_recorder->object_records_snapshot == NULL) {
507507
// we could trivially handle this but we raise to highlight and catch unexpected usages.
508-
rb_raise(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared.");
508+
rb_raise(datadog_profiling_error_class, "Heap recorder iteration finished without having been prepared.");
509509
}
510510

511511
st_free_table(heap_recorder->object_records_snapshot);
@@ -733,19 +733,19 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec
733733
// needed to fully build the object_record.
734734
active_recording->heap_record = heap_record;
735735
if (heap_record->num_tracked_objects == UINT32_MAX) {
736-
rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record");
736+
rb_raise(datadog_profiling_error_class, "Reached maximum number of tracked objects for heap record");
737737
}
738738
heap_record->num_tracked_objects++;
739739

740740
int existing_error = st_update(heap_recorder->object_records, active_recording->obj_id, update_object_record_entry, (st_data_t) active_recording);
741741
if (existing_error) {
742742
object_record *existing_record = NULL;
743743
st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record);
744-
if (existing_record == NULL) rb_raise(rb_eRuntimeError, "Unexpected NULL when reading existing record");
744+
if (existing_record == NULL) rb_raise(datadog_profiling_error_class, "Unexpected NULL when reading existing record");
745745

746746
VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record);
747747
VALUE new_inspect = object_record_inspect(heap_recorder, active_recording);
748-
rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
748+
rb_raise(datadog_profiling_error_class, "Object ids are supposed to be unique. We got 2 allocation recordings with "
749749
"the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect);
750750
}
751751
}
@@ -781,7 +781,7 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec
781781
}
782782

783783
if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) {
784-
rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record");
784+
rb_raise(datadog_profiling_error_class, "Attempted to cleanup an untracked heap_record");
785785
};
786786
heap_record_free(heap_recorder, heap_record);
787787
}
@@ -791,14 +791,14 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj
791791
// (See PROF-10656 Datadog-internal for details). Just in case, I've sprinkled a bunch of NULL tests in this function for now.
792792
// Once we figure out the issue we can get rid of them again.
793793

794-
if (heap_recorder == NULL) rb_raise(rb_eRuntimeError, "heap_recorder was NULL in on_committed_object_record_cleanup");
795-
if (heap_recorder->heap_records == NULL) rb_raise(rb_eRuntimeError, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup");
796-
if (record == NULL) rb_raise(rb_eRuntimeError, "record was NULL in on_committed_object_record_cleanup");
794+
if (heap_recorder == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder was NULL in on_committed_object_record_cleanup");
795+
if (heap_recorder->heap_records == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup");
796+
if (record == NULL) rb_raise(datadog_profiling_error_class, "record was NULL in on_committed_object_record_cleanup");
797797

798798
// Starting with the associated heap record. There will now be one less tracked object pointing to it
799799
heap_record *heap_record = record->heap_record;
800800

801-
if (heap_record == NULL) rb_raise(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup");
801+
if (heap_record == NULL) rb_raise(datadog_profiling_error_class, "heap_record was NULL in on_committed_object_record_cleanup");
802802

803803
heap_record->num_tracked_objects--;
804804

@@ -862,7 +862,7 @@ heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location l
862862
uint16_t frames_len = locations.len;
863863
if (frames_len > MAX_FRAMES_LIMIT) {
864864
// This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism
865-
rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len);
865+
rb_raise(datadog_profiling_error_class, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len);
866866
}
867867
heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above
868868
stack->num_tracked_objects = 0;
@@ -933,21 +933,21 @@ static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId
933933

934934
ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id);
935935
if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) {
936-
rb_raise(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some));
936+
rb_raise(datadog_profiling_error_class, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some));
937937
}
938938
}
939939

940940
static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) {
941941
ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids);
942942
if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) {
943-
rb_raise(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some));
943+
rb_raise(datadog_profiling_error_class, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some));
944944
}
945945
}
946946

947947
static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) {
948948
ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id);
949949
if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) {
950-
rb_raise(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err));
950+
rb_raise(datadog_profiling_error_class, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err));
951951
}
952952
VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message);
953953
ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok);

ext/datadog_profiling_native_extension/profiling.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) {
5555
rb_define_singleton_method(native_extension_module, "native_working?", native_working_p, 0);
5656
rb_funcall(native_extension_module, rb_intern("private_class_method"), 1, ID2SYM(rb_intern("native_working?")));
5757

58+
// Initialize the ProfilingError exception class reference
59+
// This exception class should be defined in Ruby code (lib/datadog/profiling.rb)
60+
datadog_profiling_error_class = rb_const_get(profiling_module, rb_intern("ProfilingError"));
61+
rb_global_variable(&datadog_profiling_error_class);
62+
5863
ruby_helpers_init();
5964
collectors_cpu_and_wall_time_worker_init(profiling_module);
6065
collectors_discrete_dynamic_sampler_init(profiling_module);
@@ -115,7 +120,7 @@ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE except
115120
grab_gvl_and_raise(args.exception_class, "%s", args.test_message);
116121
}
117122

118-
rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen");
123+
rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen");
119124
}
120125

121126
static void *trigger_grab_gvl_and_raise(void *trigger_args) {
@@ -151,7 +156,7 @@ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE
151156
grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message);
152157
}
153158

154-
rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen");
159+
rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen");
155160
}
156161

157162
static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) {
@@ -246,7 +251,7 @@ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VA
246251

247252
replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler);
248253

249-
if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(rb_eRuntimeError, "Could not signal background_thread");
254+
if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(datadog_profiling_error_class, "Could not signal background_thread");
250255

251256
VALUE result = rb_hash_new();
252257
rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]);

ext/datadog_profiling_native_extension/ruby_helpers.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ static ID _id2ref_id = Qnil;
1212
static ID inspect_id = Qnil;
1313
static ID to_s_id = Qnil;
1414

15+
// Global reference to Datadog::Profiling::ProfilingError exception class
16+
// Initialized in profiling.c during extension initialization
17+
VALUE datadog_profiling_error_class = Qnil;
18+
1519
void ruby_helpers_init(void) {
1620
rb_global_variable(&module_object_space);
1721

@@ -44,7 +48,7 @@ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) {
4448

4549
if (is_current_thread_holding_the_gvl()) {
4650
rb_raise(
47-
rb_eRuntimeError,
51+
datadog_profiling_error_class,
4852
"grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'",
4953
args.exception_message
5054
);
@@ -76,7 +80,7 @@ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...)
7680

7781
if (is_current_thread_holding_the_gvl()) {
7882
rb_raise(
79-
rb_eRuntimeError,
83+
datadog_profiling_error_class,
8084
"grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'",
8185
syserr_errno,
8286
args.exception_message

0 commit comments

Comments
 (0)