From 8e2bf949447bc5c20d689d6378c8baa568fa2754 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 17 Oct 2025 13:59:25 -0400 Subject: [PATCH 01/10] Implement custom exception approach for errors from C code --- .../clock_id_from_pthread.c | 2 +- .../collectors_cpu_and_wall_time_worker.c | 6 ++-- .../collectors_discrete_dynamic_sampler.c | 2 +- .../collectors_gc_profiling_helper.c | 2 +- .../collectors_idle_sampling_helper.c | 2 +- .../collectors_stack.c | 4 +-- .../collectors_thread_context.c | 8 ++--- .../heap_recorder.c | 36 +++++++++---------- .../profiling.c | 11 ++++-- .../ruby_helpers.c | 8 +++-- .../ruby_helpers.h | 4 +++ .../setup_signal_handler.c | 2 +- .../stack_recorder.c | 6 ++-- lib/datadog/core/telemetry/logging.rb | 10 ++++++ lib/datadog/profiling.rb | 6 ++++ sig/datadog/core/telemetry/logging.rbs | 4 +++ 16 files changed, 73 insertions(+), 40 deletions(-) diff --git a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c index be033110f73..9ffc1860e1b 100644 --- a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +++ b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c @@ -18,7 +18,7 @@ void self_test_clock_id(void) { rb_nativethread_id_t expected_pthread_id = pthread_self(); rb_nativethread_id_t actual_pthread_id = pthread_id_for(rb_thread_current()); - if (expected_pthread_id != actual_pthread_id) rb_raise(rb_eRuntimeError, "pthread_id_for() self-test failed"); + if (expected_pthread_id != actual_pthread_id) rb_raise(datadog_profiling_error_class, "pthread_id_for() self-test failed"); } // Safety: This function is assumed never to raise exceptions by callers diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 022c4588705..cd9e5aaebda 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -289,7 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ) { - rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); + rb_raise(datadog_profiling_error_class, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); } #else gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround(); @@ -473,7 +473,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) { if (old_state != NULL) { if (is_thread_alive(old_state->owner_thread)) { rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread" ); } else { @@ -1284,7 +1284,7 @@ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) { static void delayed_error(cpu_and_wall_time_worker_state *state, const char *error) { // If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop. - stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error)); + stop_state(state, rb_exc_new_cstr(datadog_profiling_error_class, error)); } static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) { diff --git a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c index 931adf85cb5..dd089a8db19 100644 --- a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +++ b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c @@ -369,7 +369,7 @@ static VALUE _native_new(VALUE klass) { long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); if (now_ns == 0) { - rb_raise(rb_eRuntimeError, "failed to get clock time"); + rb_raise(datadog_profiling_error_class, "failed to get clock time"); } discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns); diff --git a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c index e9dd6c727d9..c9ab53ed64e 100644 --- a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c @@ -119,7 +119,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { }; if (label_pos > max_label_count) { - rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + rb_raise(datadog_profiling_error_class, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } return label_pos; diff --git a/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c b/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c index fcbc42eaa94..23586d240fb 100644 --- a/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c @@ -153,7 +153,7 @@ static void *run_idle_sampling_loop(void *state_ptr) { // Process pending action if (next_action == ACTION_RUN) { if (run_action_function == NULL) { - grab_gvl_and_raise(rb_eRuntimeError, "Unexpected NULL run_action_function in run_idle_sampling_loop"); + grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected NULL run_action_function in run_idle_sampling_loop"); } run_action_function(); diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c index 94ca42cbdca..46f4944e6b8 100644 --- a/ext/datadog_profiling_native_extension/collectors_stack.c +++ b/ext/datadog_profiling_native_extension/collectors_stack.c @@ -284,11 +284,11 @@ void sample_thread( // here, but >= 0 makes this easier to understand/debug. bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns >= 0; - if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label"); + if (cpu_or_wall_sample && state_label == NULL) rb_raise(datadog_profiling_error_class, "BUG: Unexpected missing state_label"); if (has_cpu_time) { state_label->str = DDOG_CHARSLICE_C("had cpu"); - if (labels.is_gvl_waiting_state) rb_raise(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); + if (labels.is_gvl_waiting_state) rb_raise(datadog_profiling_error_class, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); } int top_of_stack_position = captured_frames - 1; diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index f2cc76c6021..c2c38bae845 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -831,7 +831,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) { - rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available"); + rb_raise(datadog_profiling_error_class, "BUG: Unexpected call to sample_after_gc without valid GC information available"); } int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata @@ -998,7 +998,7 @@ static void trigger_sample_for_thread( // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone // changes the code erroneously and remove it entirely? if (label_pos > max_label_count) { - rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + rb_raise(datadog_profiling_error_class, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos}; @@ -1295,7 +1295,7 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, elapsed_time_ns = 0; } else { // We don't expect non-wall time to go backwards, so let's flag this as a bug - rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples"); + rb_raise(datadog_profiling_error_class, "BUG: Unexpected negative elapsed_time_ns between samples"); } } @@ -1961,7 +1961,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { thread_context_collector_state *state; TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); - if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled"); + if (!state->timeline_enabled) rb_raise(datadog_profiling_error_class, "GVL profiling requires timeline to be enabled"); intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread); diff --git a/ext/datadog_profiling_native_extension/heap_recorder.c b/ext/datadog_profiling_native_extension/heap_recorder.c index f869d24c6d3..3a6eaee7ccf 100644 --- a/ext/datadog_profiling_native_extension/heap_recorder.c +++ b/ext/datadog_profiling_native_extension/heap_recorder.c @@ -300,7 +300,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj } if (heap_recorder->active_recording != NULL) { - rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end."); + rb_raise(datadog_profiling_error_class, "Detected consecutive heap allocation recording starts without end."); } if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate || @@ -323,7 +323,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj VALUE ruby_obj_id = rb_obj_id(new_obj); if (!FIXNUM_P(ruby_obj_id)) { - rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); + rb_raise(datadog_profiling_error_class, "Detected a bignum object id. These are not supported by heap profiling."); } heap_recorder->active_recording = object_record_new( @@ -371,7 +371,7 @@ static VALUE end_heap_allocation_recording(VALUE protect_args) { if (active_recording == NULL) { // Recording ended without having been started? - rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + rb_raise(datadog_profiling_error_class, "Ended a heap recording that was not started"); } // From now on, mark the global active recording as invalid so we can short-circuit at any point // and not end up with a still active recording. the local active_recording still holds the @@ -487,14 +487,14 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot != NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished."); + rb_raise(datadog_profiling_error_class, "New heap recorder iteration prepared without the previous one having been finished."); } heap_recorder_update(heap_recorder, /* full_update: */ true); heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records); if (heap_recorder->object_records_snapshot == NULL) { - rb_raise(rb_eRuntimeError, "Failed to create heap snapshot."); + rb_raise(datadog_profiling_error_class, "Failed to create heap snapshot."); } } @@ -505,7 +505,7 @@ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot == NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared."); + rb_raise(datadog_profiling_error_class, "Heap recorder iteration finished without having been prepared."); } st_free_table(heap_recorder->object_records_snapshot); @@ -733,7 +733,7 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec // needed to fully build the object_record. active_recording->heap_record = heap_record; if (heap_record->num_tracked_objects == UINT32_MAX) { - rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record"); + rb_raise(datadog_profiling_error_class, "Reached maximum number of tracked objects for heap record"); } heap_record->num_tracked_objects++; @@ -741,11 +741,11 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec if (existing_error) { object_record *existing_record = NULL; st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record); - if (existing_record == NULL) rb_raise(rb_eRuntimeError, "Unexpected NULL when reading existing record"); + if (existing_record == NULL) rb_raise(datadog_profiling_error_class, "Unexpected NULL when reading existing record"); VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record); VALUE new_inspect = object_record_inspect(heap_recorder, active_recording); - rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " + rb_raise(datadog_profiling_error_class, "Object ids are supposed to be unique. We got 2 allocation recordings with " "the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect); } } @@ -781,7 +781,7 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec } if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) { - rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record"); + rb_raise(datadog_profiling_error_class, "Attempted to cleanup an untracked heap_record"); }; heap_record_free(heap_recorder, heap_record); } @@ -791,14 +791,14 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj // (See PROF-10656 Datadog-internal for details). Just in case, I've sprinkled a bunch of NULL tests in this function for now. // Once we figure out the issue we can get rid of them again. - if (heap_recorder == NULL) rb_raise(rb_eRuntimeError, "heap_recorder was NULL in on_committed_object_record_cleanup"); - if (heap_recorder->heap_records == NULL) rb_raise(rb_eRuntimeError, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); - if (record == NULL) rb_raise(rb_eRuntimeError, "record was NULL in on_committed_object_record_cleanup"); + if (heap_recorder == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder was NULL in on_committed_object_record_cleanup"); + if (heap_recorder->heap_records == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); + if (record == NULL) rb_raise(datadog_profiling_error_class, "record was NULL in on_committed_object_record_cleanup"); // Starting with the associated heap record. There will now be one less tracked object pointing to it heap_record *heap_record = record->heap_record; - if (heap_record == NULL) rb_raise(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup"); + if (heap_record == NULL) rb_raise(datadog_profiling_error_class, "heap_record was NULL in on_committed_object_record_cleanup"); heap_record->num_tracked_objects--; @@ -862,7 +862,7 @@ heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location l uint16_t frames_len = locations.len; if (frames_len > MAX_FRAMES_LIMIT) { // This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism - rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); + rb_raise(datadog_profiling_error_class, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); } heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above stack->num_tracked_objects = 0; @@ -933,21 +933,21 @@ static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + rb_raise(datadog_profiling_error_class, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + rb_raise(datadog_profiling_error_class, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) { ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id); if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); + rb_raise(datadog_profiling_error_class, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); } VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message); ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok); diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index bcf97709c14..2fcc91e3e41 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -55,6 +55,11 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { rb_define_singleton_method(native_extension_module, "native_working?", native_working_p, 0); rb_funcall(native_extension_module, rb_intern("private_class_method"), 1, ID2SYM(rb_intern("native_working?"))); + // Initialize the ProfilingError exception class reference + // This exception class should be defined in Ruby code (lib/datadog/profiling.rb) + datadog_profiling_error_class = rb_const_get(profiling_module, rb_intern("ProfilingError")); + rb_global_variable(&datadog_profiling_error_class); + ruby_helpers_init(); collectors_cpu_and_wall_time_worker_init(profiling_module); collectors_discrete_dynamic_sampler_init(profiling_module); @@ -115,7 +120,7 @@ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE except grab_gvl_and_raise(args.exception_class, "%s", args.test_message); } - rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); + rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); } static void *trigger_grab_gvl_and_raise(void *trigger_args) { @@ -151,7 +156,7 @@ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message); } - rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); + rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); } static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) { @@ -246,7 +251,7 @@ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VA replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler); - if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(rb_eRuntimeError, "Could not signal background_thread"); + if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(datadog_profiling_error_class, "Could not signal background_thread"); VALUE result = rb_hash_new(); rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]); diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.c b/ext/datadog_profiling_native_extension/ruby_helpers.c index 61b9db1af4f..71a79fb8240 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.c +++ b/ext/datadog_profiling_native_extension/ruby_helpers.c @@ -12,6 +12,10 @@ static ID _id2ref_id = Qnil; static ID inspect_id = Qnil; static ID to_s_id = Qnil; +// Global reference to Datadog::Profiling::ProfilingError exception class +// Initialized in profiling.c during extension initialization +VALUE datadog_profiling_error_class = Qnil; + void ruby_helpers_init(void) { rb_global_variable(&module_object_space); @@ -44,7 +48,7 @@ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) { if (is_current_thread_holding_the_gvl()) { rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'", args.exception_message ); @@ -76,7 +80,7 @@ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) if (is_current_thread_holding_the_gvl()) { rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'", syserr_errno, args.exception_message diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.h b/ext/datadog_profiling_native_extension/ruby_helpers.h index 5c135a21d14..b59b14a1af3 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.h +++ b/ext/datadog_profiling_native_extension/ruby_helpers.h @@ -3,6 +3,10 @@ #include #include "datadog_ruby_common.h" +// Global reference to Datadog::Profiling::ProfilingError exception class +// This is initialized in profiling.c during extension initialization +extern VALUE datadog_profiling_error_class; + // Initialize internal data needed by some ruby helpers. Should be called during start, before any actual // usage of ruby helpers. void ruby_helpers_init(void); diff --git a/ext/datadog_profiling_native_extension/setup_signal_handler.c b/ext/datadog_profiling_native_extension/setup_signal_handler.c index fa4bb27e95c..bab7897710d 100644 --- a/ext/datadog_profiling_native_extension/setup_signal_handler.c +++ b/ext/datadog_profiling_native_extension/setup_signal_handler.c @@ -72,7 +72,7 @@ static void install_sigprof_signal_handler_internal( } rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "Could not install profiling signal handler (%s): There's a pre-existing SIGPROF signal handler", handler_pretty_name ); diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index bc3b3439500..05bffa5bee4 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -770,10 +770,10 @@ static void build_heap_profile_without_gvl(stack_recorder_state *state, profile_ // same locks are acquired by the heap recorder while holding the gvl (since we'd be operating on the // same locks but acquiring them in different order). if (!iterated) { - grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: iteration cancelled"); + grab_gvl_and_raise(datadog_profiling_error_class, "Failure during heap profile building: iteration cancelled"); } else if (iteration_context.error) { - grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); + grab_gvl_and_raise(datadog_profiling_error_class, "Failure during heap profile building: %s", iteration_context.error_msg); } } @@ -835,7 +835,7 @@ static profile_slot* serializer_flip_active_and_inactive_slots(stack_recorder_st int previously_active_slot = state->active_slot; if (previously_active_slot != 1 && previously_active_slot != 2) { - grab_gvl_and_raise(rb_eRuntimeError, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); + grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); } pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two; diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index 036bb78d8f4..9b58c86d20b 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -60,6 +60,16 @@ def report(exception, level: :error, description: nil) log!(event) end + private + + def safe_exception_message(exception) + # Only include exception messages from ProfilingError, as those are guaranteed to be created by us + # with constant, PII-safe messages + if defined?(Datadog::Profiling::ProfilingError) && exception.is_a?(Datadog::Profiling::ProfilingError) + exception.message + end + end + def error(description) event = Event::Log.new(message: description, level: :error) diff --git a/lib/datadog/profiling.rb b/lib/datadog/profiling.rb index f867e3f9793..f644cc32b7a 100644 --- a/lib/datadog/profiling.rb +++ b/lib/datadog/profiling.rb @@ -7,6 +7,12 @@ module Datadog # Datadog Continuous Profiler implementation: https://docs.datadoghq.com/profiler/ module Profiling + # Custom exception class for profiler errors. + # This exception class is used by the profiler's C code to signal errors. + # Telemetry will only include exception messages for instances of this class, + # ensuring that only known-safe messages (created by Datadog code) are reported. + class ProfilingError < StandardError; end + def self.supported? unsupported_reason.nil? end diff --git a/sig/datadog/core/telemetry/logging.rbs b/sig/datadog/core/telemetry/logging.rbs index f64aa023f3c..a828b564b27 100644 --- a/sig/datadog/core/telemetry/logging.rbs +++ b/sig/datadog/core/telemetry/logging.rbs @@ -15,6 +15,10 @@ module Datadog def report: (Exception exception, ?level: Symbol, ?description: String?) -> void def error: (String description) -> void + + private + + def safe_exception_message: (Exception exception) -> String? end end end From 27067c077fd0e90f0504e763f372ae0546657934 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 14:15:49 -0500 Subject: [PATCH 02/10] Update to support constant_exception_message and ProfilingError This gets us closer to allowing these errors to be sent to telemetry. --- lib/datadog/core/telemetry/logging.rb | 14 +++++-- sig/datadog/profiling.rbs | 3 ++ spec/datadog/core/telemetry/logging_spec.rb | 38 +++++++++++++++++++ .../profiling/collectors/stack_spec.rb | 4 +- .../collectors/thread_context_spec.rb | 4 +- .../profiling/native_extension_spec.rb | 8 ++-- spec/datadog/profiling/stack_recorder_spec.rb | 4 +- 7 files changed, 61 insertions(+), 14 deletions(-) diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index 9b58c86d20b..e668f270214 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -49,7 +49,13 @@ def report(exception, level: :error, description: nil) # Anonymous exceptions to be logged as message = +"#{exception.class.name || exception.class.inspect}" # standard:disable Style/RedundantInterpolation - message << ": #{description}" if description + exception_message = constant_exception_message(exception) + + if description || exception_message + message << ':' + message << " #{description}" if description + message << " (#{exception_message})" if exception_message + end event = Event::Log.new( message: message, @@ -62,9 +68,9 @@ def report(exception, level: :error, description: nil) private - def safe_exception_message(exception) - # Only include exception messages from ProfilingError, as those are guaranteed to be created by us - # with constant, PII-safe messages + def constant_exception_message(exception) + # Only include exception messages from ProfilingError, as those are guaranteed to contain + # constant strings created by Datadog code (not dynamic/PII content) if defined?(Datadog::Profiling::ProfilingError) && exception.is_a?(Datadog::Profiling::ProfilingError) exception.message end diff --git a/sig/datadog/profiling.rbs b/sig/datadog/profiling.rbs index 88a8289d082..cd76e38817b 100644 --- a/sig/datadog/profiling.rbs +++ b/sig/datadog/profiling.rbs @@ -1,5 +1,8 @@ module Datadog module Profiling + class ProfilingError < StandardError + end + def self.supported?: () -> bool def self.unsupported_reason: () -> ::String? def self.start_if_enabled: () -> bool diff --git a/spec/datadog/core/telemetry/logging_spec.rb b/spec/datadog/core/telemetry/logging_spec.rb index 363bc1b380f..184d1928a06 100644 --- a/spec/datadog/core/telemetry/logging_spec.rb +++ b/spec/datadog/core/telemetry/logging_spec.rb @@ -82,6 +82,44 @@ def log!(_event) end end end + + context 'with ProfilingError' do + before do + skip unless defined?(Datadog::Profiling::ProfilingError) + end + + it 'includes the exception message in telemetry' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'Datadog::Profiling::ProfilingError: (This is a safe profiler error)', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + end + + begin + raise Datadog::Profiling::ProfilingError, 'This is a safe profiler error' + rescue => e + component.report(e, level: :error) + end + end + + context 'with description' do + it 'includes both description and exception message' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'Datadog::Profiling::ProfilingError: Profiler failed to start (Failed to initialize native extension)', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + end + + begin + raise Datadog::Profiling::ProfilingError, 'Failed to initialize native extension' + rescue => e + component.report(e, level: :error, description: 'Profiler failed to start') + end + end + end + end end describe '.error' do diff --git a/spec/datadog/profiling/collectors/stack_spec.rb b/spec/datadog/profiling/collectors/stack_spec.rb index 7a9ecf8997a..a1f427f5db3 100644 --- a/spec/datadog/profiling/collectors/stack_spec.rb +++ b/spec/datadog/profiling/collectors/stack_spec.rb @@ -330,7 +330,7 @@ def save_rounding_mode # rubocop:disable Lint/UselessMethodDefinition it do expect { sample_and_decode(background_thread, :labels, is_gvl_waiting_state: true) - }.to raise_error(RuntimeError, /BUG: .* is_gvl_waiting/) + }.to raise_error(Datadog::Profiling::ProfilingError, /BUG: .* is_gvl_waiting/) end end @@ -366,7 +366,7 @@ def save_rounding_mode # rubocop:disable Lint/UselessMethodDefinition let(:metric_values) { {"cpu-samples" => 1} } it "raises an exception" do - expect { gathered_stack }.to raise_error(RuntimeError, /BUG: Unexpected missing state_label/) + expect { gathered_stack }.to raise_error(Datadog::Profiling::ProfilingError, /BUG: Unexpected missing state_label/) end end diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index dedf7f9b90c..c6a65717f1b 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -1582,7 +1582,7 @@ def sample_and_check(expected_state:) context "when called before on_gc_start/on_gc_finish" do it do - expect { sample_after_gc(allow_exception: true) }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + expect { sample_after_gc(allow_exception: true) }.to raise_error(Datadog::Profiling::ProfilingError, /Unexpected call to sample_after_gc/) end end @@ -1601,7 +1601,7 @@ def sample_and_check(expected_state:) sample_after_gc expect { sample_after_gc(allow_exception: true) } - .to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + .to raise_error(Datadog::Profiling::ProfilingError, /Unexpected call to sample_after_gc/) end end diff --git a/spec/datadog/profiling/native_extension_spec.rb b/spec/datadog/profiling/native_extension_spec.rb index b3fa0c8163a..5aa2f68e2d4 100644 --- a/spec/datadog/profiling/native_extension_spec.rb +++ b/spec/datadog/profiling/native_extension_spec.rb @@ -29,9 +29,9 @@ end context "when called without releasing the gvl" do - it "raises a RuntimeError" do + it "raises a ProfilingError" do expect { described_class::Testing._native_grab_gvl_and_raise(ZeroDivisionError, "this is a test", nil, false) } - .to raise_exception(RuntimeError, /called by thread holding the global VM lock/) + .to raise_exception(Datadog::Profiling::ProfilingError, /called by thread holding the global VM lock/) end end end @@ -58,10 +58,10 @@ end context "when called without releasing the gvl" do - it "raises a RuntimeError" do + it "raises a ProfilingError" do expect do described_class::Testing._native_grab_gvl_and_raise_syserr(Errno::EINTR::Errno, "this is a test", nil, false) - end.to raise_exception(RuntimeError, /called by thread holding the global VM lock/) + end.to raise_exception(Datadog::Profiling::ProfilingError, /called by thread holding the global VM lock/) end end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 3a75be9a6c9..32afba63aba 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -679,7 +679,7 @@ def introduce_distinct_stacktraces(i, obj) expect do Datadog::Profiling::Collectors::Stack::Testing ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels) - end.to raise_error(RuntimeError, /Ended a heap recording/) + end.to raise_error(Datadog::Profiling::ProfilingError, /Ended a heap recording/) end it "does not keep the active slot mutex locked" do @@ -878,7 +878,7 @@ def sample_and_clear context "when serialization fails" do before { expect(described_class).to receive(:_native_serialize).and_return([:error, "test error message"]) } - it { expect { serialize! }.to raise_error(RuntimeError, /test error message/) } + it { expect { serialize! }.to raise_error(Datadog::Profiling::ProfilingError, /test error message/) } end end From 3d908d725685f674d62f4b8af825759a3e7dd9ca Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 15:09:24 -0500 Subject: [PATCH 03/10] Implement two-tier exception strategy for profiler errors --- .../encoded_profile.c | 2 +- .../http_transport.c | 2 +- .../libdatadog_helpers.c | 4 +- .../private_vm_api_access.c | 6 +-- .../profiling.c | 5 +++ .../ruby_helpers.c | 4 ++ .../ruby_helpers.h | 4 ++ .../stack_recorder.c | 22 +++++----- .../unsafe_api_calls_check.c | 2 +- lib/datadog/profiling.rb | 13 ++++-- sig/datadog/profiling.rbs | 3 ++ spec/datadog/core/telemetry/logging_spec.rb | 42 +++++++++++++++++++ 12 files changed, 86 insertions(+), 23 deletions(-) diff --git a/ext/datadog_profiling_native_extension/encoded_profile.c b/ext/datadog_profiling_native_extension/encoded_profile.c index cc28b9e40c5..93922e4676a 100644 --- a/ext/datadog_profiling_native_extension/encoded_profile.c +++ b/ext/datadog_profiling_native_extension/encoded_profile.c @@ -41,7 +41,7 @@ VALUE from_ddog_prof_EncodedProfile(ddog_prof_EncodedProfile profile) { static ddog_ByteSlice get_bytes(ddog_prof_EncodedProfile *state) { ddog_prof_Result_ByteSlice raw_bytes = ddog_prof_EncodedProfile_bytes(state); if (raw_bytes.tag == DDOG_PROF_RESULT_BYTE_SLICE_ERR_BYTE_SLICE) { - rb_raise(rb_eRuntimeError, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err)); } return raw_bytes.ok; } diff --git a/ext/datadog_profiling_native_extension/http_transport.c b/ext/datadog_profiling_native_extension/http_transport.c index 52e4db67c5d..4be49eb5fd6 100644 --- a/ext/datadog_profiling_native_extension/http_transport.c +++ b/ext/datadog_profiling_native_extension/http_transport.c @@ -112,7 +112,7 @@ static ddog_prof_ProfileExporter_Result create_exporter(VALUE exporter_configura } static void validate_token(ddog_CancellationToken token, const char *file, int line) { - if (token.inner == NULL) rb_raise(rb_eRuntimeError, "Unexpected: Validation token was empty at %s:%d", file, line); + if (token.inner == NULL) rb_raise(datadog_profiling_internal_error_class, "Unexpected: Validation token was empty at %s:%d", file, line); } static VALUE handle_exporter_failure(ddog_prof_ProfileExporter_Result exporter_result) { diff --git a/ext/datadog_profiling_native_extension/libdatadog_helpers.c b/ext/datadog_profiling_native_extension/libdatadog_helpers.c index 16185b8080b..3362b329bbd 100644 --- a/ext/datadog_profiling_native_extension/libdatadog_helpers.c +++ b/ext/datadog_profiling_native_extension/libdatadog_helpers.c @@ -66,7 +66,7 @@ ddog_prof_ManagedStringId intern_or_raise(ddog_prof_ManagedStringStorage string_ ddog_prof_ManagedStringStorageInternResult intern_result = ddog_prof_ManagedStringStorage_intern(string_storage, string); if (intern_result.tag == DDOG_PROF_MANAGED_STRING_STORAGE_INTERN_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to intern string: %"PRIsVALUE, get_error_details_and_drop(&intern_result.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to intern string: %"PRIsVALUE, get_error_details_and_drop(&intern_result.err)); } return intern_result.ok; } @@ -79,6 +79,6 @@ void intern_all_or_raise( ) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_intern_all(string_storage, strings, output_ids, output_ids_size); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to intern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + rb_raise(datadog_profiling_internal_error_class, "Failed to intern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index dd846376987..70822e3abb2 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -738,7 +738,7 @@ void self_test_mn_enabled(void) { return; #else if (ddtrace_get_ractor()->threads.sched.enable_mn_threads == true) { - rb_raise(rb_eRuntimeError, "Ruby VM is running with RUBY_MN_THREADS=1. This is not yet supported"); + rb_raise(datadog_profiling_error_class, "Ruby VM is running with RUBY_MN_THREADS=1. This is not yet supported"); } #endif } @@ -871,11 +871,11 @@ bool is_raised_flag_set(VALUE thread) { return thread_struct_from_object(thread) expected_current_fiber = current_fiber_for(rb_thread_current()); } - if (expected_current_fiber != actual_current_fiber) rb_raise(rb_eRuntimeError, "current_fiber_for() self-test failed"); + if (expected_current_fiber != actual_current_fiber) rb_raise(datadog_profiling_error_class, "current_fiber_for() self-test failed"); } #else NORETURN(VALUE current_fiber_for(DDTRACE_UNUSED VALUE thread)); - VALUE current_fiber_for(DDTRACE_UNUSED VALUE thread) { rb_raise(rb_eRuntimeError, "Not implemented for Ruby < 3.1"); } + VALUE current_fiber_for(DDTRACE_UNUSED VALUE thread) { rb_raise(datadog_profiling_error_class, "Not implemented for Ruby < 3.1"); } void self_test_current_fiber_for(void) { } // Nothing to do #endif diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index 2fcc91e3e41..528ff83af91 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -60,6 +60,11 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { datadog_profiling_error_class = rb_const_get(profiling_module, rb_intern("ProfilingError")); rb_global_variable(&datadog_profiling_error_class); + // Initialize the ProfilingInternalError exception class reference + // This exception class should be defined in Ruby code (lib/datadog/profiling.rb) + datadog_profiling_internal_error_class = rb_const_get(profiling_module, rb_intern("ProfilingInternalError")); + rb_global_variable(&datadog_profiling_internal_error_class); + ruby_helpers_init(); collectors_cpu_and_wall_time_worker_init(profiling_module); collectors_discrete_dynamic_sampler_init(profiling_module); diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.c b/ext/datadog_profiling_native_extension/ruby_helpers.c index 71a79fb8240..36f4622f996 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.c +++ b/ext/datadog_profiling_native_extension/ruby_helpers.c @@ -16,6 +16,10 @@ static ID to_s_id = Qnil; // Initialized in profiling.c during extension initialization VALUE datadog_profiling_error_class = Qnil; +// Global reference to Datadog::Profiling::ProfilingInternalError exception class +// Initialized in profiling.c during extension initialization +VALUE datadog_profiling_internal_error_class = Qnil; + void ruby_helpers_init(void) { rb_global_variable(&module_object_space); diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.h b/ext/datadog_profiling_native_extension/ruby_helpers.h index b59b14a1af3..092e7e04d91 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.h +++ b/ext/datadog_profiling_native_extension/ruby_helpers.h @@ -7,6 +7,10 @@ // This is initialized in profiling.c during extension initialization extern VALUE datadog_profiling_error_class; +// Global reference to Datadog::Profiling::ProfilingInternalError exception class +// This is initialized in profiling.c during extension initialization +extern VALUE datadog_profiling_internal_error_class; + // Initialize internal data needed by some ruby helpers. Should be called during start, before any actual // usage of ruby helpers. void ruby_helpers_init(void); diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index 05bffa5bee4..0794c014d03 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -349,7 +349,7 @@ static VALUE _native_new(VALUE klass) { ddog_prof_ManagedStringStorageNewResult string_storage = ddog_prof_ManagedStringStorage_new(); if (string_storage.tag == DDOG_PROF_MANAGED_STRING_STORAGE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); } state->string_storage = string_storage.ok; @@ -383,7 +383,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Val ddog_prof_Profile_with_string_storage(sample_types, NULL /* period is optional */, state->string_storage); if (slot_one_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err)); } state->profile_slot_one = (profile_slot) { .profile = slot_one_profile_result.ok, .start_timestamp = start_timestamp }; @@ -393,7 +393,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Val if (slot_two_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { // Note: No need to take any special care of slot one, it'll get cleaned up by stack_recorder_typed_data_free - rb_raise(rb_eRuntimeError, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); } state->profile_slot_two = (profile_slot) { .profile = slot_two_profile_result.ok, .start_timestamp = start_timestamp }; @@ -591,7 +591,7 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan ddog_prof_MaybeError result = args.advance_gen_result; if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + rb_raise(datadog_profiling_internal_error_class, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } VALUE start = ruby_time_from(args.slot->start_timestamp); @@ -824,7 +824,7 @@ static locked_profile_slot sampler_lock_active_profile(stack_recorder_state *sta } // We already tried both multiple times, and we did not succeed. This is not expected to happen. Let's stop sampling. - rb_raise(rb_eRuntimeError, "Failed to grab either mutex in sampler_lock_active_profile"); + rb_raise(datadog_profiling_error_class, "Failed to grab either mutex in sampler_lock_active_profile"); } static void sampler_unlock_active_profile(locked_profile_slot active_slot) { @@ -889,7 +889,7 @@ static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot) { return Qtrue; } else { ENFORCE_SUCCESS_GVL(error); - rb_raise(rb_eRuntimeError, "Failed to raise exception in test_slot_mutex_state; this should never happen"); + rb_raise(datadog_profiling_error_class, "Failed to raise exception in test_slot_mutex_state; this should never happen"); } } @@ -941,7 +941,7 @@ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_ins static void reset_profile_slot(profile_slot *slot, ddog_Timespec start_timestamp) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&slot->profile); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); } slot->start_timestamp = start_timestamp; slot->stats = (stats_slot) {}; @@ -1056,14 +1056,14 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ddog_prof_ManagedStringStorageNewResult string_storage = ddog_prof_ManagedStringStorage_new(); if (string_storage.tag == DDOG_PROF_MANAGED_STRING_STORAGE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); } ddog_prof_Slice_ValueType sample_types = {.ptr = all_value_types, .len = ALL_VALUE_TYPES_COUNT}; ddog_prof_Profile_NewResult profile = ddog_prof_Profile_with_string_storage(sample_types, NULL, string_storage.ok); if (profile.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to initialize profile: %"PRIsVALUE, get_error_details_and_drop(&profile.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to initialize profile: %"PRIsVALUE, get_error_details_and_drop(&profile.err)); } ddog_prof_ManagedStringId hello = intern_or_raise(string_storage.ok, DDOG_CHARSLICE_C("hello")); @@ -1105,13 +1105,13 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ddog_prof_Profile_SerializeResult serialize_result = ddog_prof_Profile_serialize(&profile.ok, &start_timestamp, &finish_timestamp); if (serialize_result.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); + rb_raise(datadog_profiling_internal_error_class, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); } ddog_prof_MaybeError advance_gen_result = ddog_prof_ManagedStringStorage_advance_gen(string_storage.ok); if (advance_gen_result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&advance_gen_result.some)); + rb_raise(datadog_profiling_internal_error_class, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&advance_gen_result.some)); } VALUE encoded_pprof_1 = from_ddog_prof_EncodedProfile(serialize_result.ok); diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c index c3c23b7f1c7..6b9c5bd534e 100644 --- a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c @@ -21,7 +21,7 @@ void unsafe_api_calls_check_init(void) { check_for_unsafe_api_calls_handle = rb_postponed_job_preregister(unused_flags, check_for_unsafe_api_calls, NULL); if (check_for_unsafe_api_calls_handle == POSTPONED_JOB_HANDLE_INVALID) { - rb_raise(rb_eRuntimeError, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); + rb_raise(datadog_profiling_error_class, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); } #endif } diff --git a/lib/datadog/profiling.rb b/lib/datadog/profiling.rb index f644cc32b7a..28a89f6e7af 100644 --- a/lib/datadog/profiling.rb +++ b/lib/datadog/profiling.rb @@ -7,12 +7,17 @@ module Datadog # Datadog Continuous Profiler implementation: https://docs.datadoghq.com/profiler/ module Profiling - # Custom exception class for profiler errors. - # This exception class is used by the profiler's C code to signal errors. - # Telemetry will only include exception messages for instances of this class, - # ensuring that only known-safe messages (created by Datadog code) are reported. + # Custom exception class for profiler errors with constant messages. + # This exception class is used by the profiler's C code to signal errors with constant, + # known-safe messages. Telemetry will include exception messages for instances of this class. class ProfilingError < StandardError; end + # Custom exception class for profiler internal errors with dynamic content. + # This exception class is used for errors that include dynamic information from libdatadog + # or system state. Telemetry will NOT include exception messages from this class to avoid + # fingerprinting issues, but the full details are preserved for local debugging. + class ProfilingInternalError < StandardError; end + def self.supported? unsupported_reason.nil? end diff --git a/sig/datadog/profiling.rbs b/sig/datadog/profiling.rbs index cd76e38817b..90789ef5e7e 100644 --- a/sig/datadog/profiling.rbs +++ b/sig/datadog/profiling.rbs @@ -3,6 +3,9 @@ module Datadog class ProfilingError < StandardError end + class ProfilingInternalError < StandardError + end + def self.supported?: () -> bool def self.unsupported_reason: () -> ::String? def self.start_if_enabled: () -> bool diff --git a/spec/datadog/core/telemetry/logging_spec.rb b/spec/datadog/core/telemetry/logging_spec.rb index 184d1928a06..97f359959a2 100644 --- a/spec/datadog/core/telemetry/logging_spec.rb +++ b/spec/datadog/core/telemetry/logging_spec.rb @@ -120,6 +120,48 @@ def log!(_event) end end end + + context 'with ProfilingInternalError' do + before do + skip unless defined?(Datadog::Profiling::ProfilingInternalError) + end + + it 'excludes the exception message from telemetry' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'Datadog::Profiling::ProfilingInternalError', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + # Verify the dynamic content is NOT in the message + expect(event.payload[:logs].map { |log| log[:message] }).not_to include(/Failed to initialize.*0x[0-9a-f]+/) + end + + begin + raise Datadog::Profiling::ProfilingInternalError, 'Failed to initialize string storage: Error at address 0xdeadbeef' + rescue => e + component.report(e, level: :error) + end + end + + context 'with description' do + it 'includes description but excludes exception message' do + expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| + expect(event.payload).to include( + logs: [{message: 'Datadog::Profiling::ProfilingInternalError: libdatadog internal error', level: 'ERROR', count: 1, + stack_trace: a_string_including('REDACTED')}] + ) + # Verify the dynamic content is NOT in the message + expect(event.payload[:logs].map { |log| log[:message] }).not_to include(/memory address/) + end + + begin + raise Datadog::Profiling::ProfilingInternalError, 'Failed to serialize profile: Invalid memory address 0x12345678' + rescue => e + component.report(e, level: :error, description: 'libdatadog internal error') + end + end + end + end end describe '.error' do From b9bb9053d930fcf5f540ee383419e21d508ed91a Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 15:22:32 -0500 Subject: [PATCH 04/10] Fix compilation errors by adding missing ruby_helpers.h includes Add ruby_helpers.h include to 8 C files that use datadog_profiling_error_class and datadog_profiling_internal_error_class but were missing the header declaration. This fixes the compilation error: error: 'datadog_profiling_error_class' undeclared Files fixed: - clock_id_from_pthread.c - collectors_gc_profiling_helper.c - collectors_stack.c - collectors_thread_context.c - encoded_profile.c - libdatadog_helpers.c - private_vm_api_access.c - unsafe_api_calls_check.c --- ext/datadog_profiling_native_extension/clock_id_from_pthread.c | 1 + .../collectors_gc_profiling_helper.c | 1 + ext/datadog_profiling_native_extension/collectors_stack.c | 1 + .../collectors_thread_context.c | 1 + ext/datadog_profiling_native_extension/encoded_profile.c | 1 + ext/datadog_profiling_native_extension/libdatadog_helpers.c | 1 + ext/datadog_profiling_native_extension/private_vm_api_access.c | 1 + ext/datadog_profiling_native_extension/unsafe_api_calls_check.c | 1 + 8 files changed, 8 insertions(+) diff --git a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c index 9ffc1860e1b..1ae9a4acc9e 100644 --- a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +++ b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c @@ -11,6 +11,7 @@ #include "clock_id.h" #include "helpers.h" #include "private_vm_api_access.h" +#include "ruby_helpers.h" #include "time_helpers.h" // Validate that our home-cooked pthread_id_for() matches pthread_self() for the current thread diff --git a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c index c9ab53ed64e..d5c301bb97b 100644 --- a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c @@ -2,6 +2,7 @@ #include #include "collectors_gc_profiling_helper.h" +#include "ruby_helpers.h" // This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection. // It's tested through that class' interfaces. diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c index 46f4944e6b8..3816f089073 100644 --- a/ext/datadog_profiling_native_extension/collectors_stack.c +++ b/ext/datadog_profiling_native_extension/collectors_stack.c @@ -17,6 +17,7 @@ #include "datadog_ruby_common.h" #include "private_vm_api_access.h" +#include "ruby_helpers.h" #include "stack_recorder.h" #include "collectors_stack.h" diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index c2c38bae845..96239594e07 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -8,6 +8,7 @@ #include "helpers.h" #include "libdatadog_helpers.h" #include "private_vm_api_access.h" +#include "ruby_helpers.h" #include "stack_recorder.h" #include "time_helpers.h" #include "unsafe_api_calls_check.h" diff --git a/ext/datadog_profiling_native_extension/encoded_profile.c b/ext/datadog_profiling_native_extension/encoded_profile.c index 93922e4676a..b50fdf444a2 100644 --- a/ext/datadog_profiling_native_extension/encoded_profile.c +++ b/ext/datadog_profiling_native_extension/encoded_profile.c @@ -1,6 +1,7 @@ #include "encoded_profile.h" #include "datadog_ruby_common.h" #include "libdatadog_helpers.h" +#include "ruby_helpers.h" // This class exists to wrap a ddog_prof_EncodedProfile into a Ruby object // This file implements the native bits of the Datadog::Profiling::EncodedProfile class diff --git a/ext/datadog_profiling_native_extension/libdatadog_helpers.c b/ext/datadog_profiling_native_extension/libdatadog_helpers.c index 3362b329bbd..feddda85bd4 100644 --- a/ext/datadog_profiling_native_extension/libdatadog_helpers.c +++ b/ext/datadog_profiling_native_extension/libdatadog_helpers.c @@ -1,6 +1,7 @@ #include "libdatadog_helpers.h" #include +#include "ruby_helpers.h" const char *ruby_value_type_to_string(enum ruby_value_type type) { return ruby_value_type_to_char_slice(type).ptr; diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index 70822e3abb2..131bc0acd13 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -1,4 +1,5 @@ #include "extconf.h" +#include "ruby_helpers.h" // This file exports functions used to access private Ruby VM APIs and internals. // To do this, it imports a few VM internal (private) headers. diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c index 6b9c5bd534e..b58234c5cfb 100644 --- a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c @@ -3,6 +3,7 @@ #include #include "datadog_ruby_common.h" +#include "ruby_helpers.h" #include "unsafe_api_calls_check.h" #include "extconf.h" From 032adc0cb826bcc9f37f039f38c7c9f564a03714 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 15:33:54 -0500 Subject: [PATCH 05/10] Fix header ordering in private_vm_api_access.c Move ruby_helpers.h include after private VM headers to avoid conflicts. This file requires private VM headers to be included first before any public Ruby headers, but ruby_helpers.h includes datadog_ruby_common.h which includes ruby.h, causing header ordering conflicts. Fixes compilation error: 'expected ')' before '==' token in RHASH_EMPTY_P' --- .../private_vm_api_access.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index 131bc0acd13..9e89bd77c8d 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -1,5 +1,4 @@ #include "extconf.h" -#include "ruby_helpers.h" // This file exports functions used to access private Ruby VM APIs and internals. // To do this, it imports a few VM internal (private) headers. @@ -48,6 +47,9 @@ #define DDTRACE_UNUSED #endif +// Include ruby_helpers.h after all private VM headers are set up +#include "ruby_helpers.h" + #define PRIVATE_VM_API_ACCESS_SKIP_RUBY_INCLUDES #include "private_vm_api_access.h" From 3f01db9ca3c4ed35d8aa82769a22a80051e8a74f Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 15:44:27 -0500 Subject: [PATCH 06/10] Fix: Declare exception classes locally in private_vm_api_access.c Cannot include ruby_helpers.h in this file as it pulls in public Ruby headers (via datadog_ruby_common.h) that conflict with private VM headers. Instead, declare the exception class globals as extern, following the pattern already established in this file for other declarations. This fully resolves the header ordering compilation error. --- .../private_vm_api_access.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index 9e89bd77c8d..7a9ece97da2 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -47,8 +47,10 @@ #define DDTRACE_UNUSED #endif -// Include ruby_helpers.h after all private VM headers are set up -#include "ruby_helpers.h" +// Declare exception class globals from ruby_helpers.h +// (We can't include ruby_helpers.h here as it pulls in public Ruby headers that conflict with private VM headers) +extern VALUE datadog_profiling_error_class; +extern VALUE datadog_profiling_internal_error_class; #define PRIVATE_VM_API_ACCESS_SKIP_RUBY_INCLUDES #include "private_vm_api_access.h" From abe3f0dbd257bb85fd2ca1be1db3bd1588baf86d Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 15:52:19 -0500 Subject: [PATCH 07/10] Update RBS signature for renamed method constant_exception_message Method was renamed from safe_exception_message to constant_exception_message but the RBS signature file was not updated, causing Steep type errors. --- sig/datadog/core/telemetry/logging.rbs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sig/datadog/core/telemetry/logging.rbs b/sig/datadog/core/telemetry/logging.rbs index a828b564b27..1d95082d9b7 100644 --- a/sig/datadog/core/telemetry/logging.rbs +++ b/sig/datadog/core/telemetry/logging.rbs @@ -18,7 +18,7 @@ module Datadog private - def safe_exception_message: (Exception exception) -> String? + def constant_exception_message: (Exception exception) -> String? end end end From f04f01d2bd7c3fb8333e8d7bc8af161a30f97ecb Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 16:01:19 -0500 Subject: [PATCH 08/10] Fix: Move error method before private keyword The error method must be public but was accidentally made private when constant_exception_message was added. Moving it before the private keyword restores its public visibility. Fixes test failure: NoMethodError: private method 'error' called --- lib/datadog/core/telemetry/logging.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index e668f270214..eb0dc95cd58 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -66,6 +66,12 @@ def report(exception, level: :error, description: nil) log!(event) end + def error(description) + event = Event::Log.new(message: description, level: :error) + + log!(event) + end + private def constant_exception_message(exception) @@ -75,12 +81,6 @@ def constant_exception_message(exception) exception.message end end - - def error(description) - event = Event::Log.new(message: description, level: :error) - - log!(event) - end end end end From 9e8e039cb1b3eee3b4e8b9dfb7d69c135ffbdfd8 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 7 Nov 2025 16:11:21 -0500 Subject: [PATCH 09/10] Fix serialize! to raise ProfilingInternalError Serialization errors contain dynamic libdatadog content, so they should raise ProfilingInternalError (not ProfilingError or RuntimeError). Updated both the Ruby wrapper code and the test expectation to use ProfilingInternalError consistently. Fixes test failure expecting ProfilingError but getting RuntimeError. --- lib/datadog/profiling/stack_recorder.rb | 2 +- spec/datadog/profiling/stack_recorder_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datadog/profiling/stack_recorder.rb b/lib/datadog/profiling/stack_recorder.rb index 6c7d7b7b81c..83c7d24b324 100644 --- a/lib/datadog/profiling/stack_recorder.rb +++ b/lib/datadog/profiling/stack_recorder.rb @@ -88,7 +88,7 @@ def serialize! else error_message = result - raise("Failed to serialize profiling data: #{error_message}") + raise ProfilingInternalError, "Failed to serialize profiling data: #{error_message}" end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 32afba63aba..cf75452b8fc 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -878,7 +878,7 @@ def sample_and_clear context "when serialization fails" do before { expect(described_class).to receive(:_native_serialize).and_return([:error, "test error message"]) } - it { expect { serialize! }.to raise_error(Datadog::Profiling::ProfilingError, /test error message/) } + it { expect { serialize! }.to raise_error(Datadog::Profiling::ProfilingInternalError, /test error message/) } end end From c31665ffb522aad36063f61be20bbf643db4ad84 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Fri, 14 Nov 2025 10:59:36 -0500 Subject: [PATCH 10/10] Add compile-time safe macros for profiler exception raising --- .../clock_id_from_pthread.c | 2 +- .../collectors_cpu_and_wall_time_worker.c | 9 ++--- .../collectors_discrete_dynamic_sampler.c | 2 +- .../collectors_gc_profiling_helper.c | 2 +- .../collectors_idle_sampling_helper.c | 2 +- .../collectors_stack.c | 4 +-- .../collectors_thread_context.c | 8 ++--- .../encoded_profile.c | 2 +- .../heap_recorder.c | 36 +++++++++---------- .../http_transport.c | 2 +- .../libdatadog_helpers.c | 4 +-- .../private_vm_api_access.c | 10 +++--- .../profiling.c | 14 ++++---- .../ruby_helpers.c | 8 ++--- .../ruby_helpers.h | 15 ++++++-- .../setup_signal_handler.c | 6 +--- .../stack_recorder.c | 28 +++++++-------- .../unsafe_api_calls_check.c | 2 +- 18 files changed, 80 insertions(+), 76 deletions(-) diff --git a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c index 1ae9a4acc9e..a61a629fed4 100644 --- a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +++ b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c @@ -19,7 +19,7 @@ void self_test_clock_id(void) { rb_nativethread_id_t expected_pthread_id = pthread_self(); rb_nativethread_id_t actual_pthread_id = pthread_id_for(rb_thread_current()); - if (expected_pthread_id != actual_pthread_id) rb_raise(datadog_profiling_error_class, "pthread_id_for() self-test failed"); + if (expected_pthread_id != actual_pthread_id) RAISE_PROFILING_TELEMETRY_SAFE("pthread_id_for() self-test failed"); } // Safety: This function is assumed never to raise exceptions by callers diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index cd9e5aaebda..888e27736fb 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -289,7 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ) { - rb_raise(datadog_profiling_error_class, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); } #else gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround(); @@ -472,10 +472,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) { cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state; if (old_state != NULL) { if (is_thread_alive(old_state->owner_thread)) { - rb_raise( - datadog_profiling_error_class, - "Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread" - ); + RAISE_PROFILING_TELEMETRY_SAFE("Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"); } else { // The previously active thread seems to have died without cleaning up after itself. // In this case, we can still go ahead and start the profiler BUT we make sure to disable any existing tracepoint @@ -1284,7 +1281,7 @@ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) { static void delayed_error(cpu_and_wall_time_worker_state *state, const char *error) { // If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop. - stop_state(state, rb_exc_new_cstr(datadog_profiling_error_class, error)); + stop_state(state, rb_exc_new_cstr(datadog_profiling_constant_error_class, error)); } static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) { diff --git a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c index dd089a8db19..9318974c5ca 100644 --- a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +++ b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c @@ -369,7 +369,7 @@ static VALUE _native_new(VALUE klass) { long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); if (now_ns == 0) { - rb_raise(datadog_profiling_error_class, "failed to get clock time"); + RAISE_PROFILING_TELEMETRY_SAFE("failed to get clock time"); } discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns); diff --git a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c index d5c301bb97b..8b4ce65d85c 100644 --- a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c @@ -120,7 +120,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { }; if (label_pos > max_label_count) { - rb_raise(datadog_profiling_error_class, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + RAISE_PROFILING_TELEMETRY_UNSAFE("BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } return label_pos; diff --git a/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c b/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c index 23586d240fb..0205cfb5717 100644 --- a/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c @@ -153,7 +153,7 @@ static void *run_idle_sampling_loop(void *state_ptr) { // Process pending action if (next_action == ACTION_RUN) { if (run_action_function == NULL) { - grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected NULL run_action_function in run_idle_sampling_loop"); + grab_gvl_and_raise(datadog_profiling_constant_error_class, "Unexpected NULL run_action_function in run_idle_sampling_loop"); } run_action_function(); diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c index 3816f089073..9638023c05a 100644 --- a/ext/datadog_profiling_native_extension/collectors_stack.c +++ b/ext/datadog_profiling_native_extension/collectors_stack.c @@ -285,11 +285,11 @@ void sample_thread( // here, but >= 0 makes this easier to understand/debug. bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns >= 0; - if (cpu_or_wall_sample && state_label == NULL) rb_raise(datadog_profiling_error_class, "BUG: Unexpected missing state_label"); + if (cpu_or_wall_sample && state_label == NULL) RAISE_PROFILING_TELEMETRY_SAFE("BUG: Unexpected missing state_label"); if (has_cpu_time) { state_label->str = DDOG_CHARSLICE_C("had cpu"); - if (labels.is_gvl_waiting_state) rb_raise(datadog_profiling_error_class, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); + if (labels.is_gvl_waiting_state) RAISE_PROFILING_TELEMETRY_SAFE("BUG: Unexpected combination of cpu-time with is_gvl_waiting"); } int top_of_stack_position = captured_frames - 1; diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index 96239594e07..347abec4391 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -832,7 +832,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) { - rb_raise(datadog_profiling_error_class, "BUG: Unexpected call to sample_after_gc without valid GC information available"); + RAISE_PROFILING_TELEMETRY_SAFE("BUG: Unexpected call to sample_after_gc without valid GC information available"); } int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata @@ -999,7 +999,7 @@ static void trigger_sample_for_thread( // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone // changes the code erroneously and remove it entirely? if (label_pos > max_label_count) { - rb_raise(datadog_profiling_error_class, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + RAISE_PROFILING_TELEMETRY_UNSAFE("BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos}; @@ -1296,7 +1296,7 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, elapsed_time_ns = 0; } else { // We don't expect non-wall time to go backwards, so let's flag this as a bug - rb_raise(datadog_profiling_error_class, "BUG: Unexpected negative elapsed_time_ns between samples"); + RAISE_PROFILING_TELEMETRY_SAFE("BUG: Unexpected negative elapsed_time_ns between samples"); } } @@ -1962,7 +1962,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { thread_context_collector_state *state; TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); - if (!state->timeline_enabled) rb_raise(datadog_profiling_error_class, "GVL profiling requires timeline to be enabled"); + if (!state->timeline_enabled) RAISE_PROFILING_TELEMETRY_SAFE("GVL profiling requires timeline to be enabled"); intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread); diff --git a/ext/datadog_profiling_native_extension/encoded_profile.c b/ext/datadog_profiling_native_extension/encoded_profile.c index b50fdf444a2..0f100904f20 100644 --- a/ext/datadog_profiling_native_extension/encoded_profile.c +++ b/ext/datadog_profiling_native_extension/encoded_profile.c @@ -42,7 +42,7 @@ VALUE from_ddog_prof_EncodedProfile(ddog_prof_EncodedProfile profile) { static ddog_ByteSlice get_bytes(ddog_prof_EncodedProfile *state) { ddog_prof_Result_ByteSlice raw_bytes = ddog_prof_EncodedProfile_bytes(state); if (raw_bytes.tag == DDOG_PROF_RESULT_BYTE_SLICE_ERR_BYTE_SLICE) { - rb_raise(datadog_profiling_internal_error_class, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err)); } return raw_bytes.ok; } diff --git a/ext/datadog_profiling_native_extension/heap_recorder.c b/ext/datadog_profiling_native_extension/heap_recorder.c index 3a6eaee7ccf..e8804fc1b44 100644 --- a/ext/datadog_profiling_native_extension/heap_recorder.c +++ b/ext/datadog_profiling_native_extension/heap_recorder.c @@ -300,7 +300,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj } if (heap_recorder->active_recording != NULL) { - rb_raise(datadog_profiling_error_class, "Detected consecutive heap allocation recording starts without end."); + RAISE_PROFILING_TELEMETRY_SAFE("Detected consecutive heap allocation recording starts without end."); } if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate || @@ -323,7 +323,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj VALUE ruby_obj_id = rb_obj_id(new_obj); if (!FIXNUM_P(ruby_obj_id)) { - rb_raise(datadog_profiling_error_class, "Detected a bignum object id. These are not supported by heap profiling."); + RAISE_PROFILING_TELEMETRY_SAFE("Detected a bignum object id. These are not supported by heap profiling."); } heap_recorder->active_recording = object_record_new( @@ -371,7 +371,7 @@ static VALUE end_heap_allocation_recording(VALUE protect_args) { if (active_recording == NULL) { // Recording ended without having been started? - rb_raise(datadog_profiling_error_class, "Ended a heap recording that was not started"); + RAISE_PROFILING_TELEMETRY_SAFE("Ended a heap recording that was not started"); } // From now on, mark the global active recording as invalid so we can short-circuit at any point // and not end up with a still active recording. the local active_recording still holds the @@ -487,14 +487,14 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot != NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(datadog_profiling_error_class, "New heap recorder iteration prepared without the previous one having been finished."); + RAISE_PROFILING_TELEMETRY_SAFE("New heap recorder iteration prepared without the previous one having been finished."); } heap_recorder_update(heap_recorder, /* full_update: */ true); heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records); if (heap_recorder->object_records_snapshot == NULL) { - rb_raise(datadog_profiling_error_class, "Failed to create heap snapshot."); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to create heap snapshot."); } } @@ -505,7 +505,7 @@ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot == NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(datadog_profiling_error_class, "Heap recorder iteration finished without having been prepared."); + RAISE_PROFILING_TELEMETRY_SAFE("Heap recorder iteration finished without having been prepared."); } st_free_table(heap_recorder->object_records_snapshot); @@ -733,7 +733,7 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec // needed to fully build the object_record. active_recording->heap_record = heap_record; if (heap_record->num_tracked_objects == UINT32_MAX) { - rb_raise(datadog_profiling_error_class, "Reached maximum number of tracked objects for heap record"); + RAISE_PROFILING_TELEMETRY_SAFE("Reached maximum number of tracked objects for heap record"); } heap_record->num_tracked_objects++; @@ -741,11 +741,11 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec if (existing_error) { object_record *existing_record = NULL; st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record); - if (existing_record == NULL) rb_raise(datadog_profiling_error_class, "Unexpected NULL when reading existing record"); + if (existing_record == NULL) RAISE_PROFILING_TELEMETRY_SAFE("Unexpected NULL when reading existing record"); VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record); VALUE new_inspect = object_record_inspect(heap_recorder, active_recording); - rb_raise(datadog_profiling_error_class, "Object ids are supposed to be unique. We got 2 allocation recordings with " + RAISE_PROFILING_TELEMETRY_UNSAFE("Object ids are supposed to be unique. We got 2 allocation recordings with " "the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect); } } @@ -781,7 +781,7 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec } if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) { - rb_raise(datadog_profiling_error_class, "Attempted to cleanup an untracked heap_record"); + RAISE_PROFILING_TELEMETRY_SAFE("Attempted to cleanup an untracked heap_record"); }; heap_record_free(heap_recorder, heap_record); } @@ -791,14 +791,14 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj // (See PROF-10656 Datadog-internal for details). Just in case, I've sprinkled a bunch of NULL tests in this function for now. // Once we figure out the issue we can get rid of them again. - if (heap_recorder == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder was NULL in on_committed_object_record_cleanup"); - if (heap_recorder->heap_records == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); - if (record == NULL) rb_raise(datadog_profiling_error_class, "record was NULL in on_committed_object_record_cleanup"); + if (heap_recorder == NULL) RAISE_PROFILING_TELEMETRY_SAFE("heap_recorder was NULL in on_committed_object_record_cleanup"); + if (heap_recorder->heap_records == NULL) RAISE_PROFILING_TELEMETRY_SAFE("heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); + if (record == NULL) RAISE_PROFILING_TELEMETRY_SAFE("record was NULL in on_committed_object_record_cleanup"); // Starting with the associated heap record. There will now be one less tracked object pointing to it heap_record *heap_record = record->heap_record; - if (heap_record == NULL) rb_raise(datadog_profiling_error_class, "heap_record was NULL in on_committed_object_record_cleanup"); + if (heap_record == NULL) RAISE_PROFILING_TELEMETRY_SAFE("heap_record was NULL in on_committed_object_record_cleanup"); heap_record->num_tracked_objects--; @@ -862,7 +862,7 @@ heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location l uint16_t frames_len = locations.len; if (frames_len > MAX_FRAMES_LIMIT) { // This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism - rb_raise(datadog_profiling_error_class, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); + RAISE_PROFILING_TELEMETRY_UNSAFE("Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); } heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above stack->num_tracked_objects = 0; @@ -933,21 +933,21 @@ static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(datadog_profiling_error_class, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(datadog_profiling_error_class, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) { ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id); if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) { - rb_raise(datadog_profiling_error_class, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); } VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message); ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok); diff --git a/ext/datadog_profiling_native_extension/http_transport.c b/ext/datadog_profiling_native_extension/http_transport.c index 4be49eb5fd6..f99e2af4483 100644 --- a/ext/datadog_profiling_native_extension/http_transport.c +++ b/ext/datadog_profiling_native_extension/http_transport.c @@ -112,7 +112,7 @@ static ddog_prof_ProfileExporter_Result create_exporter(VALUE exporter_configura } static void validate_token(ddog_CancellationToken token, const char *file, int line) { - if (token.inner == NULL) rb_raise(datadog_profiling_internal_error_class, "Unexpected: Validation token was empty at %s:%d", file, line); + if (token.inner == NULL) RAISE_PROFILING_TELEMETRY_UNSAFE("Unexpected: Validation token was empty at %s:%d", file, line); } static VALUE handle_exporter_failure(ddog_prof_ProfileExporter_Result exporter_result) { diff --git a/ext/datadog_profiling_native_extension/libdatadog_helpers.c b/ext/datadog_profiling_native_extension/libdatadog_helpers.c index feddda85bd4..64328b48e32 100644 --- a/ext/datadog_profiling_native_extension/libdatadog_helpers.c +++ b/ext/datadog_profiling_native_extension/libdatadog_helpers.c @@ -67,7 +67,7 @@ ddog_prof_ManagedStringId intern_or_raise(ddog_prof_ManagedStringStorage string_ ddog_prof_ManagedStringStorageInternResult intern_result = ddog_prof_ManagedStringStorage_intern(string_storage, string); if (intern_result.tag == DDOG_PROF_MANAGED_STRING_STORAGE_INTERN_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to intern string: %"PRIsVALUE, get_error_details_and_drop(&intern_result.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to intern string: %"PRIsVALUE, get_error_details_and_drop(&intern_result.err)); } return intern_result.ok; } @@ -80,6 +80,6 @@ void intern_all_or_raise( ) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_intern_all(string_storage, strings, output_ids, output_ids_size); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to intern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to intern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index 7a9ece97da2..c5bbcb520e5 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -49,8 +49,8 @@ // Declare exception class globals from ruby_helpers.h // (We can't include ruby_helpers.h here as it pulls in public Ruby headers that conflict with private VM headers) -extern VALUE datadog_profiling_error_class; -extern VALUE datadog_profiling_internal_error_class; +extern VALUE datadog_profiling_constant_error_class; +extern VALUE datadog_profiling_dynamic_error_class; #define PRIVATE_VM_API_ACCESS_SKIP_RUBY_INCLUDES #include "private_vm_api_access.h" @@ -743,7 +743,7 @@ void self_test_mn_enabled(void) { return; #else if (ddtrace_get_ractor()->threads.sched.enable_mn_threads == true) { - rb_raise(datadog_profiling_error_class, "Ruby VM is running with RUBY_MN_THREADS=1. This is not yet supported"); + rb_raise(datadog_profiling_constant_error_class, "Ruby VM is running with RUBY_MN_THREADS=1. This is not yet supported"); } #endif } @@ -876,11 +876,11 @@ bool is_raised_flag_set(VALUE thread) { return thread_struct_from_object(thread) expected_current_fiber = current_fiber_for(rb_thread_current()); } - if (expected_current_fiber != actual_current_fiber) rb_raise(datadog_profiling_error_class, "current_fiber_for() self-test failed"); + if (expected_current_fiber != actual_current_fiber) rb_raise(datadog_profiling_constant_error_class, "current_fiber_for() self-test failed"); } #else NORETURN(VALUE current_fiber_for(DDTRACE_UNUSED VALUE thread)); - VALUE current_fiber_for(DDTRACE_UNUSED VALUE thread) { rb_raise(datadog_profiling_error_class, "Not implemented for Ruby < 3.1"); } + VALUE current_fiber_for(DDTRACE_UNUSED VALUE thread) { rb_raise(datadog_profiling_constant_error_class, "Not implemented for Ruby < 3.1"); } void self_test_current_fiber_for(void) { } // Nothing to do #endif diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index 528ff83af91..33f845761cb 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -57,13 +57,13 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { // Initialize the ProfilingError exception class reference // This exception class should be defined in Ruby code (lib/datadog/profiling.rb) - datadog_profiling_error_class = rb_const_get(profiling_module, rb_intern("ProfilingError")); - rb_global_variable(&datadog_profiling_error_class); + datadog_profiling_constant_error_class = rb_const_get(profiling_module, rb_intern("ProfilingError")); + rb_global_variable(&datadog_profiling_constant_error_class); // Initialize the ProfilingInternalError exception class reference // This exception class should be defined in Ruby code (lib/datadog/profiling.rb) - datadog_profiling_internal_error_class = rb_const_get(profiling_module, rb_intern("ProfilingInternalError")); - rb_global_variable(&datadog_profiling_internal_error_class); + datadog_profiling_dynamic_error_class = rb_const_get(profiling_module, rb_intern("ProfilingInternalError")); + rb_global_variable(&datadog_profiling_dynamic_error_class); ruby_helpers_init(); collectors_cpu_and_wall_time_worker_init(profiling_module); @@ -125,7 +125,7 @@ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE except grab_gvl_and_raise(args.exception_class, "%s", args.test_message); } - rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); } static void *trigger_grab_gvl_and_raise(void *trigger_args) { @@ -161,7 +161,7 @@ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message); } - rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); } static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) { @@ -256,7 +256,7 @@ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VA replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler); - if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(datadog_profiling_error_class, "Could not signal background_thread"); + if (holding_the_gvl_signal_handler_result[0] == Qfalse) RAISE_PROFILING_TELEMETRY_SAFE("Could not signal background_thread"); VALUE result = rb_hash_new(); rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]); diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.c b/ext/datadog_profiling_native_extension/ruby_helpers.c index 36f4622f996..0b6ddbb44fd 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.c +++ b/ext/datadog_profiling_native_extension/ruby_helpers.c @@ -14,11 +14,11 @@ static ID to_s_id = Qnil; // Global reference to Datadog::Profiling::ProfilingError exception class // Initialized in profiling.c during extension initialization -VALUE datadog_profiling_error_class = Qnil; +VALUE datadog_profiling_constant_error_class = Qnil; // Global reference to Datadog::Profiling::ProfilingInternalError exception class // Initialized in profiling.c during extension initialization -VALUE datadog_profiling_internal_error_class = Qnil; +VALUE datadog_profiling_dynamic_error_class = Qnil; void ruby_helpers_init(void) { rb_global_variable(&module_object_space); @@ -52,7 +52,7 @@ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) { if (is_current_thread_holding_the_gvl()) { rb_raise( - datadog_profiling_error_class, + datadog_profiling_constant_error_class, "grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'", args.exception_message ); @@ -84,7 +84,7 @@ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) if (is_current_thread_holding_the_gvl()) { rb_raise( - datadog_profiling_error_class, + datadog_profiling_constant_error_class, "grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'", syserr_errno, args.exception_message diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.h b/ext/datadog_profiling_native_extension/ruby_helpers.h index 092e7e04d91..53704f38d77 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.h +++ b/ext/datadog_profiling_native_extension/ruby_helpers.h @@ -5,11 +5,22 @@ // Global reference to Datadog::Profiling::ProfilingError exception class // This is initialized in profiling.c during extension initialization -extern VALUE datadog_profiling_error_class; +// Used for raising exceptions with constant error messages +extern VALUE datadog_profiling_constant_error_class; // Global reference to Datadog::Profiling::ProfilingInternalError exception class // This is initialized in profiling.c during extension initialization -extern VALUE datadog_profiling_internal_error_class; +// Used for raising exceptions with dynamic/formatted error messages +extern VALUE datadog_profiling_dynamic_error_class; + +// Macro for raising ProfilingError with compile-time verified constant string +// The "" string concatenation trick ensures the message is a string literal at compile time +#define RAISE_PROFILING_TELEMETRY_SAFE(msg) \ + rb_raise(datadog_profiling_constant_error_class, "" msg) + +// Macro for raising ProfilingInternalError with dynamic/formatted content +#define RAISE_PROFILING_TELEMETRY_UNSAFE(fmt, ...) \ + rb_raise(datadog_profiling_dynamic_error_class, fmt, ##__VA_ARGS__) // Initialize internal data needed by some ruby helpers. Should be called during start, before any actual // usage of ruby helpers. diff --git a/ext/datadog_profiling_native_extension/setup_signal_handler.c b/ext/datadog_profiling_native_extension/setup_signal_handler.c index bab7897710d..8dff20141b1 100644 --- a/ext/datadog_profiling_native_extension/setup_signal_handler.c +++ b/ext/datadog_profiling_native_extension/setup_signal_handler.c @@ -71,11 +71,7 @@ static void install_sigprof_signal_handler_internal( ); } - rb_raise( - datadog_profiling_error_class, - "Could not install profiling signal handler (%s): There's a pre-existing SIGPROF signal handler", - handler_pretty_name - ); + RAISE_PROFILING_TELEMETRY_UNSAFE("Could not install profiling signal handler (%s): There's a pre-existing SIGPROF signal handler", handler_pretty_name); } } diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index 0794c014d03..60971d5af78 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -349,7 +349,7 @@ static VALUE _native_new(VALUE klass) { ddog_prof_ManagedStringStorageNewResult string_storage = ddog_prof_ManagedStringStorage_new(); if (string_storage.tag == DDOG_PROF_MANAGED_STRING_STORAGE_NEW_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); } state->string_storage = string_storage.ok; @@ -383,7 +383,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Val ddog_prof_Profile_with_string_storage(sample_types, NULL /* period is optional */, state->string_storage); if (slot_one_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err)); } state->profile_slot_one = (profile_slot) { .profile = slot_one_profile_result.ok, .start_timestamp = start_timestamp }; @@ -393,7 +393,7 @@ static void initialize_profiles(stack_recorder_state *state, ddog_prof_Slice_Val if (slot_two_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { // Note: No need to take any special care of slot one, it'll get cleaned up by stack_recorder_typed_data_free - rb_raise(datadog_profiling_internal_error_class, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); } state->profile_slot_two = (profile_slot) { .profile = slot_two_profile_result.ok, .start_timestamp = start_timestamp }; @@ -591,7 +591,7 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan ddog_prof_MaybeError result = args.advance_gen_result; if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } VALUE start = ruby_time_from(args.slot->start_timestamp); @@ -770,10 +770,10 @@ static void build_heap_profile_without_gvl(stack_recorder_state *state, profile_ // same locks are acquired by the heap recorder while holding the gvl (since we'd be operating on the // same locks but acquiring them in different order). if (!iterated) { - grab_gvl_and_raise(datadog_profiling_error_class, "Failure during heap profile building: iteration cancelled"); + grab_gvl_and_raise(datadog_profiling_constant_error_class, "Failure during heap profile building: iteration cancelled"); } else if (iteration_context.error) { - grab_gvl_and_raise(datadog_profiling_error_class, "Failure during heap profile building: %s", iteration_context.error_msg); + grab_gvl_and_raise(datadog_profiling_dynamic_error_class, "Failure during heap profile building: %s", iteration_context.error_msg); } } @@ -824,7 +824,7 @@ static locked_profile_slot sampler_lock_active_profile(stack_recorder_state *sta } // We already tried both multiple times, and we did not succeed. This is not expected to happen. Let's stop sampling. - rb_raise(datadog_profiling_error_class, "Failed to grab either mutex in sampler_lock_active_profile"); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to grab either mutex in sampler_lock_active_profile"); } static void sampler_unlock_active_profile(locked_profile_slot active_slot) { @@ -835,7 +835,7 @@ static profile_slot* serializer_flip_active_and_inactive_slots(stack_recorder_st int previously_active_slot = state->active_slot; if (previously_active_slot != 1 && previously_active_slot != 2) { - grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); + grab_gvl_and_raise(datadog_profiling_dynamic_error_class, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); } pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two; @@ -889,7 +889,7 @@ static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot) { return Qtrue; } else { ENFORCE_SUCCESS_GVL(error); - rb_raise(datadog_profiling_error_class, "Failed to raise exception in test_slot_mutex_state; this should never happen"); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to raise exception in test_slot_mutex_state; this should never happen"); } } @@ -941,7 +941,7 @@ static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_ins static void reset_profile_slot(profile_slot *slot, ddog_Timespec start_timestamp) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&slot->profile); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); } slot->start_timestamp = start_timestamp; slot->stats = (stats_slot) {}; @@ -1056,14 +1056,14 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ddog_prof_ManagedStringStorageNewResult string_storage = ddog_prof_ManagedStringStorage_new(); if (string_storage.tag == DDOG_PROF_MANAGED_STRING_STORAGE_NEW_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to initialize string storage: %"PRIsVALUE, get_error_details_and_drop(&string_storage.err)); } ddog_prof_Slice_ValueType sample_types = {.ptr = all_value_types, .len = ALL_VALUE_TYPES_COUNT}; ddog_prof_Profile_NewResult profile = ddog_prof_Profile_with_string_storage(sample_types, NULL, string_storage.ok); if (profile.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to initialize profile: %"PRIsVALUE, get_error_details_and_drop(&profile.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to initialize profile: %"PRIsVALUE, get_error_details_and_drop(&profile.err)); } ddog_prof_ManagedStringId hello = intern_or_raise(string_storage.ok, DDOG_CHARSLICE_C("hello")); @@ -1105,13 +1105,13 @@ static VALUE _native_test_managed_string_storage_produces_valid_profiles(DDTRACE ddog_prof_Profile_SerializeResult serialize_result = ddog_prof_Profile_serialize(&profile.ok, &start_timestamp, &finish_timestamp); if (serialize_result.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to serialize: %"PRIsVALUE, get_error_details_and_drop(&serialize_result.err)); } ddog_prof_MaybeError advance_gen_result = ddog_prof_ManagedStringStorage_advance_gen(string_storage.ok); if (advance_gen_result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(datadog_profiling_internal_error_class, "Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&advance_gen_result.some)); + RAISE_PROFILING_TELEMETRY_UNSAFE("Failed to advance string storage gen: %"PRIsVALUE, get_error_details_and_drop(&advance_gen_result.some)); } VALUE encoded_pprof_1 = from_ddog_prof_EncodedProfile(serialize_result.ok); diff --git a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c index b58234c5cfb..65ddd423299 100644 --- a/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c +++ b/ext/datadog_profiling_native_extension/unsafe_api_calls_check.c @@ -22,7 +22,7 @@ void unsafe_api_calls_check_init(void) { check_for_unsafe_api_calls_handle = rb_postponed_job_preregister(unused_flags, check_for_unsafe_api_calls, NULL); if (check_for_unsafe_api_calls_handle == POSTPONED_JOB_HANDLE_INVALID) { - rb_raise(datadog_profiling_error_class, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); + RAISE_PROFILING_TELEMETRY_SAFE("Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); } #endif }