From 107477770ba40a2c3349151dcf5d66f7d63b7413 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 19 Nov 2025 17:53:13 -0500 Subject: [PATCH 01/16] Send telemetry in forked children --- lib/datadog/core/buffer/random.rb | 6 + lib/datadog/core/buffer/thread_safe.rb | 4 + lib/datadog/core/telemetry/component.rb | 26 ++-- .../core/telemetry/event/app_started.rb | 15 ++ .../synth_app_client_configuration_change.rb | 31 +++- lib/datadog/core/telemetry/worker.rb | 54 ++++++- lib/datadog/core/utils/only_once.rb | 4 +- .../core/utils/only_once_successful.rb | 16 +- sig/datadog/core/buffer/random.rbs | 2 + .../synth_app_client_configuration_change.rbs | 8 +- sig/datadog/core/telemetry/worker.rbs | 2 +- sig/datadog/core/utils/only_once.rbs | 4 +- .../core/utils/only_once_successful.rbs | 8 +- .../core/configuration/deprecations_spec.rb | 2 +- .../core/configuration/stable_config_spec.rb | 2 +- spec/datadog/core/metrics/client_spec.rb | 2 +- spec/datadog/core/telemetry/component_spec.rb | 2 +- .../telemetry/integration/telemetry_spec.rb | 146 ++++++++++++++++++ spec/datadog/profiling/component_spec.rb | 4 +- spec/datadog/profiling/tasks/setup_spec.rb | 2 +- spec/datadog/tracing/component_spec.rb | 2 +- .../tracing/contrib/lograge/patcher_spec.rb | 2 +- .../tracing/contrib/sidekiq/patcher_spec.rb | 2 +- .../tracing/contrib/sidekiq/support/helper.rb | 4 +- spec/support/loaded_gem.rb | 8 +- 25 files changed, 302 insertions(+), 56 deletions(-) diff --git a/lib/datadog/core/buffer/random.rb b/lib/datadog/core/buffer/random.rb index 33838d951a6..dd763169ad0 100644 --- a/lib/datadog/core/buffer/random.rb +++ b/lib/datadog/core/buffer/random.rb @@ -84,6 +84,12 @@ def closed? @closed end + # Discards the contents of the buffer. + def clear + @items = [] + nil + end + protected # Segment items into two segments: underflow and overflow. diff --git a/lib/datadog/core/buffer/thread_safe.rb b/lib/datadog/core/buffer/thread_safe.rb index cdf9df9f0f6..a99d4e4342b 100644 --- a/lib/datadog/core/buffer/thread_safe.rb +++ b/lib/datadog/core/buffer/thread_safe.rb @@ -49,6 +49,10 @@ def close synchronize { super } end + def clear + synchronize { super } + end + def synchronize(&block) @mutex.synchronize(&block) end diff --git a/lib/datadog/core/telemetry/component.rb b/lib/datadog/core/telemetry/component.rb index d29dfa6f654..aa6a96d01e7 100644 --- a/lib/datadog/core/telemetry/component.rb +++ b/lib/datadog/core/telemetry/component.rb @@ -14,8 +14,8 @@ module Datadog module Core module Telemetry - # Telemetry entrypoint, coordinates sending telemetry events at various points in app lifecycle. - # Note: Telemetry does not spawn its worker thread in fork processes, thus no telemetry is sent in forked processes. + # Telemetry entry point, coordinates sending telemetry events at + # various points in application lifecycle. # # @api private class Component @@ -23,6 +23,10 @@ class Component attr_reader :enabled, :logger, :transport, :worker + # Alias for consistency with other components. + # TODO Remove +enabled+ method + alias_method :enabled?, :enabled + include Core::Utils::Forking include Telemetry::Logging @@ -104,13 +108,17 @@ def initialize( # standard:disable Metrics/MethodLength @agent_settings = agent_settings end + attr_reader :settings + attr_reader :agent_settings + attr_reader :logger + def disable! @enabled = false @worker&.enabled = false end def start(initial_event_is_change = false, components:) - return if !@enabled + return unless enabled? initial_event = if initial_event_is_change Event::SynthAppClientConfigurationChange.new( @@ -136,19 +144,19 @@ def shutdown! end def emit_closing! - return if !@enabled || forked? + return unless enabled? @worker.enqueue(Event::AppClosing.new) end def integrations_change! - return if !@enabled || forked? + return unless enabled? @worker.enqueue(Event::AppIntegrationsChange.new) end def log!(event) - return if !@enabled || forked? || !@log_collection_enabled + return unless enabled? && @log_collection_enabled @worker.enqueue(event) end @@ -159,21 +167,21 @@ def log!(event) # # @api private def flush - return if !@enabled || forked? + return unless enabled? @worker.flush end # Report configuration changes caused by Remote Configuration. def client_configuration_change!(changes) - return if !@enabled || forked? + return unless enabled? @worker.enqueue(Event::AppClientConfigurationChange.new(changes, 'remote_config')) end # Report application endpoints def app_endpoints_loaded(endpoints, page_size: ENDPOINT_COLLECTION_MESSAGE_LIMIT) - return if !@enabled || forked? + return unless enabled? endpoints.each_slice(page_size).with_index do |endpoints_slice, i| @worker.enqueue(Event::AppEndpointsLoaded.new(endpoints_slice, is_first: i.zero?)) diff --git a/lib/datadog/core/telemetry/event/app_started.rb b/lib/datadog/core/telemetry/event/app_started.rb index 13cd2bda259..c6683558629 100644 --- a/lib/datadog/core/telemetry/event/app_started.rb +++ b/lib/datadog/core/telemetry/event/app_started.rb @@ -11,6 +11,12 @@ class AppStarted < Base def initialize(components:) # To not hold a reference to the component tree, generate # the event payload here in the constructor. + # + # Important: do not store data that contains (or is derived from) + # the runtime_id oor sequence numbers. + # This event is reused when a process forks, but in the + # child process the runtime_id would be different and sequence + # number would obviously also be different. @configuration = configuration(components.settings, components.agent_settings) @install_signature = install_signature(components.settings) @products = products(components) @@ -30,6 +36,15 @@ def payload } end + # Whether the event is actually the app-started event. + # For the app-started event we follow up by sending + # app-dependencies-loaded, if the event is + # app-client-configuration-change we don't send + # app-dependencies-loaded. + def app_started? + true + end + private def products(components) diff --git a/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb b/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb index 6ddbe9cffeb..de9e9bd7d18 100644 --- a/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb +++ b/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb @@ -28,13 +28,36 @@ module Event # and app-closing events. class SynthAppClientConfigurationChange < AppStarted def type - 'app-client-configuration-change' + if reset? + super + else + 'app-client-configuration-change' + end end def payload - { - configuration: @configuration, - } + if reset? + super + else + { + configuration: @configuration, + } + end + end + + def app_started? + reset? + end + + # Revert this event to a "regular" AppStarted event. + # + # Used in after_fork to send the AppStarted event in child processes. + def reset! + @reset = true + end + + def reset? + !!@reset end end end diff --git a/lib/datadog/core/telemetry/worker.rb b/lib/datadog/core/telemetry/worker.rb index 2a0e3a0b915..c7fa5d5d30e 100644 --- a/lib/datadog/core/telemetry/worker.rb +++ b/lib/datadog/core/telemetry/worker.rb @@ -9,7 +9,10 @@ module Datadog module Core module Telemetry - # Accumulates events and sends them to the API at a regular interval, including heartbeat event. + # Accumulates events and sends them to the API at a regular interval, + # including heartbeat event. + # + # @api private class Worker include Core::Workers::Queue include Core::Workers::Polling @@ -40,7 +43,7 @@ def initialize( self.enabled = enabled # Workers::IntervalLoop settings self.loop_base_interval = metrics_aggregation_interval_seconds - self.fork_policy = Core::Workers::Async::Thread::FORK_POLICY_STOP + self.fork_policy = Core::Workers::Async::Thread::FORK_POLICY_RESTART @shutdown_timeout = shutdown_timeout @buffer_size = buffer_size @@ -53,12 +56,13 @@ def initialize( attr_reader :logger attr_reader :initial_event_once attr_reader :initial_event + attr_reader :emitter # Returns true if worker thread is successfully started, # false if worker thread was not started but telemetry is enabled, # nil if telemetry is disabled. def start(initial_event) - return if !enabled? || forked? + return unless enabled? @initial_event = initial_event @@ -79,7 +83,21 @@ def stop(force_stop = false, timeout = @shutdown_timeout) # for not enqueueing event (presently) is that telemetry is disabled # altogether, and in this case other methods return nil. def enqueue(event) - return if !enabled? || forked? + return unless enabled? + + # Start the worker if needed, including in forked children. + # Needs to be done before pushing to buffer since perform + # may invoke after_fork handler which resets the buffer. + # + # Telemetry is special in that it permits events to be submitted + # to the worker with the worker not running, and the worker is + # explicitly started later (to maintain proper initialization order). + # Thus here we can't just call perform unconditionally and must + # check if the worker is supposed to be running, and only call + # perform in that case. + if worker && !worker.alive? + perform + end buffer.push(event) true @@ -133,7 +151,7 @@ def flush private def perform(*events) - return if !enabled? || forked? + return unless enabled? if need_initial_event? started! @@ -189,7 +207,9 @@ def started! # dependencies and send the new ones. # System tests demand only one instance of this event per # dependency. - send_event(Event::AppDependenciesLoaded.new) if @dependency_collection && initial_event.class.eql?(Telemetry::Event::AppStarted) # standard:disable Style/ClassEqualityComparison: + if @dependency_collection && initial_event.app_started? + send_event(Event::AppDependenciesLoaded.new) + end true else @@ -240,6 +260,28 @@ def disable_on_not_found!(response) disable! end + # Stop the worker after fork without sending closing event. + # The closing event will be (or should be) sent by the worker + # in the parent process. + # Also, discard any accumulated events since they will be sent by + # the parent. + def after_fork + # If telemetry is disabled, we still reset the state to avoid + # having wrong state. It is possible that in the future telemetry + # will be re-enabled after errors. + buffer.clear + initial_event_once.reset + # In the child process, we get a new runtime_id. + # As such we need to send AppStarted event. + # In the parent process, the event may have been the + # SynthAppClientConfigurationChange instead of AppStarted, + # and in that case we need to convert it to the "regular" + # AppStarted event. + if @initial_event.is_a?(Event::SynthAppClientConfigurationChange) + @initial_event.reset! # steep:ignore + end + end + # Deduplicate logs by counting the number of repeated occurrences of the same log # entry and replacing them with a single entry with the calculated `count` value. # Non-log events are unchanged. diff --git a/lib/datadog/core/utils/only_once.rb b/lib/datadog/core/utils/only_once.rb index d56b41cf80b..eb423085429 100644 --- a/lib/datadog/core/utils/only_once.rb +++ b/lib/datadog/core/utils/only_once.rb @@ -31,9 +31,7 @@ def ran? @mutex.synchronize { @ran_once } end - private - - def reset_ran_once_state_for_tests + def reset @mutex.synchronize { @ran_once = false } end end diff --git a/lib/datadog/core/utils/only_once_successful.rb b/lib/datadog/core/utils/only_once_successful.rb index 7926192ded0..4d1b020f8d7 100644 --- a/lib/datadog/core/utils/only_once_successful.rb +++ b/lib/datadog/core/utils/only_once_successful.rb @@ -65,6 +65,14 @@ def failed? @mutex.synchronize { @ran_once && @failed } end + def reset + @mutex.synchronize do + @ran_once = false + @failed = false + @retries = 0 + end + end + private def check_limit! @@ -77,14 +85,6 @@ def check_limit! def limited? !@limit.nil? end - - def reset_ran_once_state_for_tests - @mutex.synchronize do - @ran_once = false - @failed = false - @retries = 0 - end - end end end end diff --git a/sig/datadog/core/buffer/random.rbs b/sig/datadog/core/buffer/random.rbs index 4e1c46b3222..cf299a2c708 100644 --- a/sig/datadog/core/buffer/random.rbs +++ b/sig/datadog/core/buffer/random.rbs @@ -35,6 +35,8 @@ module Datadog def push: (Object) -> Object? def replace!: (Object) -> Object? + + def clear: -> void end end end diff --git a/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs b/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs index 24331290384..1df1a93e40b 100644 --- a/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs +++ b/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs @@ -3,9 +3,13 @@ module Datadog module Telemetry module Event class SynthAppClientConfigurationChange < AppStarted - def type: () -> "app-client-configuration-change" + def type: -> "app-client-configuration-change" - def payload: () -> { configuration: untyped } + def payload: () -> { ?products: untyped, configuration: untyped, ?install_signature: untyped } + + def reset?: -> bool + + def reset!: -> void end end end diff --git a/sig/datadog/core/telemetry/worker.rbs b/sig/datadog/core/telemetry/worker.rbs index 4db805e7221..67ad2401272 100644 --- a/sig/datadog/core/telemetry/worker.rbs +++ b/sig/datadog/core/telemetry/worker.rbs @@ -22,7 +22,7 @@ module Datadog @logger: ::Logger attr_reader logger: ::Logger - attr_reader initial_event: Telemetry::Event::Base + attr_reader initial_event: Telemetry::Event::AppStarted attr_reader initial_event_once: Datadog::Core::Utils::OnlyOnceSuccessful def initialize: (?enabled: bool, heartbeat_interval_seconds: Float, metrics_aggregation_interval_seconds: Float, emitter: Emitter, metrics_manager: MetricsManager, ?shutdown_timeout: Float | Integer, ?buffer_size: Integer, dependency_collection: bool, logger: ::Logger) -> void diff --git a/sig/datadog/core/utils/only_once.rbs b/sig/datadog/core/utils/only_once.rbs index 354334c71e1..03247bfdf41 100644 --- a/sig/datadog/core/utils/only_once.rbs +++ b/sig/datadog/core/utils/only_once.rbs @@ -11,9 +11,7 @@ module Datadog def ran?: () -> untyped - private - - def reset_ran_once_state_for_tests: () -> untyped + def reset: -> void end end end diff --git a/sig/datadog/core/utils/only_once_successful.rbs b/sig/datadog/core/utils/only_once_successful.rbs index 2236b5a66b5..bf43c172d05 100644 --- a/sig/datadog/core/utils/only_once_successful.rbs +++ b/sig/datadog/core/utils/only_once_successful.rbs @@ -8,15 +8,15 @@ module Datadog def initialize: (?Integer limit) -> void - def success?: () -> bool + def success?: -> bool - def failed?: () -> bool + def failed?: -> bool private - def check_limit!: () -> void + def check_limit!: -> void - def limited?: () -> bool + def limited?: -> bool end end end diff --git a/spec/datadog/core/configuration/deprecations_spec.rb b/spec/datadog/core/configuration/deprecations_spec.rb index fccebcd9324..7b1c41692d0 100644 --- a/spec/datadog/core/configuration/deprecations_spec.rb +++ b/spec/datadog/core/configuration/deprecations_spec.rb @@ -16,7 +16,7 @@ end before do - described_class.const_get('LOG_DEPRECATIONS_ONLY_ONCE').send(:reset_ran_once_state_for_tests) + described_class.const_get('LOG_DEPRECATIONS_ONLY_ONCE').reset end context 'when deprecated env is set in ENV' do diff --git a/spec/datadog/core/configuration/stable_config_spec.rb b/spec/datadog/core/configuration/stable_config_spec.rb index 3ec522d3f13..43e6ed1b930 100644 --- a/spec/datadog/core/configuration/stable_config_spec.rb +++ b/spec/datadog/core/configuration/stable_config_spec.rb @@ -157,7 +157,7 @@ describe '#log_result' do before do - described_class.const_get(:LOG_ONLY_ONCE).send(:reset_ran_once_state_for_tests) + described_class.const_get(:LOG_ONLY_ONCE).reset end it 'calls logger.debug' do diff --git a/spec/datadog/core/metrics/client_spec.rb b/spec/datadog/core/metrics/client_spec.rb index 89a474bac9d..231d7cac316 100644 --- a/spec/datadog/core/metrics/client_spec.rb +++ b/spec/datadog/core/metrics/client_spec.rb @@ -76,7 +76,7 @@ let(:options) { {statsd: statsd} } before do - described_class.const_get('IGNORED_STATSD_ONLY_ONCE').send(:reset_ran_once_state_for_tests) + described_class.const_get('IGNORED_STATSD_ONLY_ONCE').reset allow(logger).to receive(:warn) end diff --git a/spec/datadog/core/telemetry/component_spec.rb b/spec/datadog/core/telemetry/component_spec.rb index b32c5ab9a7a..e2e65f112f2 100644 --- a/spec/datadog/core/telemetry/component_spec.rb +++ b/spec/datadog/core/telemetry/component_spec.rb @@ -365,7 +365,7 @@ event = instance_double(Datadog::Core::Telemetry::Event::Log) telemetry.log!(event) - expect(worker).not_to have_received(:enqueue) + expect(worker).to have_received(:enqueue).with(event) end end end diff --git a/spec/datadog/core/telemetry/integration/telemetry_spec.rb b/spec/datadog/core/telemetry/integration/telemetry_spec.rb index fdc640605fe..f93cc0bc5a3 100644 --- a/spec/datadog/core/telemetry/integration/telemetry_spec.rb +++ b/spec/datadog/core/telemetry/integration/telemetry_spec.rb @@ -467,4 +467,150 @@ # Network I/O is mocked end end + + context 'when process forks' do + # The mode is irrelevant but we need settings from the context. + include_context 'agent mode' + + context 'when telemetry is disabled' do + before do + settings.telemetry.enabled = false + end + + it 'stays disabled in child process' do + expect(component.enabled?).to be false + expect(component.worker).to be nil + + expect_in_fork do + expect(component.enabled?).to be false + expect(component.worker).to be nil + end + end + end + + context 'when telemetry is enabled' do + before do + settings.telemetry.enabled = true + end + + it 'stays enabled in child process' do + expect(component.enabled?).to be true + expect(component.worker).to be_a(Datadog::Core::Telemetry::Worker) + expect(component.worker.enabled?).to be true + + expect_in_fork do + expect(component.enabled?).to be true + expect(component.worker.enabled?).to be true + end + end + + context 'when worker is running' do + let(:initial_event) do + Datadog::Core::Telemetry::Event::AppStarted.new(components: Datadog.send(:components)) + end + + before do + component.worker.start(initial_event) + end + + it 'restarts worker when event is enqueued' do + expect(component.enabled?).to be true + expect(component.worker).to be_a(Datadog::Core::Telemetry::Worker) + expect(component.worker.enabled?).to be true + expect(component.worker.running?).to be true + + expect_in_fork do + expect(component.enabled?).to be true + expect(component.worker.enabled?).to be true + expect(component.worker.running?).to be false + + # Queueing an event will restart the worker in the forked child. + component.worker.enqueue(Datadog::Core::Telemetry::Event::AppHeartbeat.new) + + expect(component.worker.running?).to be true + end + end + end + + describe 'events generated in forked child' do + # Behavior in the child should be the same regardless of what + # was sent in the parent, because the child is a new application + # (process) from the backend's perspective. + def fork_and_assert + sent_payloads.clear + + expect_in_fork do + component.worker.enqueue(Datadog::Core::Telemetry::Event::AppHeartbeat.new) + + component.flush + end + + expect(sent_payloads.length).to eq 3 + + payload = sent_payloads[0].fetch(:payload) + expect(payload).to include( + 'request_type' => 'app-started', + ) + payload = sent_payloads[1].fetch(:payload) + # The app-dependencies-loaded assertion is also critical here, + # since there is no other test coverage for the + # app-dependencies-loaded event being sent in the forked child. + expect(payload).to include( + 'request_type' => 'app-dependencies-loaded', + ) + payload = sent_payloads[2].fetch(:payload) + expect(payload).to include( + 'request_type' => 'message-batch', + ) + expect(payload.fetch('payload').first).to include( + 'request_type' => 'app-heartbeat', + ) + end + + context 'when initial event is SynthAppClientConfigurationChange' do + let(:initial_event) do + Datadog::Core::Telemetry::Event::SynthAppClientConfigurationChange.new(components: Datadog.send(:components)) + end + + it 'produces correct events in the child' do + component.worker.start(initial_event) + component.flush + + expect(sent_payloads.length).to eq 1 + + payload = sent_payloads[0].fetch(:payload) + expect(payload).to include( + 'request_type' => 'app-client-configuration-change', + ) + + fork_and_assert + end + end + + context 'when initial event is AppStarted' do + let(:initial_event) do + Datadog::Core::Telemetry::Event::AppStarted.new(components: Datadog.send(:components)) + end + + it 'produces correct events in the child' do + component.worker.start(initial_event) + component.flush + + expect(sent_payloads.length).to eq 2 + + payload = sent_payloads[0].fetch(:payload) + expect(payload).to include( + 'request_type' => 'app-started', + ) + payload = sent_payloads[1].fetch(:payload) + expect(payload).to include( + 'request_type' => 'app-dependencies-loaded', + ) + + fork_and_assert + end + end + end + end + end end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 15d97b7d7c5..a162eb72fe3 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -212,7 +212,7 @@ settings.profiling.advanced.gc_enabled = false # Disable this to avoid any additional warnings coming from it stub_const("RUBY_VERSION", testing_version) - described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).send(:reset_ran_once_state_for_tests) + described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).reset end context "on Ruby 2.x" do @@ -354,7 +354,7 @@ before do settings.profiling.allocation_enabled = true - described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).send(:reset_ran_once_state_for_tests) + described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).reset end it "initializes StackRecorder with heap sampling support and warns" do diff --git a/spec/datadog/profiling/tasks/setup_spec.rb b/spec/datadog/profiling/tasks/setup_spec.rb index c8ddc729d66..03221ffded6 100644 --- a/spec/datadog/profiling/tasks/setup_spec.rb +++ b/spec/datadog/profiling/tasks/setup_spec.rb @@ -10,7 +10,7 @@ subject(:run) { task.run } before do - described_class::ACTIVATE_EXTENSIONS_ONLY_ONCE.send(:reset_ran_once_state_for_tests) + described_class::ACTIVATE_EXTENSIONS_ONLY_ONCE.reset end it "actives the forking extension before setting up the at_fork hooks" do diff --git a/spec/datadog/tracing/component_spec.rb b/spec/datadog/tracing/component_spec.rb index fd2cf1bc3fd..65e78a9db40 100644 --- a/spec/datadog/tracing/component_spec.rb +++ b/spec/datadog/tracing/component_spec.rb @@ -535,7 +535,7 @@ let(:responses) { [double('response')] } before do - Datadog::Tracing::Component::WRITER_RECORD_ENVIRONMENT_INFORMATION_ONLY_ONCE.send(:reset_ran_once_state_for_tests) + Datadog::Tracing::Component::WRITER_RECORD_ENVIRONMENT_INFORMATION_ONLY_ONCE.reset end it 'invokes the environment logger with responses' do diff --git a/spec/datadog/tracing/contrib/lograge/patcher_spec.rb b/spec/datadog/tracing/contrib/lograge/patcher_spec.rb index 2758e1ed2e5..2254f7faf39 100644 --- a/spec/datadog/tracing/contrib/lograge/patcher_spec.rb +++ b/spec/datadog/tracing/contrib/lograge/patcher_spec.rb @@ -5,7 +5,7 @@ RSpec.describe Datadog::Tracing::Contrib::Lograge::Patcher do describe '.patch' do - before { described_class.instance_variable_get(:@patch_only_once)&.send(:reset_ran_once_state_for_tests) } + before { described_class.instance_variable_get(:@patch_only_once)&.reset } it 'adds Instrumentation to ancestors of LogSubscribers::Base class' do described_class.patch diff --git a/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb b/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb index a5ab336b48c..dee10f45b69 100644 --- a/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb +++ b/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb @@ -20,7 +20,7 @@ stub_const('Sidekiq::ServerInternalTracer::RedisInfo', Class.new) # NB: This is needed because we want to patch multiple times. - described_class.instance_variable_get(:@patch_only_once)&.send(:reset_ran_once_state_for_tests) + described_class.instance_variable_get(:@patch_only_once)&.reset end # NB: This needs to be after the before block above so that the use :sidekiq diff --git a/spec/datadog/tracing/contrib/sidekiq/support/helper.rb b/spec/datadog/tracing/contrib/sidekiq/support/helper.rb index 4503a53e6a5..2bf606cd222 100644 --- a/spec/datadog/tracing/contrib/sidekiq/support/helper.rb +++ b/spec/datadog/tracing/contrib/sidekiq/support/helper.rb @@ -54,7 +54,7 @@ def expect_in_sidekiq_server(wait_until: nil) # NB: This is needed because we want to patch within a forked process. Datadog::Tracing::Contrib::Sidekiq::Patcher .instance_variable_get(:@patch_only_once) - &.send(:reset_ran_once_state_for_tests) + &.reset require 'sidekiq/cli' @@ -94,7 +94,7 @@ def expect_after_stopping_sidekiq_server # NB: This is needed because we want to patch within a forked process. Datadog::Tracing::Contrib::Sidekiq::Patcher .instance_variable_get(:@patch_only_once) - &.send(:reset_ran_once_state_for_tests) + &.reset require 'sidekiq/cli' diff --git a/spec/support/loaded_gem.rb b/spec/support/loaded_gem.rb index 5f8edde03d2..8d9daa84861 100644 --- a/spec/support/loaded_gem.rb +++ b/spec/support/loaded_gem.rb @@ -38,14 +38,14 @@ def decrement_gem_version(version) def remove_patch!(integration, patch_key = :patch) if (integration.is_a?(Module) || integration.is_a?(Class)) && integration <= Datadog::Tracing::Contrib::Patcher - integration::PATCH_ONLY_ONCE.send(:reset_ran_once_state_for_tests) if defined?(integration::PATCH_ONLY_ONCE) + integration::PATCH_ONLY_ONCE.reset if defined?(integration::PATCH_ONLY_ONCE) if integration.respond_to?(:patch_only_once, true) - integration.send(:patch_only_once).send(:reset_ran_once_state_for_tests) + integration.send(:patch_only_once).reset end elsif Datadog.registry[integration].respond_to?(:patcher) Datadog.registry[integration].patcher.tap do |patcher| - patcher::PATCH_ONLY_ONCE.send(:reset_ran_once_state_for_tests) if defined?(patcher::PATCH_ONLY_ONCE) - patcher.send(:patch_only_once).send(:reset_ran_once_state_for_tests) if patcher.respond_to?( + patcher::PATCH_ONLY_ONCE.reset if defined?(patcher::PATCH_ONLY_ONCE) + patcher.send(:patch_only_once).reset if patcher.respond_to?( :patch_only_once, true ) From fdf0fe9a93e5591baf3fba411ac6e6e84b26b893 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 20 Nov 2025 12:58:35 -0500 Subject: [PATCH 02/16] consolidate attr_readers --- lib/datadog/core/telemetry/component.rb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/datadog/core/telemetry/component.rb b/lib/datadog/core/telemetry/component.rb index aa6a96d01e7..e67c15f4dd6 100644 --- a/lib/datadog/core/telemetry/component.rb +++ b/lib/datadog/core/telemetry/component.rb @@ -21,7 +21,12 @@ module Telemetry class Component ENDPOINT_COLLECTION_MESSAGE_LIMIT = 300 - attr_reader :enabled, :logger, :transport, :worker + attr_reader :enabled + attr_reader :logger + attr_reader :transport + attr_reader :worker + attr_reader :settings + attr_reader :agent_settings # Alias for consistency with other components. # TODO Remove +enabled+ method @@ -108,10 +113,6 @@ def initialize( # standard:disable Metrics/MethodLength @agent_settings = agent_settings end - attr_reader :settings - attr_reader :agent_settings - attr_reader :logger - def disable! @enabled = false @worker&.enabled = false From 1e15731d827a960c4137bac98e317f472b4ae1ec Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 20 Nov 2025 13:02:23 -0500 Subject: [PATCH 03/16] types --- sig/datadog/core/telemetry/component.rbs | 2 ++ sig/datadog/core/telemetry/event/app_started.rbs | 2 ++ .../telemetry/event/synth_app_client_configuration_change.rbs | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sig/datadog/core/telemetry/component.rbs b/sig/datadog/core/telemetry/component.rbs index 0d59476e33f..c2f54a34b38 100644 --- a/sig/datadog/core/telemetry/component.rbs +++ b/sig/datadog/core/telemetry/component.rbs @@ -23,6 +23,8 @@ module Datadog def self.build: (untyped settings, Datadog::Core::Configuration::AgentSettings agent_settings, Datadog::Core::Logger logger) -> Component def initialize: (logger: Core::Logger, settings: untyped, agent_settings: Datadog::Core::Configuration::AgentSettings, enabled: true | false) -> void + + def enabled?: -> bool def disable!: () -> void diff --git a/sig/datadog/core/telemetry/event/app_started.rbs b/sig/datadog/core/telemetry/event/app_started.rbs index d0b7871149e..7d5ec815b26 100644 --- a/sig/datadog/core/telemetry/event/app_started.rbs +++ b/sig/datadog/core/telemetry/event/app_started.rbs @@ -8,6 +8,8 @@ module Datadog def type: () -> "app-started" def payload: () -> { products: untyped, configuration: untyped, install_signature: untyped } + + def app_started?: -> bool private diff --git a/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs b/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs index 1df1a93e40b..188d7bcc9b1 100644 --- a/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs +++ b/sig/datadog/core/telemetry/event/synth_app_client_configuration_change.rbs @@ -3,7 +3,7 @@ module Datadog module Telemetry module Event class SynthAppClientConfigurationChange < AppStarted - def type: -> "app-client-configuration-change" + def type: -> ("app-client-configuration-change" | "app-started") def payload: () -> { ?products: untyped, configuration: untyped, ?install_signature: untyped } From 8ed76348b182932abeb086d217cd3d791e686b92 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 20 Nov 2025 13:33:22 -0500 Subject: [PATCH 04/16] skip fork tests on jruby --- spec/datadog/core/telemetry/integration/telemetry_spec.rb | 2 ++ spec/support/core_helpers.rb | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/spec/datadog/core/telemetry/integration/telemetry_spec.rb b/spec/datadog/core/telemetry/integration/telemetry_spec.rb index f93cc0bc5a3..14ad0d373c0 100644 --- a/spec/datadog/core/telemetry/integration/telemetry_spec.rb +++ b/spec/datadog/core/telemetry/integration/telemetry_spec.rb @@ -469,6 +469,8 @@ end context 'when process forks' do + skip_unless_fork_supported + # The mode is irrelevant but we need settings from the context. include_context 'agent mode' diff --git a/spec/support/core_helpers.rb b/spec/support/core_helpers.rb index 5caca383079..50658a62110 100644 --- a/spec/support/core_helpers.rb +++ b/spec/support/core_helpers.rb @@ -71,6 +71,13 @@ def skip_unless_integration_testing_enabled end end + def skip_unless_fork_supported + unless Process.respond_to?(:fork) + before(:all) do + skip 'Fork is not supported on current platform' + end + end + # Positional and keyword arguments are both accepted to make the method # work on Ruby 2.5/2.6 and 2.7+. In practice only one type of arguments # should be used in any given call. From 8dc1c4a4568e4a5e45ff26c6fa73f4142fe68ddb Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 20 Nov 2025 13:34:23 -0500 Subject: [PATCH 05/16] mark as integration tests --- .../datadog/core/telemetry/integration/full_integration_spec.rb | 2 ++ spec/datadog/core/telemetry/integration/telemetry_spec.rb | 2 ++ 2 files changed, 4 insertions(+) diff --git a/spec/datadog/core/telemetry/integration/full_integration_spec.rb b/spec/datadog/core/telemetry/integration/full_integration_spec.rb index 2b1fbb54830..8bd6f55bef6 100644 --- a/spec/datadog/core/telemetry/integration/full_integration_spec.rb +++ b/spec/datadog/core/telemetry/integration/full_integration_spec.rb @@ -5,6 +5,8 @@ require 'datadog/core/telemetry/component' RSpec.describe 'Telemetry full integration tests' do + skip_unless_integration_testing_enabled + context 'when Datadog.configure is used' do let(:worker1) do double(Datadog::Core::Telemetry::Worker) diff --git a/spec/datadog/core/telemetry/integration/telemetry_spec.rb b/spec/datadog/core/telemetry/integration/telemetry_spec.rb index 14ad0d373c0..1793b4760ae 100644 --- a/spec/datadog/core/telemetry/integration/telemetry_spec.rb +++ b/spec/datadog/core/telemetry/integration/telemetry_spec.rb @@ -5,6 +5,8 @@ require 'datadog/core/telemetry/component' RSpec.describe 'Telemetry integration tests' do + skip_unless_integration_testing_enabled + # Although the tests override the environment variables, if any, # with programmatic configuration, that may produce warnings from the # configuration code. Remove environment variables to suppress the warnings. From e8b48cab667b020e923955852fe012ada92c0b6b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 20 Nov 2025 13:38:28 -0500 Subject: [PATCH 06/16] fix syntax --- spec/support/core_helpers.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/support/core_helpers.rb b/spec/support/core_helpers.rb index 50658a62110..1757d031f2d 100644 --- a/spec/support/core_helpers.rb +++ b/spec/support/core_helpers.rb @@ -73,8 +73,9 @@ def skip_unless_integration_testing_enabled def skip_unless_fork_supported unless Process.respond_to?(:fork) - before(:all) do - skip 'Fork is not supported on current platform' + before(:all) do + skip 'Fork is not supported on current platform' + end end end From f2e7239e4a6d7775489bc24faf6b1f6a29adf9b6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 20 Nov 2025 13:48:19 -0500 Subject: [PATCH 07/16] tabs --- sig/datadog/core/buffer/random.rbs | 2 +- sig/datadog/core/telemetry/component.rbs | 2 +- sig/datadog/core/telemetry/event/app_started.rbs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sig/datadog/core/buffer/random.rbs b/sig/datadog/core/buffer/random.rbs index cf299a2c708..edab2bf29bc 100644 --- a/sig/datadog/core/buffer/random.rbs +++ b/sig/datadog/core/buffer/random.rbs @@ -36,7 +36,7 @@ module Datadog def replace!: (Object) -> Object? - def clear: -> void + def clear: -> void end end end diff --git a/sig/datadog/core/telemetry/component.rbs b/sig/datadog/core/telemetry/component.rbs index c2f54a34b38..ef1e2ced28c 100644 --- a/sig/datadog/core/telemetry/component.rbs +++ b/sig/datadog/core/telemetry/component.rbs @@ -24,7 +24,7 @@ module Datadog def initialize: (logger: Core::Logger, settings: untyped, agent_settings: Datadog::Core::Configuration::AgentSettings, enabled: true | false) -> void - def enabled?: -> bool + def enabled?: -> bool def disable!: () -> void diff --git a/sig/datadog/core/telemetry/event/app_started.rbs b/sig/datadog/core/telemetry/event/app_started.rbs index 7d5ec815b26..68bfba41755 100644 --- a/sig/datadog/core/telemetry/event/app_started.rbs +++ b/sig/datadog/core/telemetry/event/app_started.rbs @@ -9,7 +9,7 @@ module Datadog def payload: () -> { products: untyped, configuration: untyped, install_signature: untyped } - def app_started?: -> bool + def app_started?: -> bool private From 1fb050b544a2387d3dd385348c127828f9986958 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 24 Nov 2025 11:00:25 -0500 Subject: [PATCH 08/16] go back to reset_ran_once_state_for_tests --- lib/datadog/core/utils/only_once.rb | 4 +++- lib/datadog/core/utils/only_once_successful.rb | 16 ++++++++-------- sig/datadog/core/utils/only_once.rbs | 4 +++- .../core/configuration/deprecations_spec.rb | 2 +- .../core/configuration/stable_config_spec.rb | 2 +- spec/datadog/core/metrics/client_spec.rb | 2 +- spec/datadog/profiling/component_spec.rb | 4 ++-- spec/datadog/profiling/tasks/setup_spec.rb | 2 +- spec/datadog/tracing/component_spec.rb | 2 +- .../tracing/contrib/lograge/patcher_spec.rb | 2 +- .../tracing/contrib/sidekiq/patcher_spec.rb | 2 +- .../tracing/contrib/sidekiq/support/helper.rb | 4 ++-- spec/support/loaded_gem.rb | 8 ++++---- 13 files changed, 29 insertions(+), 25 deletions(-) diff --git a/lib/datadog/core/utils/only_once.rb b/lib/datadog/core/utils/only_once.rb index eb423085429..d56b41cf80b 100644 --- a/lib/datadog/core/utils/only_once.rb +++ b/lib/datadog/core/utils/only_once.rb @@ -31,7 +31,9 @@ def ran? @mutex.synchronize { @ran_once } end - def reset + private + + def reset_ran_once_state_for_tests @mutex.synchronize { @ran_once = false } end end diff --git a/lib/datadog/core/utils/only_once_successful.rb b/lib/datadog/core/utils/only_once_successful.rb index 4d1b020f8d7..7926192ded0 100644 --- a/lib/datadog/core/utils/only_once_successful.rb +++ b/lib/datadog/core/utils/only_once_successful.rb @@ -65,14 +65,6 @@ def failed? @mutex.synchronize { @ran_once && @failed } end - def reset - @mutex.synchronize do - @ran_once = false - @failed = false - @retries = 0 - end - end - private def check_limit! @@ -85,6 +77,14 @@ def check_limit! def limited? !@limit.nil? end + + def reset_ran_once_state_for_tests + @mutex.synchronize do + @ran_once = false + @failed = false + @retries = 0 + end + end end end end diff --git a/sig/datadog/core/utils/only_once.rbs b/sig/datadog/core/utils/only_once.rbs index 03247bfdf41..354334c71e1 100644 --- a/sig/datadog/core/utils/only_once.rbs +++ b/sig/datadog/core/utils/only_once.rbs @@ -11,7 +11,9 @@ module Datadog def ran?: () -> untyped - def reset: -> void + private + + def reset_ran_once_state_for_tests: () -> untyped end end end diff --git a/spec/datadog/core/configuration/deprecations_spec.rb b/spec/datadog/core/configuration/deprecations_spec.rb index 7b1c41692d0..fccebcd9324 100644 --- a/spec/datadog/core/configuration/deprecations_spec.rb +++ b/spec/datadog/core/configuration/deprecations_spec.rb @@ -16,7 +16,7 @@ end before do - described_class.const_get('LOG_DEPRECATIONS_ONLY_ONCE').reset + described_class.const_get('LOG_DEPRECATIONS_ONLY_ONCE').send(:reset_ran_once_state_for_tests) end context 'when deprecated env is set in ENV' do diff --git a/spec/datadog/core/configuration/stable_config_spec.rb b/spec/datadog/core/configuration/stable_config_spec.rb index 43e6ed1b930..3ec522d3f13 100644 --- a/spec/datadog/core/configuration/stable_config_spec.rb +++ b/spec/datadog/core/configuration/stable_config_spec.rb @@ -157,7 +157,7 @@ describe '#log_result' do before do - described_class.const_get(:LOG_ONLY_ONCE).reset + described_class.const_get(:LOG_ONLY_ONCE).send(:reset_ran_once_state_for_tests) end it 'calls logger.debug' do diff --git a/spec/datadog/core/metrics/client_spec.rb b/spec/datadog/core/metrics/client_spec.rb index 231d7cac316..89a474bac9d 100644 --- a/spec/datadog/core/metrics/client_spec.rb +++ b/spec/datadog/core/metrics/client_spec.rb @@ -76,7 +76,7 @@ let(:options) { {statsd: statsd} } before do - described_class.const_get('IGNORED_STATSD_ONLY_ONCE').reset + described_class.const_get('IGNORED_STATSD_ONLY_ONCE').send(:reset_ran_once_state_for_tests) allow(logger).to receive(:warn) end diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index a162eb72fe3..15d97b7d7c5 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -212,7 +212,7 @@ settings.profiling.advanced.gc_enabled = false # Disable this to avoid any additional warnings coming from it stub_const("RUBY_VERSION", testing_version) - described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).reset + described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).send(:reset_ran_once_state_for_tests) end context "on Ruby 2.x" do @@ -354,7 +354,7 @@ before do settings.profiling.allocation_enabled = true - described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).reset + described_class.const_get(:ALLOCATION_WITH_RACTORS_ONLY_ONCE).send(:reset_ran_once_state_for_tests) end it "initializes StackRecorder with heap sampling support and warns" do diff --git a/spec/datadog/profiling/tasks/setup_spec.rb b/spec/datadog/profiling/tasks/setup_spec.rb index 03221ffded6..c8ddc729d66 100644 --- a/spec/datadog/profiling/tasks/setup_spec.rb +++ b/spec/datadog/profiling/tasks/setup_spec.rb @@ -10,7 +10,7 @@ subject(:run) { task.run } before do - described_class::ACTIVATE_EXTENSIONS_ONLY_ONCE.reset + described_class::ACTIVATE_EXTENSIONS_ONLY_ONCE.send(:reset_ran_once_state_for_tests) end it "actives the forking extension before setting up the at_fork hooks" do diff --git a/spec/datadog/tracing/component_spec.rb b/spec/datadog/tracing/component_spec.rb index 65e78a9db40..fd2cf1bc3fd 100644 --- a/spec/datadog/tracing/component_spec.rb +++ b/spec/datadog/tracing/component_spec.rb @@ -535,7 +535,7 @@ let(:responses) { [double('response')] } before do - Datadog::Tracing::Component::WRITER_RECORD_ENVIRONMENT_INFORMATION_ONLY_ONCE.reset + Datadog::Tracing::Component::WRITER_RECORD_ENVIRONMENT_INFORMATION_ONLY_ONCE.send(:reset_ran_once_state_for_tests) end it 'invokes the environment logger with responses' do diff --git a/spec/datadog/tracing/contrib/lograge/patcher_spec.rb b/spec/datadog/tracing/contrib/lograge/patcher_spec.rb index 2254f7faf39..2758e1ed2e5 100644 --- a/spec/datadog/tracing/contrib/lograge/patcher_spec.rb +++ b/spec/datadog/tracing/contrib/lograge/patcher_spec.rb @@ -5,7 +5,7 @@ RSpec.describe Datadog::Tracing::Contrib::Lograge::Patcher do describe '.patch' do - before { described_class.instance_variable_get(:@patch_only_once)&.reset } + before { described_class.instance_variable_get(:@patch_only_once)&.send(:reset_ran_once_state_for_tests) } it 'adds Instrumentation to ancestors of LogSubscribers::Base class' do described_class.patch diff --git a/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb b/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb index dee10f45b69..a5ab336b48c 100644 --- a/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb +++ b/spec/datadog/tracing/contrib/sidekiq/patcher_spec.rb @@ -20,7 +20,7 @@ stub_const('Sidekiq::ServerInternalTracer::RedisInfo', Class.new) # NB: This is needed because we want to patch multiple times. - described_class.instance_variable_get(:@patch_only_once)&.reset + described_class.instance_variable_get(:@patch_only_once)&.send(:reset_ran_once_state_for_tests) end # NB: This needs to be after the before block above so that the use :sidekiq diff --git a/spec/datadog/tracing/contrib/sidekiq/support/helper.rb b/spec/datadog/tracing/contrib/sidekiq/support/helper.rb index 2bf606cd222..4503a53e6a5 100644 --- a/spec/datadog/tracing/contrib/sidekiq/support/helper.rb +++ b/spec/datadog/tracing/contrib/sidekiq/support/helper.rb @@ -54,7 +54,7 @@ def expect_in_sidekiq_server(wait_until: nil) # NB: This is needed because we want to patch within a forked process. Datadog::Tracing::Contrib::Sidekiq::Patcher .instance_variable_get(:@patch_only_once) - &.reset + &.send(:reset_ran_once_state_for_tests) require 'sidekiq/cli' @@ -94,7 +94,7 @@ def expect_after_stopping_sidekiq_server # NB: This is needed because we want to patch within a forked process. Datadog::Tracing::Contrib::Sidekiq::Patcher .instance_variable_get(:@patch_only_once) - &.reset + &.send(:reset_ran_once_state_for_tests) require 'sidekiq/cli' diff --git a/spec/support/loaded_gem.rb b/spec/support/loaded_gem.rb index 8d9daa84861..5f8edde03d2 100644 --- a/spec/support/loaded_gem.rb +++ b/spec/support/loaded_gem.rb @@ -38,14 +38,14 @@ def decrement_gem_version(version) def remove_patch!(integration, patch_key = :patch) if (integration.is_a?(Module) || integration.is_a?(Class)) && integration <= Datadog::Tracing::Contrib::Patcher - integration::PATCH_ONLY_ONCE.reset if defined?(integration::PATCH_ONLY_ONCE) + integration::PATCH_ONLY_ONCE.send(:reset_ran_once_state_for_tests) if defined?(integration::PATCH_ONLY_ONCE) if integration.respond_to?(:patch_only_once, true) - integration.send(:patch_only_once).reset + integration.send(:patch_only_once).send(:reset_ran_once_state_for_tests) end elsif Datadog.registry[integration].respond_to?(:patcher) Datadog.registry[integration].patcher.tap do |patcher| - patcher::PATCH_ONLY_ONCE.reset if defined?(patcher::PATCH_ONLY_ONCE) - patcher.send(:patch_only_once).reset if patcher.respond_to?( + patcher::PATCH_ONLY_ONCE.send(:reset_ran_once_state_for_tests) if defined?(patcher::PATCH_ONLY_ONCE) + patcher.send(:patch_only_once).send(:reset_ran_once_state_for_tests) if patcher.respond_to?( :patch_only_once, true ) From 9541c23f42a5040063894f75fcea6cbabc76ccb4 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 24 Nov 2025 11:03:00 -0500 Subject: [PATCH 09/16] reset initial event once --- lib/datadog/core/telemetry/worker.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/telemetry/worker.rb b/lib/datadog/core/telemetry/worker.rb index c7fa5d5d30e..a106ad8526a 100644 --- a/lib/datadog/core/telemetry/worker.rb +++ b/lib/datadog/core/telemetry/worker.rb @@ -270,7 +270,7 @@ def after_fork # having wrong state. It is possible that in the future telemetry # will be re-enabled after errors. buffer.clear - initial_event_once.reset + @initial_event_once = Utils::OnlyOnceSuccessful.new(APP_STARTED_EVENT_RETRIES) # In the child process, we get a new runtime_id. # As such we need to send AppStarted event. # In the parent process, the event may have been the From bb2da800d1e0073ade2b4489dd46d13446f88f45 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> Date: Tue, 25 Nov 2025 10:17:08 -0500 Subject: [PATCH 10/16] Update lib/datadog/core/telemetry/event/app_started.rb Co-authored-by: Marco Costa --- lib/datadog/core/telemetry/event/app_started.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/telemetry/event/app_started.rb b/lib/datadog/core/telemetry/event/app_started.rb index c6683558629..1ffa3602f58 100644 --- a/lib/datadog/core/telemetry/event/app_started.rb +++ b/lib/datadog/core/telemetry/event/app_started.rb @@ -13,7 +13,7 @@ def initialize(components:) # the event payload here in the constructor. # # Important: do not store data that contains (or is derived from) - # the runtime_id oor sequence numbers. + # the runtime_id or sequence numbers. # This event is reused when a process forks, but in the # child process the runtime_id would be different and sequence # number would obviously also be different. From 856b1a00efbdbe8629b3d0264f6cf65bd0440e72 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> Date: Tue, 25 Nov 2025 10:17:17 -0500 Subject: [PATCH 11/16] Update lib/datadog/core/telemetry/event/app_started.rb Co-authored-by: Marco Costa --- lib/datadog/core/telemetry/event/app_started.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/telemetry/event/app_started.rb b/lib/datadog/core/telemetry/event/app_started.rb index 1ffa3602f58..dd4c4ed6e83 100644 --- a/lib/datadog/core/telemetry/event/app_started.rb +++ b/lib/datadog/core/telemetry/event/app_started.rb @@ -16,7 +16,7 @@ def initialize(components:) # the runtime_id or sequence numbers. # This event is reused when a process forks, but in the # child process the runtime_id would be different and sequence - # number would obviously also be different. + # number is reset. @configuration = configuration(components.settings, components.agent_settings) @install_signature = install_signature(components.settings) @products = products(components) From c1aafe35c57d600a7a21c6fc7ceb55b2201f2669 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 25 Nov 2025 15:21:11 -0500 Subject: [PATCH 12/16] extract initialize_state --- lib/datadog/core/telemetry/worker.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/telemetry/worker.rb b/lib/datadog/core/telemetry/worker.rb index a106ad8526a..6b0245b04b6 100644 --- a/lib/datadog/core/telemetry/worker.rb +++ b/lib/datadog/core/telemetry/worker.rb @@ -48,6 +48,10 @@ def initialize( @shutdown_timeout = shutdown_timeout @buffer_size = buffer_size + initialize_state + end + + def initialize_state self.buffer = buffer_klass.new(@buffer_size) @initial_event_once = Utils::OnlyOnceSuccessful.new(APP_STARTED_EVENT_RETRIES) @@ -269,8 +273,7 @@ def after_fork # If telemetry is disabled, we still reset the state to avoid # having wrong state. It is possible that in the future telemetry # will be re-enabled after errors. - buffer.clear - @initial_event_once = Utils::OnlyOnceSuccessful.new(APP_STARTED_EVENT_RETRIES) + initialize_state # In the child process, we get a new runtime_id. # As such we need to send AppStarted event. # In the parent process, the event may have been the From 50871092a57eb7197769c679b2f0accffd5fec68 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 25 Nov 2025 15:22:18 -0500 Subject: [PATCH 13/16] remove Buffer#clear --- lib/datadog/core/buffer/random.rb | 6 ------ lib/datadog/core/buffer/thread_safe.rb | 4 ---- sig/datadog/core/buffer/random.rbs | 2 -- 3 files changed, 12 deletions(-) diff --git a/lib/datadog/core/buffer/random.rb b/lib/datadog/core/buffer/random.rb index dd763169ad0..33838d951a6 100644 --- a/lib/datadog/core/buffer/random.rb +++ b/lib/datadog/core/buffer/random.rb @@ -84,12 +84,6 @@ def closed? @closed end - # Discards the contents of the buffer. - def clear - @items = [] - nil - end - protected # Segment items into two segments: underflow and overflow. diff --git a/lib/datadog/core/buffer/thread_safe.rb b/lib/datadog/core/buffer/thread_safe.rb index a99d4e4342b..cdf9df9f0f6 100644 --- a/lib/datadog/core/buffer/thread_safe.rb +++ b/lib/datadog/core/buffer/thread_safe.rb @@ -49,10 +49,6 @@ def close synchronize { super } end - def clear - synchronize { super } - end - def synchronize(&block) @mutex.synchronize(&block) end diff --git a/sig/datadog/core/buffer/random.rbs b/sig/datadog/core/buffer/random.rbs index edab2bf29bc..4e1c46b3222 100644 --- a/sig/datadog/core/buffer/random.rbs +++ b/sig/datadog/core/buffer/random.rbs @@ -35,8 +35,6 @@ module Datadog def push: (Object) -> Object? def replace!: (Object) -> Object? - - def clear: -> void end end end From f44a8f5b51f2977b1f8cbd469643c392b3fc1934 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 25 Nov 2025 15:39:00 -0500 Subject: [PATCH 14/16] type --- sig/datadog/core/telemetry/worker.rbs | 1 + 1 file changed, 1 insertion(+) diff --git a/sig/datadog/core/telemetry/worker.rbs b/sig/datadog/core/telemetry/worker.rbs index 67ad2401272..06b869a3d1b 100644 --- a/sig/datadog/core/telemetry/worker.rbs +++ b/sig/datadog/core/telemetry/worker.rbs @@ -26,6 +26,7 @@ module Datadog attr_reader initial_event_once: Datadog::Core::Utils::OnlyOnceSuccessful def initialize: (?enabled: bool, heartbeat_interval_seconds: Float, metrics_aggregation_interval_seconds: Float, emitter: Emitter, metrics_manager: MetricsManager, ?shutdown_timeout: Float | Integer, ?buffer_size: Integer, dependency_collection: bool, logger: ::Logger) -> void + def initialize_state: -> void def start: (Telemetry::Event::Base initial_event) -> void From fa9154f3ef7e974f980af8d518d2913abe71671e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 25 Nov 2025 15:39:32 -0500 Subject: [PATCH 15/16] make private --- lib/datadog/core/telemetry/worker.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/telemetry/worker.rb b/lib/datadog/core/telemetry/worker.rb index 6b0245b04b6..5f6120234fe 100644 --- a/lib/datadog/core/telemetry/worker.rb +++ b/lib/datadog/core/telemetry/worker.rb @@ -51,7 +51,7 @@ def initialize( initialize_state end - def initialize_state + private def initialize_state self.buffer = buffer_klass.new(@buffer_size) @initial_event_once = Utils::OnlyOnceSuccessful.new(APP_STARTED_EVENT_RETRIES) From 114b87eb0a9dd18f3bab1b3d674059149a6690d8 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 25 Nov 2025 20:35:55 -0500 Subject: [PATCH 16/16] add docs --- docs/TelemetryDevelopment.md | 66 ++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 docs/TelemetryDevelopment.md diff --git a/docs/TelemetryDevelopment.md b/docs/TelemetryDevelopment.md new file mode 100644 index 00000000000..cd6fee4ddb5 --- /dev/null +++ b/docs/TelemetryDevelopment.md @@ -0,0 +1,66 @@ +# Telemetry Development + +## Telemetry Presence + +`dd-trace-rb` is written to assume that the telemetry component is always +present. If telemetry is disabled, the component is still created but does +nothing. + +Most components call methods on `telemetry` unconditionally. There are two +exceptons: DI and Data Streams are written to assume that `telemetry` may be nil. +However, this assumption is not necessary and these components may be +changed in the future to assume that `telemetry` is always present. + +## Event Submission Prior To Start + +Telemetry is unique among other components in that it permits events to be +submitted to it prior to its worker starting. This is done so that errors +during `Datadog.configure` processing can be reported via telemetry, because +the errors can be produced prior to telemetry worker starting. The telemetry +component keeps the events and sends them after the worker starts. + +## Initial Event + +`dd-trace-rb` can be initialized multiple times during application boot. +For example, if customers follow our documentation and require +`datadog/auto_instrument`, and call `Datadog.configure`, they would get +`Datadog.configure` invoked two times total (the first time by `auto_instrument`) +and thus telemetry instance would be created twice. This happens in the +applications used with system tests. + +System tests, on the other hand, require that there is only one `app-started` +event emitted, because they think the application is launched once. +To deal with this we have a hack in the telemetry code to send an +`app-client-configuration-change` event instead of the second `app-started` +event. This is implemented via the `SynthAppClientConfigurationChange` class. + +## Fork Handling + +We must send telemetry data from forked children. + +Telemetry started out as a diagnostic tool used during application boot, +but is now used for reporting application liveness (and settings/state) +throughout the application lifetime. Live Debugger / Dynamic Instrumentation, +for example, require ongoing `app-heartbeat` events emitted via telemetry +to provide a working UI to customers. + +It is somewhat common for customers to preload the application in the parent +web server process and process requests from children. This means telemetry +is initialized from the parent process, and it must emit events in the +forked children. + +We use the standard worker `after_fork` handler to recreated the worker +thread in forked children. However, there are two caveats to keep in mind +which are specific to telemetry: + +1. Due to telemetry permitting event submission prior to its start, it is +not sufficient to simply reset the state from the worker's `perform` method, +as is done in other components. We must only reset the state when we are +in the forked child, otherwise we'll trash any events submitted to telemetry +prior to its worker starting. + +2. The child process is a brand new application as far as the backend/UI is +concerned, having a new runtime ID, and therefore the initial event in the +forked child must always be `app-started`. Since we track the initial event +in the telemetry component, this event must be changed to `app-started` in +forked children regardless of what it was in the parent.