From ec6eb4323289b10159a59ab4a5b160c3acd53013 Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Fri, 4 Jul 2025 12:58:05 -0500 Subject: [PATCH 1/2] Add API for automatically recording trace spans duration to a histogram This adds an API for automatically tracking span durations with a `TimeHistogram`. The durations are stored regardless of if the underlying span is sampled. --- foundations-macros/src/span_fn.rs | 112 ++++++++++++-- .../src/telemetry/telemetry_context.rs | 52 +++++++ foundations/src/telemetry/tracing/internal.rs | 37 ++++- foundations/src/telemetry/tracing/mod.rs | 140 +++++++++++++++++- 4 files changed, 325 insertions(+), 16 deletions(-) diff --git a/foundations-macros/src/span_fn.rs b/foundations-macros/src/span_fn.rs index 8e5dd5a..ca9859d 100644 --- a/foundations-macros/src/span_fn.rs +++ b/foundations-macros/src/span_fn.rs @@ -44,6 +44,9 @@ struct Options { #[darling(default = "Options::default_async_local")] async_local: bool, + + #[darling(default = "Options::default_time_histogram")] + with_time_histogram: Option, } impl Options { @@ -54,6 +57,10 @@ impl Options { fn default_async_local() -> bool { false } + + fn default_time_histogram() -> Option { + None + } } struct Args { @@ -113,10 +120,19 @@ fn expand_from_parsed(args: Args, item_fn: ItemFn) -> TokenStream2 { let span_name = args.span_name.as_tokens(); let crate_path = &args.options.crate_path; - quote!( - let __span = #crate_path::telemetry::tracing::span(#span_name); - #block - ) + if let Some(timer) = args.options.with_time_histogram { + quote!( + let __span = #crate_path::telemetry::tracing::span(#span_name) + .into_context() + .with_time_histogram(#timer); + #block + ) + } else { + quote!( + let __span = #crate_path::telemetry::tracing::span(#span_name); + #block + ) + } }), }; @@ -182,13 +198,22 @@ fn wrap_with_span(args: &Args, block: TokenStream2) -> TokenStream2 { let span_name = args.span_name.as_tokens(); let crate_path = &args.options.crate_path; - - quote!( - #crate_path::telemetry::tracing::span(#span_name) - .into_context() - .#apply_fn(#block) - .await - ) + if let Some(timer) = &args.options.with_time_histogram { + quote!( + #crate_path::telemetry::tracing::span(#span_name) + .into_context() + .with_time_histogram(#timer) + .#apply_fn(#block) + .await + ) + } else { + quote!( + #crate_path::telemetry::tracing::span(#span_name) + .into_context() + .#apply_fn(#block) + .await + ) + } } #[cfg(test)] @@ -482,4 +507,69 @@ mod tests { assert_eq!(actual, expected); } + + #[test] + fn span_sync_fn_with_metric_tracker() { + let args = parse_attr! { + #[span_fn("sync_span", with_time_histogram = ::foo::bar::metrics::time_histogram(label = false))] + }; + + let item_fn = parse_quote! { + fn do_sync() -> io::Result { + do_something_else(); + + Ok("foo".into()) + } + }; + + let actual = expand_from_parsed(args, item_fn).to_string(); + + let expected = code_str! { + fn do_sync<>() -> io::Result { + let __span = ::foundations::telemetry::tracing::span("sync_span") + .into_context() + .with_time_histogram(::foo::bar::metrics::time_histogram(label = false)); + { + do_something_else(); + + Ok("foo".into()) + } + } + }; + + assert_eq!(actual, expected); + } + + #[test] + fn span_async_fn_with_metric_tracker() { + let args = parse_attr! { + #[span_fn("async_span", with_time_histogram = ::foo::bar::metrics::time_histogram(label = false))] + }; + + let item_fn = parse_quote! { + async fn do_async() -> io::Result { + do_something_else().await; + + Ok("foo".into()) + } + }; + + let actual = expand_from_parsed(args, item_fn).to_string(); + + let expected = code_str! { + async fn do_async<>() -> io::Result { + ::foundations::telemetry::tracing::span("async_span") + .into_context() + .with_time_histogram(::foo::bar::metrics::time_histogram(label = false)) + .apply(async move {{ + do_something_else().await; + + Ok("foo".into()) + }}) + .await + } + }; + + assert_eq!(actual, expected); + } } diff --git a/foundations/src/telemetry/telemetry_context.rs b/foundations/src/telemetry/telemetry_context.rs index 195f988..35cdfb9 100644 --- a/foundations/src/telemetry/telemetry_context.rs +++ b/foundations/src/telemetry/telemetry_context.rs @@ -18,6 +18,10 @@ feature_use!(cfg(feature = "tracing"), { use super::tracing::internal::Tracer; use super::tracing::testing::{current_test_tracer, TestTracerScope}; }); + + feature_use!(cfg(feature = "metrics"), { + use prometools::histogram::TimeHistogram; + }); }); #[cfg(feature = "testing")] @@ -226,6 +230,54 @@ impl TelemetryContext { TestTelemetryContext::new() } + /// Track the duration of the current `span` in this `TelemetryContext`. The span's duration + /// will be inserted into `histogram`, regardless of whether `span` was sampled or not. + /// + /// # Examples + /// ``` + /// use foundations::telemetry::TelemetryContext; + /// use foundations::telemetry::metrics::{metrics, HistogramBuilder, TimeHistogram}; + /// use foundations::telemetry::tracing::{self, test_trace}; + /// + /// #[metrics] + /// mod test_metrics { + /// #[ctor = HistogramBuilder { buckets: &[1E-4, 2E-4] }] + /// pub fn histogram(label: bool) -> TimeHistogram; + /// } + /// + /// #[tokio::main] + /// async fn main() { + /// // Test context is used for demonstration purposes to show the resulting traces. + /// let ctx = TelemetryContext::test(); + /// let hist = test_metrics::histogram(true); + /// let clone = hist.clone(); + /// + /// { + /// let _scope = ctx.scope(); + /// let _root = tracing::span("root"); + /// let telemetry_ctx = TelemetryContext::current().with_time_histogram(clone); + /// + /// let handle = std::thread::spawn(move || { + /// let _scope = telemetry_ctx.scope(); + /// let _child = tracing::span("child"); + /// }); + /// + /// handle.join(); + /// } + /// + /// let snapshot = hist.snapshot(); + /// assert_eq!(snapshot.count(), 1); + /// } + /// ``` + #[cfg(all(feature = "metrics", feature = "tracing"))] + pub fn with_time_histogram(mut self, histogram: TimeHistogram) -> Self { + if let Some(inner) = self.span.as_mut() { + inner.track_with_time_histogram(histogram); + } + + self + } + /// Wraps a future with the telemetry context. /// /// [`TelemetryScope`] can't be used across `await` points to propagate the telemetry context, diff --git a/foundations/src/telemetry/tracing/internal.rs b/foundations/src/telemetry/tracing/internal.rs index 975bf40..4f3a3c2 100644 --- a/foundations/src/telemetry/tracing/internal.rs +++ b/foundations/src/telemetry/tracing/internal.rs @@ -6,6 +6,7 @@ use cf_rustracing::sampler::BoxSampler; use cf_rustracing::tag::Tag; use cf_rustracing_jaeger::span::{Span, SpanContext, SpanContextState}; use parking_lot::RwLock; +use prometools::histogram::{HistogramTimer, TimeHistogram}; use rand::{self, Rng}; use std::borrow::Cow; use std::error::Error; @@ -54,7 +55,7 @@ impl From for Arc> { } } -#[derive(Debug, Clone)] +#[derive(Clone)] pub(crate) struct SharedSpan { // NOTE: we intentionally use a lock without poisoning here to not // panic the threads if they just share telemetry with failed thread. @@ -62,6 +63,23 @@ pub(crate) struct SharedSpan { // NOTE: store sampling flag separately, so we don't need to acquire lock // every time we need to check the flag. is_sampled: bool, + timer: Option>, +} + +impl SharedSpan { + #[cfg(all(feature = "metrics", feature = "tracing"))] + pub(crate) fn track_with_time_histogram(&mut self, timer: TimeHistogram) { + let _ = self.timer.insert(Arc::new(timer.start_timer())); + } +} + +impl std::fmt::Debug for SharedSpan { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SharedSpan") + .field("inner", &self.inner) + .field("is_sampled", &self.is_sampled) + .finish() + } } impl From for SharedSpan { @@ -71,6 +89,7 @@ impl From for SharedSpan { Self { inner: SharedSpanHandle::new(inner), is_sampled, + timer: None, } } } @@ -83,12 +102,22 @@ pub fn write_current_span(write_fn: impl FnOnce(&mut Span)) { } } -pub(crate) fn create_span(name: impl Into>) -> SharedSpan { - match current_span() { +pub(crate) fn create_span( + name: impl Into>, + histogram: Option, +) -> SharedSpan { + let span = match current_span() { Some(parent) => parent.inner.read().child(name, |o| o.start()), None => start_trace(name, Default::default()), + }; + + let timer = histogram.map(|t| Arc::new(t.start_timer())); + + SharedSpan { + is_sampled: span.is_sampled(), + inner: SharedSpanHandle::new(span), + timer, } - .into() } pub(crate) fn current_span() -> Option { diff --git a/foundations/src/telemetry/tracing/mod.rs b/foundations/src/telemetry/tracing/mod.rs index 27aefd8..bae7231 100644 --- a/foundations/src/telemetry/tracing/mod.rs +++ b/foundations/src/telemetry/tracing/mod.rs @@ -19,6 +19,7 @@ use self::internal::{create_span, current_span, span_trace_id, SharedSpan}; use super::scope::Scope; use super::TelemetryContext; use cf_rustracing_jaeger::Span; +use prometools::histogram::TimeHistogram; use std::borrow::Cow; use std::sync::Arc; @@ -71,6 +72,83 @@ pub fn get_active_traces() -> String { /// ); /// ``` /// +/// # Tracking span duration +/// The `with_time_histogram` attribute can be used to time the span. The span duration will be +/// recorded into the passed histogram. +/// +/// ## Note +/// +/// The duration will be recorded regardless of if the span was sampled. +/// +/// ``` +/// use foundations::telemetry::TelemetryContext; +/// use foundations::telemetry::metrics::{metrics, HistogramBuilder, TimeHistogram}; +/// use foundations::telemetry::tracing::{self, test_trace}; +/// +/// #[metrics] +/// mod test_metrics { +/// #[ctor = HistogramBuilder { buckets: &[1E-4, 2E-4] }] +/// pub fn root_histogram() -> TimeHistogram; +/// +/// #[ctor = HistogramBuilder { buckets: &[1E-4, 2E-4] }] +/// pub fn sync_fn_histogram() -> TimeHistogram; +/// +/// #[ctor = HistogramBuilder { buckets: &[1E-4, 2E-4] }] +/// pub fn async_fn_histogram(label: bool) -> TimeHistogram; +/// } +/// +/// // The `track_duration_with` attribute will automatically track the duration of the +/// // `sync_fn` span and drop it into `sync_fn_histogram()`. +/// #[tracing::span_fn("sync_fn", with_time_histogram = test_metrics::sync_fn_histogram())] +/// fn some_sync_production_fn_that_we_test() -> u64 { +/// 57 +/// } +/// +/// // This one's duration will be dropped into `async_fn_histogram()`. +/// #[tracing::span_fn("async_fn", with_time_histogram = test_metrics::async_fn_histogram(true))] +/// async fn some_async_production_fn_that_we_test() -> u64 { +/// 42 +/// } +/// +/// #[tokio::main] +/// async fn main() { +/// let ctx = TelemetryContext::test(); +/// +/// { +/// let _scope = ctx.scope(); +/// let _root = tracing::span("root"); +/// +/// let handle = tokio::spawn(TelemetryContext::current().apply(async { +/// some_async_production_fn_that_we_test().await; +/// })); +/// +/// handle.await; +/// +/// some_sync_production_fn_that_we_test(); +/// } +/// +/// assert_eq!( +/// ctx.traces(Default::default()), +/// vec![ +/// test_trace! { +/// "root" => { +/// "async_fn", +/// "sync_fn" +/// } +/// } +/// ] +/// ); +/// +/// let sync_snapshot = test_metrics::sync_fn_histogram().snapshot(); +/// let async_true_snapshot = test_metrics::async_fn_histogram(true).snapshot(); +/// let async_false_snapshot = test_metrics::async_fn_histogram(false).snapshot(); +/// +/// assert_eq!(sync_snapshot.count(), 1); +/// assert_eq!(async_true_snapshot.count(), 1); +/// assert_eq!(async_false_snapshot.count(), 0); +/// } +/// ``` +/// /// # Using constants for span names /// ``` /// use foundations::telemetry::TelemetryContext; @@ -334,7 +412,67 @@ pub fn state_for_trace_stitching() -> Option { /// ); /// ``` pub fn span(name: impl Into>) -> SpanScope { - SpanScope::new(create_span(name)) + SpanScope::new(create_span(name, None)) +} + +/// Create a new traced span. The span's duration will be automatically recorded into the provided +/// `histogram`. The span ends when the returned [`SpanScope`] is dropped. +/// +/// See [`span()`] for more on how spans behave. Note that the duration will be recorded regardless +/// of if the span is sampled or not, so this can be used to accurately track function/block-level +/// execution time. +/// +/// # Examples +/// ``` +/// use foundations::telemetry::TelemetryContext; +/// use foundations::telemetry::tracing::{self, test_trace}; +/// use foundations::telemetry::metrics::TimeHistogram; +/// +/// // Test context is used for demonstration purposes to show the resulting traces. +/// let ctx = TelemetryContext::test(); +/// +/// let root_hist = TimeHistogram::new(vec![0.0].into_iter()); +/// let root_clone = root_hist.clone(); +/// let hist1 = TimeHistogram::new(vec![0.0].into_iter()); +/// let hist1_clone = hist1.clone(); +/// let hist2 = TimeHistogram::new(vec![0.0].into_iter()); +/// let hist2_clone = hist2.clone(); +/// +/// { +/// let _scope = ctx.scope(); +/// let _root = tracing::timed_span("root", root_clone); +/// +/// { +/// let _span1 = tracing::timed_span("span1", hist1_clone); +/// } +/// +/// let _span2 = tracing::timed_span("span2", hist2_clone); +/// let _span2_1 = tracing::span("span2_1"); +/// } +/// +/// assert_eq!( +/// ctx.traces(Default::default()), +/// vec![test_trace! { +/// "root" => { +/// "span1", +/// "span2" => { +/// "span2_1" +/// } +/// } +/// }] +/// ); +/// +/// let root_snapshot = root_hist.snapshot(); +/// let hist1_snapshot = hist1.snapshot(); +/// let hist2_snapshot = hist2.snapshot(); +/// +/// assert_eq!(root_snapshot.count(), 1); +/// assert_eq!(hist1_snapshot.count(), 1); +/// assert_eq!(hist2_snapshot.count(), 1); +/// ``` +#[cfg(feature = "metrics")] +pub fn timed_span(name: impl Into>, histogram: TimeHistogram) -> SpanScope { + SpanScope::new(create_span(name, Some(histogram))) } /// Starts a new trace. Ends the current one if it is available and links the new one with it. From da868a70413f61389d19df0a28807485feaf6a97 Mon Sep 17 00:00:00 2001 From: Evan Rittenhouse Date: Wed, 16 Jul 2025 20:18:53 -0500 Subject: [PATCH 2/2] Fix clippy lints --- foundations/src/telemetry/memory_profiler.rs | 8 ++------ foundations/src/telemetry/tracing/live/event_output.rs | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/foundations/src/telemetry/memory_profiler.rs b/foundations/src/telemetry/memory_profiler.rs index 0130a2a..7734094 100644 --- a/foundations/src/telemetry/memory_profiler.rs +++ b/foundations/src/telemetry/memory_profiler.rs @@ -195,12 +195,8 @@ fn collect_heap_profile() -> Result { let mut out_file_path_c_str = CString::new(out_file_path)?.into_bytes_with_nul(); let out_file_path_ptr = out_file_path_c_str.as_mut_ptr() as *mut c_char; - control::write(control::PROF_DUMP, out_file_path_ptr).map_err(|e| { - format!( - "failed to dump jemalloc heap profile to {:?}: {}", - out_file_path, e - ) - })?; + control::write(control::PROF_DUMP, out_file_path_ptr) + .map_err(|e| format!("failed to dump jemalloc heap profile to {out_file_path:?}: {e}",))?; let mut profile = Vec::new(); diff --git a/foundations/src/telemetry/tracing/live/event_output.rs b/foundations/src/telemetry/tracing/live/event_output.rs index efdee9b..3c7b5de 100644 --- a/foundations/src/telemetry/tracing/live/event_output.rs +++ b/foundations/src/telemetry/tracing/live/event_output.rs @@ -93,8 +93,7 @@ impl TraceLogBuilder { fn finalize(mut self, end_timestamp: u64) -> String { self.out.push_str(&format!( - "{{\"pid\":1,\"name\":\"Trace dump requested\",\"ph\":\"i\",\"ts\":{},\"s\":\"g\"}}", - end_timestamp, + "{{\"pid\":1,\"name\":\"Trace dump requested\",\"ph\":\"i\",\"ts\":{end_timestamp},\"s\":\"g\"}}", )); self.out.push(']');