From 16d08637029f6f2cc42f554a67c2f8f03c6f0020 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Tue, 21 Oct 2025 07:36:49 +0200 Subject: [PATCH 1/2] chore: OTLP retry post-review cleanup --- opentelemetry-otlp/CHANGELOG.md | 1 + opentelemetry-otlp/src/retry.rs | 44 +++++++++++---------------------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/opentelemetry-otlp/CHANGELOG.md b/opentelemetry-otlp/CHANGELOG.md index a4a335e687..e1f6627bcb 100644 --- a/opentelemetry-otlp/CHANGELOG.md +++ b/opentelemetry-otlp/CHANGELOG.md @@ -11,6 +11,7 @@ Released 2025-Sep-25 - Update `opentelemetry-proto` and `opentelemetry-http` dependency version to 0.31.0 - Add HTTP compression support with `gzip-http` and `zstd-http` feature flags - Add retry with exponential backoff and throttling support for HTTP and gRPC exporters + This behaviour is opt in via the `experimental-grpc-retry` and `experimental-http-retry flags` on this crate. ## 0.30.0 diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-otlp/src/retry.rs index 939e44503b..3475c67b0f 100644 --- a/opentelemetry-otlp/src/retry.rs +++ b/opentelemetry-otlp/src/retry.rs @@ -7,6 +7,11 @@ //! specified retry policy, using exponential backoff and jitter to determine the delay between //! retries. The function uses error classification to determine retry behavior and can honor //! server-provided throttling hints. 
+#[cfg(any( + feature = "experimental-grpc-retry", + feature = "experimental-http-retry" +))] +use opentelemetry::otel_info; #[cfg(any( feature = "experimental-grpc-retry", @@ -17,24 +22,23 @@ use opentelemetry::otel_warn; feature = "experimental-grpc-retry", feature = "experimental-http-retry" ))] -use std::future::Future; +use opentelemetry_sdk::runtime::Runtime; #[cfg(any( feature = "experimental-grpc-retry", feature = "experimental-http-retry" ))] -use std::hash::{DefaultHasher, Hasher}; -use std::time::Duration; +use std::future::Future; #[cfg(any( feature = "experimental-grpc-retry", feature = "experimental-http-retry" ))] -use std::time::SystemTime; - +use std::hash::{DefaultHasher, Hasher}; +use std::time::Duration; #[cfg(any( feature = "experimental-grpc-retry", feature = "experimental-http-retry" ))] -use opentelemetry_sdk::runtime::Runtime; +use std::time::SystemTime; /// Classification of errors for retry purposes. #[derive(Debug, Clone, PartialEq)] @@ -61,26 +65,6 @@ pub struct RetryPolicy { pub jitter_ms: u64, } -/// A runtime stub for when experimental_async_runtime is not enabled. -/// This allows retry policy to be configured but no actual retries occur. -#[cfg(not(any( - feature = "experimental-grpc-retry", - feature = "experimental-http-retry" -)))] -#[derive(Debug, Clone, Default)] -pub struct NoOpRuntime; - -#[cfg(not(any( - feature = "experimental-grpc-retry", - feature = "experimental-http-retry" -)))] -impl NoOpRuntime { - /// Creates a new no-op runtime. 
- pub fn new() -> Self { - Self - } -} - // Generates a random jitter value up to max_jitter #[cfg(any( feature = "experimental-grpc-retry", @@ -144,13 +128,13 @@ where match error_type { RetryErrorType::NonRetryable => { - otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed with non-retryable error: {:?}", operation_name, err)); + otel_warn!(name: "OtlpRetryNonRetryable", operation = operation_name, error = format!("{:?}", err)); return Err(err); } RetryErrorType::Retryable if attempt < policy.max_retries => { attempt += 1; // Use exponential backoff with jitter - otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to retryable error: {:?}", operation_name, err)); + otel_info!(name: "OtlpRetryRetrying", operation = operation_name, error = format!("{:?}", err)); let jitter = generate_jitter(policy.jitter_ms); let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); runtime @@ -161,13 +145,13 @@ where RetryErrorType::Throttled(server_delay) if attempt < policy.max_retries => { attempt += 1; // Use server-specified delay (overrides exponential backoff) - otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} after server-specified throttling delay: {:?}", operation_name, server_delay)); + otel_info!(name: "OtlpRetryThrottled", operation = operation_name, error = format!("{:?}", err), delay = format!("{:?}", server_delay)); runtime.delay(server_delay).await; // Don't update exponential backoff delay for next attempt since server provided specific timing } _ => { // Max retries reached - otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed after {} attempts: {:?}", operation_name, attempt, err)); + otel_warn!(name: "OtlpRetryExhausted", operation = operation_name, error = format!("{:?}", err), attempts = attempt); return Err(err); } } From e7c40bdb6a0bc505ccb4471abe8734621d68c0d7 Mon Sep 17 00:00:00 2001 From: Scott Gerring Date: Wed, 22 Oct 2025 08:58:46 +0200 Subject: 
[PATCH 2/2] chore: OTLP retry PR feedback --- opentelemetry-otlp/CHANGELOG.md | 2 +- opentelemetry-otlp/src/retry.rs | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/opentelemetry-otlp/CHANGELOG.md b/opentelemetry-otlp/CHANGELOG.md index e1f6627bcb..03970aa02d 100644 --- a/opentelemetry-otlp/CHANGELOG.md +++ b/opentelemetry-otlp/CHANGELOG.md @@ -11,7 +11,7 @@ Released 2025-Sep-25 - Update `opentelemetry-proto` and `opentelemetry-http` dependency version to 0.31.0 - Add HTTP compression support with `gzip-http` and `zstd-http` feature flags - Add retry with exponential backoff and throttling support for HTTP and gRPC exporters - This behaviour is opt in via the `experimental-grpc-retry` and `experimental-http-retry flags` on this crate. + This behaviour is opt-in via the `experimental-grpc-retry` and `experimental-http-retry` flags on this crate. You can customize the retry policy using the `with_retry_policy` method on the exporter builders. ## 0.30.0 diff --git a/opentelemetry-otlp/src/retry.rs b/opentelemetry-otlp/src/retry.rs index 3475c67b0f..d66155e92b 100644 --- a/opentelemetry-otlp/src/retry.rs +++ b/opentelemetry-otlp/src/retry.rs @@ -128,15 +128,23 @@ where match error_type { RetryErrorType::NonRetryable => { - otel_warn!(name: "OtlpRetryNonRetryable", operation = operation_name, error = format!("{:?}", err)); + otel_warn!(name: "Export.Failed.NonRetryable", + operation = operation_name, + message = "OTLP export failed with non-retryable error - telemetry data will be lost"); return Err(err); } RetryErrorType::Retryable if attempt < policy.max_retries => { attempt += 1; // Use exponential backoff with jitter - otel_info!(name: "OtlpRetryRetrying", operation = operation_name, error = format!("{:?}", err)); let jitter = generate_jitter(policy.jitter_ms); let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms); + otel_info!(name: "Export.InProgress.Retrying", + operation = operation_name, + attempt
= attempt, + delay_ms = delay_with_jitter, + jitter_ms = jitter, + message = "OTLP export failed with retryable error - retrying" + ); runtime .delay(Duration::from_millis(delay_with_jitter)) .await; @@ -145,13 +153,22 @@ where RetryErrorType::Throttled(server_delay) if attempt < policy.max_retries => { attempt += 1; // Use server-specified delay (overrides exponential backoff) - otel_info!(name: "OtlpRetryThrottled", operation = operation_name, error = format!("{:?}", err), delay = format!("{:?}", server_delay)); + otel_info!(name: "Export.InProgress.Throttled", + operation = operation_name, + attempt = attempt, + delay_ms = server_delay.as_millis(), + message = "OTLP export throttled by OTLP endpoint - delaying and retrying" + ); runtime.delay(server_delay).await; // Don't update exponential backoff delay for next attempt since server provided specific timing } _ => { // Max retries reached - otel_warn!(name: "OtlpRetryExhausted", operation = operation_name, error = format!("{:?}", err), attempts = attempt); + otel_warn!(name: "Export.Failed.Exhausted", + operation = operation_name, + retries = attempt, + message = "OTLP export exhausted retries - telemetry data will be lost" + ); return Err(err); } }