diff --git a/changelog.d/tag_cardinality_limit_metric_labels.enhancement.md b/changelog.d/tag_cardinality_limit_metric_labels.enhancement.md new file mode 100644 index 0000000000000..4762470a059b5 --- /dev/null +++ b/changelog.d/tag_cardinality_limit_metric_labels.enhancement.md @@ -0,0 +1,5 @@ +# Added `include_extended_tags_in_limit_metric` configuration option + +Added `include_extended_tags_in_limit_metric` configuration option to the `tag_cardinality_limit` transform. When enabled, the `tag_value_limit_exceeded_total` metric includes `metric_name` and `tag_key` labels to help identify which specific metrics and tag keys are hitting the configured value limit. This option defaults to `false` to avoid high cardinality issues, and should only be enabled when needed for debugging. + +authors: kaarolch diff --git a/src/internal_events/tag_cardinality_limit.rs b/src/internal_events/tag_cardinality_limit.rs index 011c3d6991833..0fc3d2dff93c4 100644 --- a/src/internal_events/tag_cardinality_limit.rs +++ b/src/internal_events/tag_cardinality_limit.rs @@ -5,6 +5,7 @@ pub struct TagCardinalityLimitRejectingEvent<'a> { pub metric_name: &'a str, pub tag_key: &'a str, pub tag_value: &'a str, + pub include_extended_tags_in_limit_metric: bool, } impl InternalEvent for TagCardinalityLimitRejectingEvent<'_> { @@ -15,7 +16,16 @@ impl InternalEvent for TagCardinalityLimitRejectingEvent<'_> { tag_key = self.tag_key, tag_value = self.tag_value, ); - counter!("tag_value_limit_exceeded_total").increment(1); + if self.include_extended_tags_in_limit_metric { + counter!( + "tag_value_limit_exceeded_total", + "metric_name" => self.metric_name.to_string(), + "tag_key" => self.tag_key.to_string(), + ) + .increment(1); + } else { + counter!("tag_value_limit_exceeded_total").increment(1); + } emit!(ComponentEventsDropped:: { count: 1, @@ -28,6 +38,7 @@ pub struct TagCardinalityLimitRejectingTag<'a> { pub metric_name: &'a str, pub tag_key: &'a str, pub tag_value: &'a str, + pub include_extended_tags_in_limit_metric: bool, } impl InternalEvent for TagCardinalityLimitRejectingTag<'_> { @@ -38,7 +49,16 @@ impl InternalEvent for TagCardinalityLimitRejectingTag<'_> { tag_key = self.tag_key, tag_value = self.tag_value, ); - counter!("tag_value_limit_exceeded_total").increment(1); + if self.include_extended_tags_in_limit_metric { + counter!( + "tag_value_limit_exceeded_total", + "metric_name" => self.metric_name.to_string(), + "tag_key" => self.tag_key.to_string(), + ) + .increment(1); + } else { + counter!("tag_value_limit_exceeded_total").increment(1); + } } } diff --git a/src/transforms/tag_cardinality_limit/config.rs b/src/transforms/tag_cardinality_limit/config.rs index ab8e3e212cad7..37b3e91e89311 100644 --- a/src/transforms/tag_cardinality_limit/config.rs +++ b/src/transforms/tag_cardinality_limit/config.rs @@ -44,6 +44,14 @@ pub struct TagCardinalityLimitInnerConfig { #[serde(flatten)] pub mode: Mode, + + /// Whether to include extended labels (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric. + /// + /// This can be useful for debugging, but should be used with caution as it can significantly + /// increase metric cardinality if metric names or tag keys are high cardinality. + #[serde(default = "default_include_extended_tags_in_limit_metric")] + #[configurable(metadata(docs::human_name = "Include Extended Tags in Limit Metric"))] + pub include_extended_tags_in_limit_metric: bool, } /// Controls the approach taken for tracking tag cardinality. @@ -115,6 +123,10 @@ const fn default_value_limit() -> usize { 500 } +const fn default_include_extended_tags_in_limit_metric() -> bool { + false +} + pub(crate) const fn default_cache_size() -> usize { 5 * 1024 // 5KB } @@ -126,6 +138,8 @@ impl GenerateConfig for TagCardinalityLimitConfig { mode: Mode::Exact, value_limit: default_value_limit(), limit_exceeded_action: default_limit_exceeded_action(), + include_extended_tags_in_limit_metric: + default_include_extended_tags_in_limit_metric(), }, per_metric_limits: HashMap::default(), }) diff --git a/src/transforms/tag_cardinality_limit/mod.rs b/src/transforms/tag_cardinality_limit/mod.rs index 4e1e881574e7a..51768f120624f 100644 --- a/src/transforms/tag_cardinality_limit/mod.rs +++ b/src/transforms/tag_cardinality_limit/mod.rs @@ -151,10 +151,13 @@ impl TagCardinalityLimit { for (key, value) in tags_map.iter_sets() { if self.tag_limit_exceeded(metric_key.as_ref(), key, value) { + let config = self.get_config_for_metric(metric_key.as_ref()); emit!(TagCardinalityLimitRejectingEvent { metric_name: &metric_name, tag_key: key, tag_value: &value.to_string(), + include_extended_tags_in_limit_metric: config + .include_extended_tags_in_limit_metric, }); return None; } @@ -164,6 +167,8 @@ impl TagCardinalityLimit { } } LimitExceededAction::DropTag => { + let config = self.get_config_for_metric(metric_key.as_ref()); + let include_extended_tags = config.include_extended_tags_in_limit_metric; tags_map.retain(|key, value| { if self.try_accept_tag(metric_key.as_ref(), key, value) { true @@ -172,6 +177,7 @@ impl TagCardinalityLimit { metric_name: &metric_name, tag_key: key, tag_value: &value.to_string(), + include_extended_tags_in_limit_metric: include_extended_tags, }); false } diff --git a/src/transforms/tag_cardinality_limit/tests.rs b/src/transforms/tag_cardinality_limit/tests.rs index 48380472a9e74..71010ab1d9acf 100644 --- a/src/transforms/tag_cardinality_limit/tests.rs +++ b/src/transforms/tag_cardinality_limit/tests.rs @@ -53,6 +53,7 @@ fn make_transform_hashset( value_limit, limit_exceeded_action, mode: Mode::Exact, + include_extended_tags_in_limit_metric: false, }, per_metric_limits: HashMap::new(), } @@ -69,6 +70,7 @@ fn make_transform_bloom( mode: Mode::Probabilistic(BloomFilterConfig { cache_size_per_key: default_cache_size(), }), + include_extended_tags_in_limit_metric: false, }, per_metric_limits: HashMap::new(), } @@ -84,6 +86,7 @@ const fn make_transform_hashset_with_per_metric_limits( value_limit, limit_exceeded_action, mode: Mode::Exact, + include_extended_tags_in_limit_metric: false, }, per_metric_limits, } @@ -101,6 +104,7 @@ const fn make_transform_bloom_with_per_metric_limits( mode: Mode::Probabilistic(BloomFilterConfig { cache_size_per_key: default_cache_size(), }), + include_extended_tags_in_limit_metric: false, }, per_metric_limits, } diff --git a/website/cue/reference/components/sources/internal_metrics.cue b/website/cue/reference/components/sources/internal_metrics.cue index 7498601b6aa9c..6152488f1b0db 100644 --- a/website/cue/reference/components/sources/internal_metrics.cue +++ b/website/cue/reference/components/sources/internal_metrics.cue @@ -772,11 +772,29 @@ components: sources: internal_metrics: { tag_value_limit_exceeded_total: { description: """ The total number of events discarded because the tag has been rejected after - hitting the configured `value_limit`. + hitting the configured `value_limit`. When `include_extended_tags_in_limit_metric` + is enabled in the `tag_cardinality_limit` transform, this metric includes + `metric_name` and `tag_key` labels. By default, this metric has no labels to + keep cardinality low. """ type: "counter" default_namespace: "vector" - tags: _component_tags + tags: _component_tags & { + metric_name: { + description: """ + The name of the metric whose tag value limit was exceeded. + Only present when `include_extended_tags_in_limit_metric` is enabled. + """ + required: false + } + tag_key: { + description: """ + The key of the tag whose value limit was exceeded. + Only present when `include_extended_tags_in_limit_metric` is enabled. + """ + required: false + } + } } timestamp_parse_errors_total: { description: "The total number of errors encountered parsing [RFC 3339](\(urls.rfc_3339)) timestamps." diff --git a/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue b/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue index a7d92b56adf7a..cae089aaec0d2 100644 --- a/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue +++ b/website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue @@ -12,6 +12,16 @@ generated: components: transforms: tag_cardinality_limit: configuration: { required: false type: uint: default: 5120 } + include_extended_tags_in_limit_metric: { + description: """ + Whether to include extended labels (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric. + + This can be useful for debugging, but should be used with caution as it can significantly + increase metric cardinality if metric names or tag keys are high cardinality. + """ + required: false + type: bool: default: false + } limit_exceeded_action: { description: """ Possible actions to take when an event arrives that would exceed the cardinality limit for one @@ -64,6 +74,16 @@ generated: components: transforms: tag_cardinality_limit: configuration: { required: false type: uint: default: 5120 } + include_extended_tags_in_limit_metric: { + description: """ + Whether to include extended labels (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric. + + This can be useful for debugging, but should be used with caution as it can significantly + increase metric cardinality if metric names or tag keys are high cardinality. + """ + required: false + type: bool: default: false + } limit_exceeded_action: { description: """ Possible actions to take when an event arrives that would exceed the cardinality limit for one diff --git a/website/cue/reference/components/transforms/tag_cardinality_limit.cue b/website/cue/reference/components/transforms/tag_cardinality_limit.cue index f8ca250786f32..7edd7e0c727ee 100644 --- a/website/cue/reference/components/transforms/tag_cardinality_limit.cue +++ b/website/cue/reference/components/transforms/tag_cardinality_limit.cue @@ -187,7 +187,13 @@ components: transforms: tag_cardinality_limit: { } telemetry: metrics: { - tag_value_limit_exceeded_total: components.sources.internal_metrics.output.metrics.tag_value_limit_exceeded_total + tag_value_limit_exceeded_total: components.sources.internal_metrics.output.metrics.tag_value_limit_exceeded_total & { + description: """ + The number of times a tag value limit was exceeded. When `include_extended_tags_in_limit_metric` is enabled, + this metric includes `metric_name` and `tag_key` labels to help identify which specific metrics and tag keys + are hitting the configured value limit. By default, this metric has no labels to keep cardinality low. + """ + } value_limit_reached_total: components.sources.internal_metrics.output.metrics.value_limit_reached_total } }