diff --git a/.circleci/config.yml b/.circleci/config.yml index e097ce4ea0..7f7f01cb6d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,21 +65,24 @@ commands: jobs: test: - docker: - - image: cimg/go:1.25 - - image: redis:6.2 + # Use the machine executor so testcontainers-go has a local Docker daemon + # to drive — the redis testcontainer needs to be reachable from the test + # process via a host port, which the docker executor cannot provide. + machine: + image: ubuntu-2404:current resource_class: xlarge steps: - checkout - - restore_cache: - keys: - - v1-dockerize-{{ checksum "Makefile" }} - - v1-dockerize- - - run: make dockerize - - save_cache: - key: v1-dockerize-{{ checksum "Makefile" }} - paths: - - dockerize.tar.gz + - run: + name: Install Go and gotestsum + command: | + GO_VERSION=1.25.3 + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -o /tmp/go.tar.gz + sudo rm -rf /usr/local/go + sudo tar -C /usr/local -xzf /tmp/go.tar.gz + echo 'export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH' >> "$BASH_ENV" + export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH + go install gotest.tools/gotestsum@latest - restore_cache: keys: - v3-go-mod-{{ checksum "go.sum" }} diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e52fa0e62c..71031f1523 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @honeycombio/pipeline-team +* @honeycombio/agentic-observability diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a5e4aa310b..b323bc40c2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -11,8 +11,6 @@ updates: interval: "monthly" labels: - "type: dependencies" - reviewers: - - "honeycombio/pipeline-team" groups: minor-patch: update-types: diff --git a/CHANGELOG.md b/CHANGELOG.md index f10acfd4d8..610c996381 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Refinery Changelog +## 3.2.2 2026-05-26 + +### 🐛 Fixes + +- fix: validator exits non-zero on YAML parse errors in 
rules by @VinozzZ in https://github.com/honeycombio/refinery/pull/1820 +- fix: increment send_errors for network errors from request by @VinozzZ in https://github.com/honeycombio/refinery/pull/1823 +- fix: make sure FieldList is sorted before use it as dynsamplerKey by @VinozzZ in https://github.com/honeycombio/refinery/pull/1825 +- fix: overcounting dynsampler event_count and request_count by @VinozzZ in https://github.com/honeycombio/refinery/pull/1826 +- fix: copy fields slice before sorting in newTraceKey by @VinozzZ in https://github.com/honeycombio/refinery/pull/1827 + +### 🛠 Maintenance + +- maint: remove proto/otlp fork reference by @VinozzZ in https://github.com/honeycombio/refinery/pull/1822 + +## 3.2.1 2026-05-04 + +This release fixes a bug in OTLP JSON ingestion where `traceId` and `spanId` fields were incorrectly treated as base64-encoded. The OTLP JSON spec explicitly requires these fields to be hex-encoded strings, and clients sending data over OTLP HTTP/JSON would receive corrupted ID values as a result. 
+ +### 🛠 Maintenance + +- maint: update honeycombio/husky to v0.43.1 by @VinozzZ in https://github.com/honeycombio/refinery/pull/1816 +- maint(deps): bump the minor-patch group with 8 updates by @dependabot in https://github.com/honeycombio/refinery/pull/1814 + +## 3.2.0 2026-04-13 + +### 💡 Enhancements + +- feat: add ReceiveKeyIDs config option for key ID-based authorization by @tdarwin in https://github.com/honeycombio/refinery/pull/1803 +- feat: add OTelMetrics.AdditionalAttributes config option by @tdarwin in https://github.com/honeycombio/refinery/pull/1804 +- feat: add granular event metrics by @tdarwin in https://github.com/honeycombio/refinery/pull/1805 + +### 🐛 Fixes + +- fix: include AdditionalErrorFields in logs for transmission code by @VinozzZ in https://github.com/honeycombio/refinery/pull/1807 + +### 🛠 Maintenance + +- fix: update ko build tooling and fix flaky integration test by @tdarwin in https://github.com/honeycombio/refinery/pull/1806 +- maint(deps): bump go.opentelemetry.io/otel/sdk from 1.42.0 to 1.43.0 by @dependabot in https://github.com/honeycombio/refinery/pull/1810 +- maint(deps): bump the minor-patch group across 1 directory with 12 updates by @dependabot in https://github.com/honeycombio/refinery/pull/1812 + +## 3.1.2 2026-03-25 + +This release addresses security vulnerabilities CVE-2026-27139, CVE-2026-27142, and CVE-2026-25679. 
+ +### Features + +- feat: add capacity/limit companion metrics for queues and memory by @mterhar in https://github.com/honeycombio/refinery/pull/1799 + +### Maintenance + +- maint(deps): bump the minor-patch group across 1 directory with 21 updates by @dependabot in https://github.com/honeycombio/refinery/pull/1795 + ## 3.1.1 2026-02-25 ### Features diff --git a/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml b/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml index 34a4771aba..5c199c7226 100644 --- a/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml +++ b/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml @@ -1,6 +1,8 @@ name: go-tests -on: [push] +on: + pull_request: + branches: [ main ] env: TEST_RESULTS: /tmp/test-results @@ -11,16 +13,16 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go-version: [ 1.15.3, 1.19 ] + go-version: ['stable', 'oldstable'] steps: - name: Setup go - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Create test directory run: | @@ -30,7 +32,7 @@ jobs: run: go mod download - name: Cache / restore go modules - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 with: path: | ~/go/pkg/mod @@ -50,7 +52,7 @@ jobs: fi - name: Run golangci-lint - uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 + uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # Install gotestsum with go get for 1.15.3; otherwise default to go install - name: Install gotestsum @@ -71,13 +73,13 @@ jobs: # Save coverage 
report parts - name: Upload and save artifacts - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f with: name: Test Results-${{matrix.go-version}} path: ${{ env.TEST_RESULTS }} - name: Upload coverage report - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f with: path: coverage.out name: Coverage-report-${{matrix.go-version}} diff --git a/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md b/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md index 6d48174bfb..81b423151c 100644 --- a/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md +++ b/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md @@ -1,3 +1,41 @@ +# 1.9.0 (Mar 30, 2026) + +ENHANCEMENTS: + +Support parsing versions with custom prefixes via opt-in option in https://github.com/hashicorp/go-version/pull/79 + +INTERNAL: + +- Bump the github-actions-backward-compatible group across 1 directory with 2 updates in https://github.com/hashicorp/go-version/pull/179 +- Bump the github-actions-breaking group with 4 updates in https://github.com/hashicorp/go-version/pull/180 +- Bump the github-actions-backward-compatible group with 3 updates in https://github.com/hashicorp/go-version/pull/182 +- Update GitHub Actions to trigger on pull requests and update go version in https://github.com/hashicorp/go-version/pull/185 +- Bump actions/upload-artifact from 6.0.0 to 7.0.0 in the github-actions-breaking group across 1 directory in https://github.com/hashicorp/go-version/pull/183 +- Bump the github-actions-backward-compatible group across 1 directory with 2 updates in https://github.com/hashicorp/go-version/pull/186 + +# 1.8.0 (Nov 28, 2025) + +ENHANCEMENTS: + +- Add benchmark test for version.String() in https://github.com/hashicorp/go-version/pull/159 +- Bytes implementation in https://github.com/hashicorp/go-version/pull/161 + 
+INTERNAL: + +- Add CODEOWNERS file in .github/CODEOWNERS in https://github.com/hashicorp/go-version/pull/145 +- Linting in https://github.com/hashicorp/go-version/pull/151 +- Correct typos in comments in https://github.com/hashicorp/go-version/pull/134 +- Migrate GitHub Actions updates from TSCCR to Dependabot in https://github.com/hashicorp/go-version/pull/155 +- Bump the github-actions-backward-compatible group with 2 updates in https://github.com/hashicorp/go-version/pull/157 +- Update doc reference in README in https://github.com/hashicorp/go-version/pull/135 +- Bump the github-actions-breaking group with 3 updates in https://github.com/hashicorp/go-version/pull/156 +- [Compliance] - PR Template Changes Required in https://github.com/hashicorp/go-version/pull/158 +- Bump actions/cache from 4.2.3 to 4.2.4 in the github-actions-backward-compatible group in https://github.com/hashicorp/go-version/pull/167 +- Bump actions/checkout from 4.2.2 to 5.0.0 in the github-actions-breaking group in https://github.com/hashicorp/go-version/pull/166 +- Bump the github-actions-breaking group across 1 directory with 2 updates in https://github.com/hashicorp/go-version/pull/171 +- [IND-4226] [COMPLIANCE] Update Copyright Headers in https://github.com/hashicorp/go-version/pull/172 +- drop init() in https://github.com/hashicorp/go-version/pull/175 + # 1.7.0 (May 24, 2024) ENHANCEMENTS: diff --git a/LICENSES/github.com/hashicorp/go-version/README.md b/LICENSES/github.com/hashicorp/go-version/README.md index 83a8249f72..5528960215 100644 --- a/LICENSES/github.com/hashicorp/go-version/README.md +++ b/LICENSES/github.com/hashicorp/go-version/README.md @@ -34,6 +34,32 @@ if v1.LessThan(v2) { } ``` +#### Version Parsing and Comparison with Prefixes + +The library also supports parsing versions with a custom prefix. +Using the `WithPrefix` option, you can specify a prefix to strip +before parsing the version. 
+ +Use `WithPrefix` when your input strings carry a known release prefix such as +`deployment-`, `controller-`, etc. + +After parsing, the prefix is not part of the canonical version value. This +means the regular comparison methods such as `Compare`, `LessThan`, `Equal`, +and `GreaterThan` compare only the stripped version. If you compare versions +from different prefixes with these methods, the prefixes are ignored. If you +need to reject cross-prefix comparisons, inspect the parsed prefixes before +comparing the versions. + +```go +v1, _ := version.NewVersion("deployment-v1.2.3-beta+metadata", version.WithPrefix("deployment-")) +v2, _ := version.NewVersion("deployment-v1.2.4", version.WithPrefix("deployment-")) + +if v1.LessThan(v2) { + fmt.Printf("%s (%s) is less than %s (%s)\n", v1, v1.Original(), v2, v2.Original()) + // Outputs: 1.2.3-beta+metadata (deployment-v1.2.3-beta+metadata) is less than 1.2.4 (deployment-v1.2.4) +} +``` + #### Version Constraints ```go diff --git a/LICENSES/github.com/hashicorp/go-version/version.go b/LICENSES/github.com/hashicorp/go-version/version.go index 17b29732ee..b95503d3cf 100644 --- a/LICENSES/github.com/hashicorp/go-version/version.go +++ b/LICENSES/github.com/hashicorp/go-version/version.go @@ -49,6 +49,23 @@ const ( `?` ) +// Optional options for NewVersion function. +type options struct { + // If set, this prefix will be trimmed from the version string before parsing. + prefix string +} + +// Option is a functional option for NewVersion. +type Option func(*options) + +// WithPrefix is a functional option that sets a prefix to be removed from the +// version string before parsing. +func WithPrefix(prefix string) Option { + return func(o *options) { + o.prefix = prefix + } +} + // Version represents a single version. type Version struct { metadata string @@ -56,12 +73,36 @@ type Version struct { segments []int64 si int original string + prefix string } -// NewVersion parses the given version and returns a new -// Version. 
-func NewVersion(v string) (*Version, error) { - return newVersion(v, getVersionRegexp()) +// NewVersion parses the given version and returns a new Version. +// +// Optional parsing behavior can be enabled with Option values such as +// WithPrefix, which validates and strips an expected prefix before parsing. +func NewVersion(v string, opts ...Option) (*Version, error) { + options := &options{} + for _, opt := range opts { + if opt != nil { + opt(options) + } + } + + vToParse := v + if options.prefix != "" { + if !strings.HasPrefix(v, options.prefix) { + return nil, fmt.Errorf("version %q does not have prefix %q", v, options.prefix) + } + vToParse = strings.TrimPrefix(v, options.prefix) + } + + ver, err := newVersion(vToParse, getVersionRegexp()) + if err != nil { + return nil, err + } + ver.prefix = options.prefix + ver.original = v + return ver, nil } // NewSemver parses the given version and returns a new @@ -424,6 +465,11 @@ func (v *Version) Original() string { return v.original } +// Prefix returns the explicit prefix used with WithPrefix, if any. +func (v *Version) Prefix() string { + return v.prefix +} + // UnmarshalText implements encoding.TextUnmarshaler interface. 
func (v *Version) UnmarshalText(b []byte) error { temp, err := NewVersion(string(b)) diff --git a/LICENSES/github.com/hashicorp/go-version/version_test.go b/LICENSES/github.com/hashicorp/go-version/version_test.go index 15a062324f..8da634559b 100644 --- a/LICENSES/github.com/hashicorp/go-version/version_test.go +++ b/LICENSES/github.com/hashicorp/go-version/version_test.go @@ -39,6 +39,8 @@ func TestNewVersion(t *testing.T) { {"1.7rc2", false}, {"v1.7rc2", false}, {"1.0-", false}, + {"controller-v0.40.2", true}, + {"azure-cli-v1.4.2", true}, } for _, tc := range cases { @@ -51,6 +53,33 @@ func TestNewVersion(t *testing.T) { } } +func TestNewVersionWithPrefix(t *testing.T) { + cases := []struct { + version string + prefix string + err bool + }{ + {"", "release-", true}, + {"rel-1.2.3", "release-", true}, + {"release_1.2.3", "release-", true}, + {"release_1.2.0-x.Y.0+metadata", "release_", false}, + {"release-1.2.0-x.Y.0+metadata-width-hyphen", "release-", false}, + {"myrelease-1.2.3-rc1-with-hyphen", "myrelease-", false}, + {"prefix-1.2.3.4", "prefix-", false}, + {"controller-v0.40.2", "controller-", false}, + {"azure-cli-v1.4.2", "azure-cli-", false}, + } + + for _, tc := range cases { + _, err := NewVersion(tc.version, WithPrefix(tc.prefix)) + if tc.err && err == nil { + t.Fatalf("expected error for version: %q", tc.version) + } else if !tc.err && err != nil { + t.Fatalf("error for version %q: %s", tc.version, err) + } + } +} + func TestNewSemver(t *testing.T) { cases := []struct { version string @@ -80,6 +109,8 @@ func TestNewSemver(t *testing.T) { {"1.7rc2", true}, {"v1.7rc2", true}, {"1.0-", true}, + {"controller-v0.40.2", true}, + {"azure-cli-v1.4.2", true}, } for _, tc := range cases { @@ -171,6 +202,107 @@ func TestVersionCompare(t *testing.T) { } } +func TestVersionCompareWithPrefix(t *testing.T) { + cases := []struct { + v1 string + v1Prefix string + v2 string + v2Prefix string + expected int + }{ + {"controller-v0.40.2", "controller-", 
"controller-v0.40.3", "controller-", -1}, + {"0.40.4", "", "controller-v0.40.2", "controller-", 1}, + {"0.40.4", "", "controller-v0.40.4", "controller-", 0}, + {"azure-cli-v1.4.2", "azure-cli-", "azure-cli-v1.4.2", "azure-cli-", 0}, + {"azure-cli-v1.4.1", "azure-cli-", "azure-cli-v1.4.2", "azure-cli-", -1}, + {"1.4.3", "", "azure-cli-v1.4.2", "azure-cli-", 1}, + {"v1.4.3", "", "azure-cli-v1.4.2", "azure-cli-", 1}, + {"controller-v1.4.1", "controller-", "azure-cli-v1.4.2", "azure-cli-", -1}, + } + + for _, tc := range cases { + var v1 *Version + var err error + if tc.v1Prefix != "" { + v1, err = NewVersion(tc.v1, WithPrefix(tc.v1Prefix)) + } else { + v1, err = NewVersion(tc.v1) + } + if err != nil { + t.Fatalf("err: %s", err) + } + + var v2 *Version + if tc.v2Prefix != "" { + v2, err = NewVersion(tc.v2, WithPrefix(tc.v2Prefix)) + } else { + v2, err = NewVersion(tc.v2) + } + if err != nil { + t.Fatalf("err: %s", err) + } + + actual := v1.Compare(v2) + expected := tc.expected + if actual != expected { + t.Fatalf( + "%s <=> %s\nexpected: %d\nactual: %d", + tc.v1, tc.v2, + expected, actual) + } + } +} + +func TestVersionAccessorsWithPrefix(t *testing.T) { + v, err := NewVersion("controller-v1.2.0-beta.2+build.5", WithPrefix("controller-")) + if err != nil { + t.Fatalf("err: %s", err) + } + + if got := v.Prefix(); got != "controller-" { + t.Fatalf("expected prefix %q, got %q", "controller-", got) + } + + if got := v.Original(); got != "controller-v1.2.0-beta.2+build.5" { + t.Fatalf("expected original %q, got %q", "controller-v1.2.0-beta.2+build.5", got) + } + + if got := v.String(); got != "1.2.0-beta.2+build.5" { + t.Fatalf("expected string %q, got %q", "1.2.0-beta.2+build.5", got) + } + + if got := v.Metadata(); got != "build.5" { + t.Fatalf("expected metadata %q, got %q", "build.5", got) + } + + if got := v.Prerelease(); got != "beta.2" { + t.Fatalf("expected prerelease %q, got %q", "beta.2", got) + } + + expectedSegments := []int{1, 2, 0} + if got := v.Segments(); 
!reflect.DeepEqual(got, expectedSegments) { + t.Fatalf("expected segments %#v, got %#v", expectedSegments, got) + } + + expectedSegments64 := []int64{1, 2, 0} + if got := v.Segments64(); !reflect.DeepEqual(got, expectedSegments64) { + t.Fatalf("expected segments64 %#v, got %#v", expectedSegments64, got) + } +} + +func TestVersionSegmentsWithPrefix(t *testing.T) { + v, err := NewVersion("azure-cli-v1.4.2", WithPrefix("azure-cli-")) + if err != nil { + t.Fatalf("err: %s", err) + } + + expected := []int{1, 4, 2} + actual := v.Segments() + if !reflect.DeepEqual(actual, expected) { + t.Fatalf("expected: %#v\nactual: %#v", expected, actual) + } +} + func TestVersionCompare_versionAndSemver(t *testing.T) { cases := []struct { versionRaw string diff --git a/LICENSES/github.com/dgryski/go-rendezvous/LICENSE b/LICENSES/go.uber.org/atomic/LICENSE.txt similarity index 92% rename from LICENSES/github.com/dgryski/go-rendezvous/LICENSE rename to LICENSES/go.uber.org/atomic/LICENSE.txt index 22080f736a..8765c9fbc6 100644 --- a/LICENSES/github.com/dgryski/go-rendezvous/LICENSE +++ b/LICENSES/go.uber.org/atomic/LICENSE.txt @@ -1,6 +1,4 @@ -The MIT License (MIT) - -Copyright (c) 2017-2020 Damian Gryski +Copyright (c) 2016 Uber Technologies, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 189fcd6ccf..4226762e68 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ test: test_with_race test_all .PHONY: test_with_race #: run only tests tagged with potential race conditions -test_with_race: test_results wait_for_redis +test_with_race: test_results @echo @echo "+++ testing - race conditions?" 
@echo @@ -18,7 +18,7 @@ test_with_race: test_results wait_for_redis .PHONY: test_all #: run all tests, but with no race condition detection -test_all: test_results wait_for_redis +test_all: test_results @echo @echo "+++ testing - all the tests" @echo @@ -34,19 +34,10 @@ local_image: ko crane ./build-docker.sh docker tag $$(docker images ko.local/refinery --quiet | head -1) ko.local/refinery:local -.PHONY: wait_for_redis -# wait for Redis to become available for test suite -wait_for_redis: dockerize - @echo - @echo "+++ We need a Redis running to run the tests." - @echo - @echo "Checking with dockerize $(shell ./dockerize --version)" - @./dockerize -wait tcp://localhost:6379 -timeout 30s - # You can override this version from an environment variable. HOST_OS := $(shell uname -s | tr A-Z a-z) # You can override this version from an environment variable. -KO_VERSION ?= 0.11.2 +KO_VERSION ?= 0.18.0 KO_RELEASE_ASSET := ko_${KO_VERSION}_${HOST_OS}_x86_64.tar.gz # ensure the ko command is available ko: ko_${KO_VERSION}.tar.gz @@ -109,7 +100,7 @@ DOCKERIZE_RELEASE_ASSET := dockerize-${HOST_OS}-amd64-${DOCKERIZE_VERSION}.tar.g dockerize.tar.gz: @echo - @echo "+++ Retrieving dockerize tool for Redis readiness check." + @echo "+++ Retrieving dockerize tool for service readiness checks." @echo # make sure that file is available ifeq (, $(shell command -v file)) @@ -189,7 +180,3 @@ unsmoke: @echo "+++ Spinning down the smokers." @echo "" cd smoke-test && docker-compose down --volumes - - - - diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 2e25ca23f1..8b05b3241c 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,6 +2,46 @@ While [CHANGELOG.md](./CHANGELOG.md) contains detailed documentation and links to all the source code changes in a given release, this document is intended to be aimed at a more comprehensible version of the contents of the release from the point of view of users of Refinery. 
+## Version 3.2.2 + +This release fixes dynamic sampling correctness and metrics accuracy when multiple collector workers are enabled. + +### Fixes + +* **Throughput sampler correctness**: Fixed throughput targets not being met. If you set `WorkerCount` to `1` as a workaround, you can now remove that override. +* **Dynsampler metrics accuracy**: Fixed `event_count` and `request_count` being reported higher than actual throughput. +* Fixed `send_errors` not being incremented for network-level transmission errors. +* Fixed the config validator not exiting with a non-zero code on YAML parse errors in rules files. + +## Version 3.2.1 + +This release fixes a bug where trace and span IDs were corrupted for clients sending data over OTLP HTTP/JSON. + +## Version 3.2.0 + +This release adds new configuration options for authorization and observability. + +### Configuration Changes + +* **Added**: `AccessKeys.ReceiveKeyIDs` - authorizes incoming traffic by Honeycomb ingest key IDs (obtained from the `/1/auth` endpoint) instead of requiring full API keys. Supports live reload alongside the existing `ReceiveKeys` option. +* **Added**: `OTelMetrics.AdditionalAttributes` - injects custom resource attributes (e.g., cluster ID, environment name) into all OTLP metrics emitted by Refinery. Supplied as comma-separated `key:value` pairs. + +### New Metrics + +* `events_dropped` - Counter tracking the number of events dropped by Refinery. + +### Fixes + +* Fixed `AdditionalErrorFields` not being included in transmission error logs. + +## Version 3.1.2 + +This patch release primarily addresses security vulnerabilities in dependencies. + +### Maintenance + +- Updated dependencies to address security vulnerabilities CVE-2026-27139, CVE-2026-27142, and CVE-2026-25679. + ## Version 3.1.1 This patch release includes bug fixes and a new feature for configuring additional HTTP headers. 
diff --git a/agent/agent.go b/agent/agent.go index f408d8bfd6..736598dd7d 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -228,6 +228,22 @@ func (agent *Agent) healthCheck() { agent.usageTracker.Add(signal_traces, traceUsage) agent.usageTracker.Add(signal_logs, logUsage) + + var eventsReceived float64 + if v, ok := agent.metrics.Get("incoming_router_span"); ok { + eventsReceived += v + } + if v, ok := agent.metrics.Get("incoming_router_nonspan_event"); ok { + eventsReceived += v + } + if v, ok := agent.metrics.Get("incoming_router_event"); ok { + eventsReceived += v + } + agent.usageTracker.Add(signal_events_received, eventsReceived) + + if eventsDropped, ok := agent.metrics.Get("events_dropped"); ok { + agent.usageTracker.Add(signal_events_dropped, eventsDropped) + } } } } diff --git a/agent/otlp_metrics.go b/agent/otlp_metrics.go index 7e9e8140e4..b5e275925c 100644 --- a/agent/otlp_metrics.go +++ b/agent/otlp_metrics.go @@ -9,9 +9,22 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" ) +type metricMapping struct { + metricName string + signal string +} + +var signalToMetric = map[usageSignal]metricMapping{ + signal_traces: {metricName: "bytes_received", signal: "traces"}, + signal_logs: {metricName: "bytes_received", signal: "logs"}, + signal_events_received: {metricName: "events_received", signal: ""}, + signal_events_dropped: {metricName: "events_dropped", signal: ""}, +} + type otlpMetrics struct { metrics pmetric.Metrics - ms pmetric.Sum + sums map[string]pmetric.Sum + sm pmetric.ScopeMetrics } func newOTLPMetrics(serviceName, version, hostname string) *otlpMetrics { @@ -22,25 +35,42 @@ func newOTLPMetrics(serviceName, version, hostname string) *otlpMetrics { resourceAttrs.PutStr("service.version", version) resourceAttrs.PutStr("host.name", hostname) sm := rm.ScopeMetrics().AppendEmpty() - ms := sm.Metrics().AppendEmpty() - ms.SetName("bytes_received") - sum := ms.SetEmptySum() - sum.SetAggregationTemporality(pmetric.AggregationTemporalityDelta) 
return &otlpMetrics{ metrics: metrics, - ms: sum, + sums: make(map[string]pmetric.Sum), + sm: sm, + } +} + +func (om *otlpMetrics) getOrCreateSum(metricName string) pmetric.Sum { + if sum, ok := om.sums[metricName]; ok { + return sum } + ms := om.sm.Metrics().AppendEmpty() + ms.SetName(metricName) + sum := ms.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityDelta) + om.sums[metricName] = sum + return sum } func (om *otlpMetrics) addOTLPSum(timestamp time.Time, value float64, signal usageSignal) error { + mapping, ok := signalToMetric[signal] + if !ok { + return fmt.Errorf("unknown usage signal: %s", signal) + } + intVal, err := convertFloat64ToInt64(value) if err != nil { return err } - d := om.ms.DataPoints().AppendEmpty() + sum := om.getOrCreateSum(mapping.metricName) + d := sum.DataPoints().AppendEmpty() d.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) d.SetIntValue(intVal) - d.Attributes().PutStr("signal", string(signal)) + if mapping.signal != "" { + d.Attributes().PutStr("signal", mapping.signal) + } return nil } diff --git a/agent/usage_report.go b/agent/usage_report.go index 0d947a021c..f48e4e333b 100644 --- a/agent/usage_report.go +++ b/agent/usage_report.go @@ -89,6 +89,8 @@ func (ur *usageTracker) completeSend() { type usageSignal string var ( - signal_traces usageSignal = "traces" - signal_logs usageSignal = "logs" + signal_traces usageSignal = "traces" + signal_logs usageSignal = "logs" + signal_events_received usageSignal = "events_received" + signal_events_dropped usageSignal = "events_dropped" ) diff --git a/app/app_test.go b/app/app_test.go index 1ac3a737cd..cc4bd18788 100644 --- a/app/app_test.go +++ b/app/app_test.go @@ -34,6 +34,7 @@ import ( "github.com/honeycombio/refinery/config" "github.com/honeycombio/refinery/internal/health" "github.com/honeycombio/refinery/internal/peer" + "github.com/honeycombio/refinery/internal/redistest" "github.com/honeycombio/refinery/logger" 
"github.com/honeycombio/refinery/metrics" "github.com/honeycombio/refinery/pubsub" @@ -235,11 +236,11 @@ func (w *countingTransmission) waitForCount(t testing.TB, n int) { // each test gets a unique port and redisDB. // // by default, every Redis instance supports 16 databases, we use redisDB as a way to separate test data -func defaultConfig(basePort int, redisDB int, apiURL string) *config.MockConfig { - return defaultConfigWithGRPC(basePort, redisDB, apiURL, false) +func defaultConfig(t testing.TB, basePort int, redisDB int, apiURL string) *config.MockConfig { + return defaultConfigWithGRPC(t, basePort, redisDB, apiURL, false) } -func defaultConfigWithGRPC(basePort int, redisDB int, apiURL string, enableGRPC bool) *config.MockConfig { +func defaultConfigWithGRPC(t testing.TB, basePort int, redisDB int, apiURL string, enableGRPC bool) *config.MockConfig { if redisDB >= 16 { panic("redisDB must be less than 16") } @@ -247,6 +248,8 @@ func defaultConfigWithGRPC(basePort int, redisDB int, apiURL string, enableGRPC apiURL = "http://api.honeycomb.io" } + redisHost, redisPort := redistest.Endpoint(t) + cfg := &config.MockConfig{ GetTracesConfigVal: config.TracesConfig{ SendTicker: config.Duration(2 * time.Millisecond), @@ -258,6 +261,7 @@ func defaultConfigWithGRPC(basePort int, redisDB int, apiURL string, enableGRPC AddRuleReasonToTrace: true, PeerManagementType: "redis", GetRedisPeerManagementVal: config.RedisPeerManagementConfig{ + Host: redisHost + ":" + redisPort, Prefix: "refinery-app-test", Timeout: config.Duration(1 * time.Second), Database: redisDB, @@ -382,10 +386,19 @@ func newStartedApp( assert.NoError(t, err) err = startstop.Start(g.Objects(), nil) - assert.NoError(t, err) + require.NoError(t, err) + + // Wait for the HTTP server to be ready by polling the listen address. 
+ listenAddr := c.GetListenAddr() + require.Eventually(t, func() bool { + conn, err := net.DialTimeout("tcp", listenAddr, 50*time.Millisecond) + if err != nil { + return false + } + conn.Close() + return true + }, 2*time.Second, 10*time.Millisecond, "server failed to start listening on %s", listenAddr) - // Racy: wait just a moment for ListenAndServe to start up. - time.Sleep(15 * time.Millisecond) return &a, g } @@ -432,7 +445,7 @@ func TestAppIntegration(t *testing.T) { redisDB := 2 testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) app, graph := newStartedApp(t, nil, nil, cfg) // Send a root span, it should be sent in short order. @@ -679,7 +692,7 @@ func TestAppIntegrationSendKey(t *testing.T) { redisDB := 1 + i testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) cfg.GetAccessKeyConfigVal = config.AccessKeyConfig{ SendKey: tt.sendKey, SendKeyMode: tt.sendKeyMode, @@ -893,7 +906,7 @@ func TestAppIntegrationWithNonLegacyKey(t *testing.T) { redisDB := 3 testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) a, graph := newStartedApp(t, nil, nil, cfg) a.IncomingRouter.SetEnvironmentCache(time.Second, func(s string) (string, error) { return "test", nil }) a.PeerRouter.SetEnvironmentCache(time.Second, func(s string) (string, error) { return "test", nil }) @@ -933,7 +946,7 @@ func TestAppIntegrationEmptyEvent(t *testing.T) { port := 19010 redisDB := 8 - cfg := defaultConfig(port, redisDB, "") + cfg := defaultConfig(t, port, redisDB, "") _, graph := newStartedApp(t, nil, nil, cfg) tt := []struct { @@ -996,7 +1009,7 @@ func TestPeerRouting(t *testing.T) { senders[i] = &transmit.MockTransmission{} peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 
5 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := defaultConfig(t, basePort, redisDB, "") apps[i], graph = newStartedApp(t, senders[i], peers, cfg) defer startstop.Stop(graph.Objects(), nil) @@ -1071,7 +1084,7 @@ func TestHostMetadataSpanAdditions(t *testing.T) { redisDB := 7 testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) cfg.AddHostMetadataToTrace = true app, graph := newStartedApp(t, nil, nil, cfg) @@ -1125,7 +1138,7 @@ func TestEventsEndpoint(t *testing.T) { peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 8 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := defaultConfig(t, basePort, redisDB, "") apps[i], graph = newStartedApp(t, senders[i], peers, cfg) defer startstop.Stop(graph.Objects(), nil) } @@ -1221,7 +1234,7 @@ func TestEventsEndpointWithNonLegacyKey(t *testing.T) { peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 10 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := defaultConfig(t, basePort, redisDB, "") app, graph := newStartedApp(t, senders[i], peers, cfg) app.IncomingRouter.SetEnvironmentCache(time.Second, func(s string) (string, error) { return "test", nil }) @@ -1309,7 +1322,7 @@ func TestOTLPProtobufIntegration(t *testing.T) { redisDB := 14 testServer := newTestAPIServer(t) - cfg := defaultConfigWithGRPC(port, redisDB, testServer.server.URL, true) + cfg := defaultConfigWithGRPC(t, port, redisDB, testServer.server.URL, true) app, graph := newStartedApp(t, nil, nil, cfg) // Create OTLP protobuf request @@ -1412,7 +1425,7 @@ func TestOTLPGRPCConcurrency(t *testing.T) { redisDB := 15 testServer := newTestAPIServer(t) - cfg := defaultConfigWithGRPC(port, redisDB, testServer.server.URL, true) + cfg := defaultConfigWithGRPC(t, port, redisDB, testServer.server.URL, true) _, graph := newStartedApp(t, nil, nil, cfg) // Connect to gRPC server @@ -1651,7 +1664,7 @@ func 
createBenchmarkOTLPRequest() *collectortrace.ExportTraceServiceRequest { func BenchmarkTracesOTLP(b *testing.B) { sender := &countingTransmission{} redisDB := 15 - cfg := defaultConfigWithGRPC(18000, redisDB, "", true) + cfg := defaultConfigWithGRPC(b, 18000, redisDB, "", true) _, graph := newStartedApp(b, sender, nil, cfg) defer func() { err := startstop.Stop(graph.Objects(), nil) @@ -1764,7 +1777,7 @@ func BenchmarkTracesOTLP(b *testing.B) { func BenchmarkTraces(b *testing.B) { sender := &countingTransmission{} redisDB := 1 - cfg := defaultConfig(11000, redisDB, "") + cfg := defaultConfig(b, 11000, redisDB, "") _, graph := newStartedApp(b, sender, nil, cfg) defer func() { err := startstop.Stop(graph.Objects(), nil) @@ -1804,8 +1817,8 @@ func BenchmarkTraces(b *testing.B) { } // createRulesBasedConfig creates a mock config with rules-based sampler containing downstream samplers -func createRulesBasedConfig(port, redisDB int, apiURL string, throughputGoal int) *config.MockConfig { - cfg := defaultConfig(port, redisDB, apiURL) +func createRulesBasedConfig(t testing.TB, port, redisDB int, apiURL string, throughputGoal int) *config.MockConfig { + cfg := defaultConfig(t, port, redisDB, apiURL) // Configure rules-based sampler with selective rules cfg.GetSamplerTypeVal = &config.RulesBasedSamplerConfig{ @@ -1890,7 +1903,7 @@ func TestRulesBasedSamplerWithDownstreamAndClusterChanges(t *testing.T) { // Phase 1: Initial setup with single-node cluster mockPeers := peer.NewMockPeers([]string{"http://localhost:20001"}, "http://localhost:20001") - cfg := createRulesBasedConfig(port, redisDB, testServer.server.URL, 100) + cfg := createRulesBasedConfig(t, port, redisDB, testServer.server.URL, 100) _, graph := newStartedApp(t, nil, mockPeers, cfg) defer startstop.Stop(graph.Objects(), nil) @@ -2180,7 +2193,7 @@ func BenchmarkDistributedTraces(b *testing.B) { peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 2 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := 
defaultConfig(b, basePort, redisDB, "") apps[i], graph = newStartedApp(b, sender, peers, cfg) defer startstop.Stop(graph.Objects(), nil) diff --git a/build-docker.sh b/build-docker.sh index e47c388573..f1b9f8137d 100755 --- a/build-docker.sh +++ b/build-docker.sh @@ -3,101 +3,52 @@ set -o nounset set -o pipefail set -o xtrace -### Versioning and image tagging ### -# -# Three build scenarios: -# 1. CI release build: triggered by git tag -# - Stable (vX.Y.Z): tagged with major, minor, patch, and "latest" -# - Pre-release (vX.Y.Z-suffix): tagged only with exact version -# 2. CI branch build: version + CI job ID, tagged with branch name (+ "latest" if main) -# 3. Local build: version from git describe, tagged with that version - -# Get version info from git (used by branch and local builds) -# --tags: use any tag, not just annotated ones -# --match='v[0-9]*': only version tags (starts with v and a digit) -# --always: fall back to commit ID if no tag found -# e.g., v2.1.1-45-ga1b2c3d means commit a1b2c3d, 45 commits ahead of tag v2.1.1 -VERSION_FROM_GIT=$(git describe --tags --match='v[0-9]*' --always) - -if [[ -n "${CIRCLE_TAG:-}" ]]; then - # Release build (triggered by git tag) - VERSION=${CIRCLE_TAG#"v"} - - if [[ "${CIRCLE_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - # Stable release: tag with major, minor, patch, and latest - # e.g., v2.1.1 -> "2", "2.1", "2.1.1", "latest" - MAJOR_VERSION=${VERSION%%.*} - MINOR_VERSION=${VERSION%.*} - TAGS="$MAJOR_VERSION,$MINOR_VERSION,$VERSION,latest" - else - # Pre-release: only the exact version tag - # e.g., v3.0.0-rc1 -> "3.0.0-rc1" - TAGS="$VERSION" - fi - -elif [[ -n "${CIRCLE_BRANCH:-}" ]]; then - # CI branch build - # Version from git describe + CI job ID - # e.g., 2.1.1-45-ga1b2c3d-ci8675309 - VERSION="${VERSION_FROM_GIT#'v'}-ci${CIRCLE_BUILD_NUM}" - BRANCH_TAG=${CIRCLE_BRANCH//\//-} - TAGS="${VERSION},branch-${BRANCH_TAG}" - - # Main branch builds are tagged "latest" in the private registry - if [[ "${CIRCLE_BRANCH}" 
== "main" ]]; then - TAGS+=",latest" - fi - -else - # Local build - # Version from git describe only - # e.g., 2.1.1-45-ga1b2c3d - VERSION=${VERSION_FROM_GIT#'v'} - TAGS="${VERSION}" -fi - -GIT_COMMIT=${CIRCLE_SHA1:-$(git rev-parse HEAD)} +GCLOUD_REGISTRY="gcr.io/sre-team-418623" + +# Parse flags +PUSH=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --push) + PUSH=true + shift + ;; + *) + echo "Usage: $0 [--push]" + echo " --push Build and push to ${GCLOUD_REGISTRY}/refinery" + echo " (default) Build locally only" + exit 1 + ;; + esac +done + +VERSION=$(git describe --tags --match='v[0-9]*' --always) +VERSION=${VERSION#v} +GIT_COMMIT=$(git rev-parse HEAD) unset GOOS unset GOARCH export GOFLAGS="-ldflags=-X=main.BuildID=$VERSION" export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-$(make latest_modification_time)} -# Build the image once, either to a remote registry designated by PRIMARY_DOCKER_REPO -# or to the local repository as "ko.local/refinery:" if PRIMARY_DOCKER_REPO is not set. -export KO_DOCKER_REPO="${PRIMARY_DOCKER_REPO:-ko.local}" +# Force IPv4 to avoid IPv6 connectivity issues when pulling base image layers +export GODEBUG=preferIPv4=1 + +if [[ "$PUSH" == "true" ]]; then + export KO_DOCKER_REPO="$GCLOUD_REGISTRY" +else + export KO_DOCKER_REPO="ko.local" +fi -echo "Building image locally with ko for multi-registry push..." # shellcheck disable=SC2086 -IMAGE_REF=$(./ko publish \ - --tags "${TAGS}" \ +IMAGE_REF=$(ko publish \ + --tags "${VERSION}" \ --base-import-paths \ --platform "linux/amd64,linux/arm64" \ - --image-label org.opencontainers.image.source=https://github.com/honeycombio/refinery \ + --image-label org.opencontainers.image.source=https://github.com/khan/refinery \ --image-label org.opencontainers.image.licenses=Apache-2.0 \ --image-label org.opencontainers.image.revision=${GIT_COMMIT} \ ./cmd/refinery) echo "Built image: ${IMAGE_REF}" - -# If COPY_DOCKER_REPOS is set, copy the built image to each of the listed registries. 
-# This is a comma-separated list of registry/repo names, e.g. -# "public.ecr.aws/honeycombio,ghcr.io/honeycombio/refinery" -if [[ -n "${COPY_DOCKER_REPOS:-}" ]]; then - echo "Pushing to multiple registries: ${COPY_DOCKER_REPOS}" - - IFS=',' read -ra REPOS <<< "$COPY_DOCKER_REPOS" - for REPO in "${REPOS[@]}"; do - REPO=$(echo "$REPO" | xargs) # trim whitespace - echo "Tagging and pushing to: $REPO" - - # Tag for each tag in the TAGS list - IFS=',' read -ra TAG_LIST <<< "$TAGS" - for TAG in "${TAG_LIST[@]}"; do - TAG=$(echo "$TAG" | xargs) # trim whitespace - TARGET_IMAGE="$REPO/refinery:$TAG" - echo "Copying $IMAGE_REF to $TARGET_IMAGE" - ./crane copy "$IMAGE_REF" "$TARGET_IMAGE" - done - done -fi diff --git a/cmd/refinery/main.go b/cmd/refinery/main.go index 4f68b6a036..efe4e82110 100644 --- a/cmd/refinery/main.go +++ b/cmd/refinery/main.go @@ -99,11 +99,9 @@ func main() { c, err := config.NewConfig(opts, version) if err != nil { - if configErr, isConfigErr := err.(*config.FileConfigError); isConfigErr && configErr.HasErrors() { - fmt.Printf("%+v\n", err) + fmt.Printf("%+v\n", err) + if c == nil { os.Exit(1) - } else { - fmt.Printf("%+v\n", err) } } if opts.Validate { diff --git a/collect/collect.go b/collect/collect.go index ea430892f1..0ce0be596b 100644 --- a/collect/collect.go +++ b/collect/collect.go @@ -113,7 +113,8 @@ type InMemCollector struct { hostname string - memMetricSample []rtmetrics.Sample // Memory monitoring using runtime/metrics + memMetricSample []rtmetrics.Sample // Memory monitoring using runtime/metrics + spanCounters []config.SpanCounter } // These are the names of the metrics we use to track the number of events sent to peers through the router. 
@@ -128,11 +129,14 @@ var inMemCollectorMetrics = []metrics.Metadata{ {Name: "trace_span_count", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of spans in a trace"}, {Name: "collector_incoming_queue", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of spans currently in the incoming queue"}, {Name: "collector_peer_queue_length", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of spans in the peer queue"}, + {Name: "collector_peer_queue_capacity", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "configured maximum number of spans in the peer queue"}, {Name: "collector_incoming_queue_length", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of spans in the incoming queue"}, + {Name: "collector_incoming_queue_capacity", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "configured maximum number of spans in the incoming queue"}, {Name: "collector_peer_queue", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of spans currently in the peer queue"}, {Name: "collector_cache_size", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of traces currently stored in the trace cache"}, {Name: "collect_cache_entries", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "Total number of traces currently stored in the cache from all workers"}, {Name: "memory_heap_allocation", Type: metrics.Gauge, Unit: metrics.Bytes, Description: "current heap allocation"}, + {Name: "memory_limit", Type: metrics.Gauge, Unit: metrics.Bytes, Description: "configured maximum memory allocation for the collector (derived from MaxAlloc or AvailableMemory * MaxMemoryPercentage)"}, {Name: "span_received", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans received by the collector"}, {Name: "span_processed", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans 
processed by the collector"}, {Name: "spans_waiting", Type: metrics.UpDown, Unit: metrics.Dimensionless, Description: "number of spans waiting to be processed by the collector"}, @@ -152,6 +156,7 @@ var inMemCollectorMetrics = []metrics.Metadata{ {Name: "dropped_from_stress", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans dropped due to stress relief"}, {Name: "kept_from_stress", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans kept due to stress relief"}, + {Name: "events_dropped", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of events dropped"}, {Name: "trace_kept_sample_rate", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "sample rate of kept traces"}, {Name: "trace_aggregate_sample_rate", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "aggregate sample rate of both kept and dropped traces"}, {Name: "collector_collect_loop_duration_ms", Type: metrics.Histogram, Unit: metrics.Milliseconds, Description: "duration of the collect loop, the primary event processing goroutine"}, @@ -171,6 +176,7 @@ func (i *InMemCollector) Start() error { i.Logger.Info().WithField("num_workers", numWorkers).Logf("Starting InMemCollector with %d workers", numWorkers) i.StressRelief.UpdateFromConfig() + i.initSpanCounters() // Set queue capacity metrics for stress relief calculations i.Metrics.Store(DENOMINATOR_INCOMING_CAP, float64(imcConfig.IncomingQueueSize)) i.Metrics.Store(DENOMINATOR_PEER_CAP, float64(imcConfig.PeerQueueSize)) @@ -240,6 +246,7 @@ func (i *InMemCollector) reloadConfigs() { i.SamplerFactory.ClearDynsamplers() i.StressRelief.UpdateFromConfig() + i.initSpanCounters() // Send reload signals to all workers to clear their local samplers // so that the new configuration will be propagated @@ -341,6 +348,13 @@ func (i *InMemCollector) monitor() { // Check worker health and report aggregated status i.Health.Ready(collectorHealthKey, 
i.isReady()) + // Emit queue capacity limits and memory limit so consumers can compute utilization + monitorConfig := i.Config.GetCollectionConfig() + i.Metrics.Gauge("collector_incoming_queue_capacity", float64(monitorConfig.IncomingQueueSize)) + i.Metrics.Gauge("collector_peer_queue_capacity", float64(monitorConfig.PeerQueueSize)) + maxAlloc := monitorConfig.GetMaxAlloc() + i.Metrics.Gauge("memory_limit", float64(maxAlloc)) + // Aggregate metrics totalIncoming := 0 totalPeer := 0 @@ -460,6 +474,7 @@ func (i *InMemCollector) ProcessSpanImmediately(sp *types.Span) (processed bool, if !keep { i.Metrics.Increment("dropped_from_stress") + i.Metrics.Increment("events_dropped") return true, false } @@ -544,6 +559,7 @@ func (i *InMemCollector) dealWithSentTrace(ctx context.Context, tr cache.TraceSe i.Transmission.EnqueueSpan(sp) return } + i.Metrics.Increment("events_dropped") i.Logger.Debug().WithField("trace_id", sp.TraceID).Logf("Dropping span because of previous decision to drop trace") } @@ -600,6 +616,8 @@ func (i *InMemCollector) send(ctx context.Context, trace sendableTrace) { // if we're supposed to drop this trace, and dry run mode is not enabled, then we're done. if !trace.KeepSample && !i.Config.GetIsDryRun() { i.Metrics.Increment("trace_send_dropped") + dropCount := int64(trace.DescendantCount()) + i.Metrics.Count("events_dropped", dropCount) i.Logger.Debug().WithFields(logFields).Logf("Dropping trace because of sampling decision") return } @@ -691,6 +709,80 @@ func (i *InMemCollector) addAdditionalAttributes(sp *types.Span) { } } +// initSpanCounters loads and initializes span counters from the current config. +// Must be called at startup and on config reload. 
+func (i *InMemCollector) initSpanCounters() { + counters := i.Config.GetSpanCounters() + for j := range counters { + if err := counters[j].Init(); err != nil { + i.Logger.Error().WithField("error", err).Logf("failed to initialize span counter %q", counters[j].Key) + } + } + i.mutex.Lock() + i.spanCounters = counters + i.mutex.Unlock() +} + +// findSuitableRootSpan returns the root span of the trace if one is present. +// If no root span has been identified, it falls back to the non-annotation +// span (i.e. not a span event or link) with the earliest timestamp, which is +// the most likely root. Returns nil if no suitable span exists. +func findSuitableRootSpan(t sendableTrace) *types.Span { + if t.RootSpan != nil { + return t.RootSpan + } + var best *types.Span + for _, sp := range t.GetSpans() { + if sp.AnnotationType() != types.SpanAnnotationTypeSpanEvent && + sp.AnnotationType() != types.SpanAnnotationTypeLink { + if best == nil || sp.Timestamp.Before(best.Timestamp) { + best = sp + } + } + } + return best +} + +// computeCustomCounts computes each counter's value by iterating all spans in the trace +// and returns the target span together with the per-counter totals for the caller to attach. +// Returns nil, nil if there are no counters configured or no suitable target span. +// +// Stress relief note: this runs inside sendTraces(), the sole consumer of the +// tracesToSend channel. Work is O(N×M) — N spans × M counters — so large +// traces with many counters slow the consumer, which deepens the outgoing +// queue. The stress relief system monitors queue depth as one of its stress +// inputs, so heavy custom-count configurations can raise the measured stress +// level and trigger earlier activation of stress relief. Additionally, spans +// processed via ProcessSpanImmediately (the stress-relief fast path) bypass the +// trace buffer entirely and never reach sendTraces, so custom counts are not +// computed or attached to stress-sampled traces.
+func (i *InMemCollector) computeCustomCounts(t sendableTrace) (*types.Span, map[string]int64) { + i.mutex.RLock() + counters := i.spanCounters + i.mutex.RUnlock() + + if len(counters) == 0 { + return nil, nil + } + + targetSpan := findSuitableRootSpan(t) + if targetSpan == nil { + return nil, nil + } + + var rootData config.SpanData = &targetSpan.Data + counts := make(map[string]int64, len(counters)) + for _, sp := range t.GetSpans() { + for _, counter := range counters { + if counter.MatchesSpan(&sp.Data, rootData) { + counts[counter.Key]++ + } + } + } + + return targetSpan, counts +} + func (i *InMemCollector) sendTraces() { defer i.sendTracesWG.Done() @@ -698,6 +790,8 @@ func (i *InMemCollector) sendTraces() { i.Metrics.Histogram("collector_outgoing_queue", float64(len(i.tracesToSend))) _, span := otelutil.StartSpanMulti(context.Background(), i.Tracer, "sendTrace", map[string]interface{}{"num_spans": t.DescendantCount(), "tracesToSend_size": len(i.tracesToSend)}) + customCountTarget, customCounts := i.computeCustomCounts(t) + for _, sp := range t.GetSpans() { if i.Config.GetAddRuleReasonToTrace() { @@ -721,6 +815,13 @@ func (i *InMemCollector) sendTraces() { } } + // set custom span counts on the target span (root if present, else best fallback) + if sp == customCountTarget { + for k, v := range customCounts { + sp.Data.Set(k, v) + } + } + isDryRun := i.Config.GetIsDryRun() if isDryRun { sp.Data.Set(config.DryRunFieldName, t.shouldSend) diff --git a/collect/collect_test.go b/collect/collect_test.go index ffa97cab8b..a83de54535 100644 --- a/collect/collect_test.go +++ b/collect/collect_test.go @@ -477,8 +477,9 @@ func TestDryRunMode(t *testing.T) { transmission := coll.Transmission.(*transmit.MockTransmission) samplerFactory := &sample.SamplerFactory{ - Config: conf, - Logger: &logger.NullLogger{}, + Config: conf, + Logger: &logger.NullLogger{}, + Metrics: &metrics.NullMetrics{}, } sampler := samplerFactory.GetSamplerImplementationForKey("test") 
coll.SamplerFactory = samplerFactory @@ -1901,6 +1902,267 @@ func TestWorkerHealthReporting(t *testing.T) { }, 2*time.Second, 50*time.Millisecond, "InMemCollector should be healthy again after worker resumes") } +// customCountConf returns a base MockConfig suitable for custom span count tests. +func customCountConf(counters []config.SpanCounter) *config.MockConfig { + return &config.MockConfig{ + GetTracesConfigVal: config.TracesConfig{ + SendTicker: config.Duration(2 * time.Millisecond), + SendDelay: config.Duration(1 * time.Millisecond), + TraceTimeout: config.Duration(60 * time.Second), + MaxBatchSize: 500, + }, + SampleCache: config.SampleCacheConfig{ + KeptSize: 100, + DroppedSize: 100, + SizeCheckInterval: config.Duration(1 * time.Second), + }, + GetSamplerTypeVal: &config.DeterministicSamplerConfig{SampleRate: 1}, + TraceIdFieldNames: []string{"trace.trace_id", "traceId"}, + ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, + GetCollectionConfigVal: config.CollectionConfig{ + WorkerCount: 2, + ShutdownDelay: config.Duration(1 * time.Millisecond), + IncomingQueueSize: 10, + PeerQueueSize: 10, + }, + SpanCounters: counters, + } +} + +// TestCustomSpanCounts_NoCounters verifies that when no counters are configured +// no custom fields are added to any span. 
+func TestCustomSpanCounts_NoCounters(t *testing.T) { + coll := newTestCollector(t, customCountConf(nil)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "no-counters" + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x"}), + APIKey: legacyAPIKey, + }, + }) + coll.AddSpan(&types.Span{ + TraceID: traceID, + IsRoot: true, + Event: &types.Event{Dataset: "test", Data: types.NewPayload(coll.Config, nil), APIKey: legacyAPIKey}, + }) + + events := transmission.GetBlock(2) + for _, ev := range events { + assert.Nil(t, ev.Data.Get("my.count"), "no custom count fields should be set when no counters are configured") + } +} + +// TestCustomSpanCounts_CountsLandOnRoot verifies that a counter with no +// conditions counts all spans and attaches the result to the root span only. +func TestCustomSpanCounts_CountsLandOnRoot(t *testing.T) { + counters := []config.SpanCounter{ + {Key: "all_spans"}, + } + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "root-target" + for i := 0; i < 3; i++ { + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x"}), + APIKey: legacyAPIKey, + }, + }) + } + coll.AddSpan(&types.Span{ + TraceID: traceID, + IsRoot: true, + Event: &types.Event{Dataset: "test", Data: types.NewPayload(coll.Config, nil), APIKey: legacyAPIKey}, + }) + + events := transmission.GetBlock(4) + require.Equal(t, 4, len(events)) + + var rootEvent *types.Event + var childEvents []*types.Event + for _, ev := range events { + if ev.Data.Get("trace.parent_id") == nil { + rootEvent = ev + } else { + childEvents = append(childEvents, ev) + } + } + + require.NotNil(t, rootEvent) + // all 4 spans counted (3 children + root) 
+ assert.Equal(t, int64(4), rootEvent.Data.Get("all_spans")) + for _, child := range childEvents { + assert.Nil(t, child.Data.Get("all_spans"), "custom count should not be set on child spans") + } +} + +// TestCustomSpanCounts_ConditionalCounting verifies that only spans matching +// a condition are counted. +func TestCustomSpanCounts_ConditionalCounting(t *testing.T) { + counters := []config.SpanCounter{ + { + Key: "error_spans", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "error", Operator: config.EQ, Value: true}, + }, + }, + } + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "conditional" + // 2 error spans + for i := 0; i < 2; i++ { + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x", "error": true}), + APIKey: legacyAPIKey, + }, + }) + } + // 2 non-error spans + for i := 0; i < 2; i++ { + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x"}), + APIKey: legacyAPIKey, + }, + }) + } + coll.AddSpan(&types.Span{ + TraceID: traceID, + IsRoot: true, + Event: &types.Event{Dataset: "test", Data: types.NewPayload(coll.Config, nil), APIKey: legacyAPIKey}, + }) + + events := transmission.GetBlock(5) + require.Equal(t, 5, len(events)) + + var rootEvent *types.Event + for _, ev := range events { + if ev.Data.Get("trace.parent_id") == nil { + rootEvent = ev + } + } + require.NotNil(t, rootEvent) + assert.Equal(t, int64(2), rootEvent.Data.Get("error_spans")) +} + +// TestCustomSpanCounts_MultipleCounters verifies that multiple counters with +// different conditions produce independent counts on the root span. 
+func TestCustomSpanCounts_MultipleCounters(t *testing.T) { + counters := []config.SpanCounter{ + { + Key: "db_spans", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "db.system", Operator: config.Exists}, + }, + }, + { + Key: "error_spans", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "error", Operator: config.EQ, Value: true}, + }, + }, + } + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "multi-counter" + spans := []map[string]interface{}{ + {"trace.parent_id": "x", "db.system": "postgresql"}, + {"trace.parent_id": "x", "db.system": "postgresql", "error": true}, + {"trace.parent_id": "x", "error": true}, + {"trace.parent_id": "x"}, + } + for _, data := range spans { + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, data), + APIKey: legacyAPIKey, + }, + }) + } + coll.AddSpan(&types.Span{ + TraceID: traceID, + IsRoot: true, + Event: &types.Event{Dataset: "test", Data: types.NewPayload(coll.Config, nil), APIKey: legacyAPIKey}, + }) + + events := transmission.GetBlock(5) + require.Equal(t, 5, len(events)) + + var rootEvent *types.Event + for _, ev := range events { + if ev.Data.Get("trace.parent_id") == nil { + rootEvent = ev + } + } + require.NotNil(t, rootEvent) + assert.Equal(t, int64(2), rootEvent.Data.Get("db_spans"), "2 spans have db.system") + assert.Equal(t, int64(2), rootEvent.Data.Get("error_spans"), "2 spans have error=true") +} + +// TestCustomSpanCounts_NoRootSpan verifies that when a trace times out without +// a root span, custom counts land on the first non-annotation span instead. 
+func TestCustomSpanCounts_NoRootSpan(t *testing.T) { + conf := customCountConf([]config.SpanCounter{{Key: "all_spans"}}) + conf.GetTracesConfigVal.TraceTimeout = config.Duration(5 * time.Millisecond) + + coll := newTestCollector(t, conf) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "no-root" + // annotation span: should not be the target + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: func() types.Payload { + p := types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x"}) + p.MetaAnnotationType = "span_event" + return p + }(), + APIKey: legacyAPIKey, + }, + }) + // regular span: should be the target + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x"}), + APIKey: legacyAPIKey, + }, + }) + + events := transmission.GetBlock(2) + require.Equal(t, 2, len(events)) + + // Exactly one span should carry the custom count (the first real span). + var counted []*types.Event + for _, ev := range events { + if ev.Data.Get("all_spans") != nil { + counted = append(counted, ev) + } + } + require.Equal(t, 1, len(counted), "custom count should appear on exactly one span when there is no root") + assert.Equal(t, int64(2), counted[0].Data.Get("all_spans"), "both spans should be counted") +} + // BenchmarkCollectorWithSamplers runs benchmarks for different sampler configurations. // This is a tricky benchmark to interpret because just setting up the input data // can easily be more expensive than the collector's routing code. 
The goal is to diff --git a/collect/multi_loop_test.go b/collect/multi_loop_test.go index 85e184cf88..956055aa9a 100644 --- a/collect/multi_loop_test.go +++ b/collect/multi_loop_test.go @@ -534,7 +534,7 @@ func TestCoordinatedReload(t *testing.T) { PeerQueueSize: 3000, WorkerCount: 4, }, - GetSamplerTypeVal: &config.DeterministicSamplerConfig{SampleRate: 1}, + GetSamplerTypeVal: &config.DynamicSamplerConfig{SampleRate: 1, FieldList: []string{"test"}}, ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, TraceIdFieldNames: []string{"trace.trace_id", "traceId"}, SampleCache: config.SampleCacheConfig{ @@ -546,92 +546,71 @@ func TestCoordinatedReload(t *testing.T) { collector := newTestCollector(t, conf) - // Send some test spans to create dataset samplers - processedInitial := int32(0) - for i := 0; i < 10; i++ { - span := &types.Span{ - Event: &types.Event{ - APIHost: "http://api.honeycomb.io", - APIKey: legacyAPIKey, - Dataset: fmt.Sprintf("dataset-%d", i%3), - SampleRate: 1, - Timestamp: time.Now(), - Data: types.Payload{}, - }, - TraceID: fmt.Sprintf("reload-trace-%d", i), - IsRoot: true, - ArrivalTime: time.Now(), - } - if err := collector.AddSpan(span); err == nil { - atomic.AddInt32(&processedInitial, 1) - } + // waitForSamplersCreated waits until at least one worker has a sampler, + // proving traces were actually processed and makeDecision was called. 
+ waitForSamplersCreated := func(msg string) { + t.Helper() + assert.Eventually(t, func() bool { + total := 0 + for _, worker := range collector.workers { + ch := make(chan struct{}) + worker.pause <- ch + total += len(worker.datasetSamplers) + close(ch) + } + return total > 0 + }, 2*time.Second, 10*time.Millisecond, msg) } - // Wait for initial spans to be processed - assert.Eventually(t, func() bool { - return atomic.LoadInt32(&processedInitial) >= 8 - }, 2*time.Second, 10*time.Millisecond, "Initial spans should be processed") - - // Trigger a reload - this should cause workers to recreate their samplers - collector.sendReloadSignal("hash1", "hash2") - - // Give a moment for the reload signal to be processed (reload is async) - // We'll verify the reload worked by checking that spans still get processed - time.Sleep(50 * time.Millisecond) - - // Check that samplers were recreated by sending more spans - processedAfterReload := int32(0) - for i := 0; i < 20; i++ { - span := &types.Span{ - Event: &types.Event{ - APIHost: "http://api.honeycomb.io", - APIKey: legacyAPIKey, - Dataset: "test.reload", - SampleRate: 1, - Timestamp: time.Now(), - Data: types.Payload{}, - }, - TraceID: fmt.Sprintf("after-reload-%d", i), - IsRoot: true, - ArrivalTime: time.Now(), - } - if err := collector.AddSpan(span); err == nil { - atomic.AddInt32(&processedAfterReload, 1) - } + // waitForSamplersCleared waits until all workers have empty datasetSamplers. 
+ waitForSamplersCleared := func(msg string) { + t.Helper() + assert.Eventually(t, func() bool { + for _, worker := range collector.workers { + ch := make(chan struct{}) + worker.pause <- ch + n := len(worker.datasetSamplers) + close(ch) + if n > 0 { + return false + } + } + return true + }, 2*time.Second, 10*time.Millisecond, msg) } - // Verify spans were processed after reload - assert.Eventually(t, func() bool { - return atomic.LoadInt32(&processedAfterReload) >= 15 - }, 2*time.Second, 100*time.Millisecond, "Spans should be processed after reload") - - // Trigger another reload to verify multiple reloads work - collector.sendReloadSignal("hash2", "hash3") - time.Sleep(50 * time.Millisecond) - - // Send more spans to verify system still works - processedAfterSecondReload := int32(0) - for i := 0; i < 20; i++ { - span := &types.Span{ - Event: &types.Event{ - APIHost: "http://api.honeycomb.io", - APIKey: legacyAPIKey, - Dataset: "test.reload2", - SampleRate: 1, - Timestamp: time.Now(), - Data: types.Payload{}, - }, - TraceID: fmt.Sprintf("after-second-reload-%d", i), - IsRoot: true, - ArrivalTime: time.Now(), - } - if err := collector.AddSpan(span); err == nil { - atomic.AddInt32(&processedAfterSecondReload, 1) + sendSpans := func(n int, dataset, traceIDPrefix string) { + for i := 0; i < n; i++ { + span := &types.Span{ + Event: &types.Event{ + APIHost: "http://api.honeycomb.io", + APIKey: legacyAPIKey, + Dataset: fmt.Sprintf("%s", dataset), + SampleRate: 1, + Timestamp: time.Now(), + Data: types.Payload{}, + }, + TraceID: fmt.Sprintf("%s-%d", traceIDPrefix, i), + IsRoot: true, + ArrivalTime: time.Now(), + } + collector.AddSpan(span) //nolint:errcheck } } - // Verify spans were processed after second reload - assert.Eventually(t, func() bool { - return atomic.LoadInt32(&processedAfterSecondReload) >= 15 - }, 2*time.Second, 100*time.Millisecond, "Spans should be processed after second reload") + // Send spans and wait for workers to process them and create samplers. 
+ sendSpans(20, "dataset", "reload-trace") + waitForSamplersCreated("samplers should be created before first reload") + + // Reload and verify all workers clear their samplers. + collector.sendReloadSignal("dataset", "hash2") + waitForSamplersCleared("samplers should be cleared after first reload") + + // Send spans again; samplers must be recreated, proving the system still works. + sendSpans(20, "dataset", "after-reload") + waitForSamplersCreated("samplers should be recreated after first reload") + + // Second reload cycle. + collector.sendReloadSignal("dataset", "hash3") + waitForSamplersCleared("samplers should be cleared after second reload") } diff --git a/config.md b/config.md index fb1b4ca340..23178b357d 100644 --- a/config.md +++ b/config.md @@ -3,7 +3,7 @@ # Honeycomb Refinery Configuration Documentation This is the documentation for the configuration file for Honeycomb's Refinery. -It was automatically generated on 2026-02-25 at 20:49:27 UTC. +It was automatically generated on 2026-04-09 at 22:21:32 UTC. ## The Config file @@ -181,16 +181,31 @@ ReceiveKeys is a set of Honeycomb API keys that the proxy will treat specially. This list only applies to span traffic - other Honeycomb API actions will be proxied through to the upstream API directly without modifying keys. -- Not eligible for live reload. +- Eligible for live reload. - Type: `stringarray` - Example: `your-key-goes-here` +### `ReceiveKeyIDs` + +ReceiveKeyIDs is a set of Honeycomb Ingest Key IDs that the proxy will treat specially. + +When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose Honeycomb ingest key ID matches an entry in this list will be accepted. +The key ID is the `id` field returned by the Honeycomb `/1/auth` endpoint; it is distinct from the full API key value. +This allows authorization based on key IDs rather than full key values, which avoids storing secret key material in the configuration file. +Both `ReceiveKeys` and `ReceiveKeyIDs` may be used simultaneously. 
+Note: This feature does not support legacy API keys. +Only Honeycomb Ingest Keys (which have a key ID) are compatible with this setting. + +- Eligible for live reload. +- Type: `stringarray` +- Example: `your-key-id-goes-here` + ### `AcceptOnlyListedKeys` AcceptOnlyListedKeys is a boolean flag that causes events arriving with API keys not in the `ReceiveKeys` list to be rejected. -If `true`, then only traffic using the keys listed in `ReceiveKeys` is accepted. -Events arriving with API keys not in the `ReceiveKeys` list will be rejected with an HTTP `401` error. +If `true`, then only traffic using the keys listed in `ReceiveKeys` or whose key ID is listed in `ReceiveKeyIDs` is accepted. +Events arriving with API keys not in either list will be rejected with an HTTP `401` error. If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. This setting is applied **before** the `SendKey` and `SendKeyMode` settings. @@ -660,6 +675,22 @@ In rare circumstances, compression costs may outweigh the benefits, in which cas - Default: `gzip` - Options: `none`, `gzip` +### `AdditionalAttributes` + +AdditionalAttributes adds the provided attributes as resource attributes on all OpenTelemetry metrics emitted by Refinery. + +This is useful for injecting deployment-specific metadata (such as a cluster ID or environment name) into metrics so they can be filtered or grouped in the metrics backend. +Both keys and values must be strings. +When supplying via an environment variable, the value should be a string of comma-separated key-value pairs. +When supplying via the command line, the value should be a key-value pair. +If multiple key-value pairs are needed, each should be supplied via its own command line flag. +The key-value pairs must use ':' as the separator. + +- Not eligible for live reload. 
+- Type: `map` +- Example: `pipeline.id:'12345',rollout.id:'67890'` +- Environment variable: `REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES` + ## OpenTelemetry Tracing `OTelTracing` contains configuration for Refinery's own tracing. diff --git a/config/cmdenv.go b/config/cmdenv.go index d96f008a7f..4daa63a031 100644 --- a/config/cmdenv.go +++ b/config/cmdenv.go @@ -43,6 +43,7 @@ type CmdEnv struct { OpAMPEndpoint string `long:"opamp-server-url" env:"REFINERY_OPAMP_ENDPOINT" description:"URL of the OpAMP server to use for remote management."` TelemetryEndpoint string `long:"telemetry-endpoint" env:"REFINERY_TELEMETRY_ENDPOINT" description:"Endpoint to send Refinery's internal telemetry to. This is separate from the Honeycomb API endpoint and is used for sending metrics about Refinery's performance."` OTelMetricsAPIKey string `long:"otel-metrics-api-key" env:"REFINERY_OTEL_METRICS_API_KEY" description:"API key for OTel metrics if being sent to Honeycomb. Setting this value via a flag may expose credentials - it is recommended to use the env var or a configuration file."` + OTelMetricsAdditionalAttributes map[string]string `long:"otel-metrics-additional-attributes" env:"REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES" env-delim:"," description:"Additional attributes to add as resource attributes on all OpenTelemetry metrics emitted by Refinery. When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. When supplying via the command line, the value should be a key value pair. If multiple key-value pairs are needed, each should be supplied via its own command line flag. The key-value pairs must use ':' as the separator."` OTelTracesAPIKey string `long:"otel-traces-api-key" env:"REFINERY_OTEL_TRACES_API_KEY" description:"API key for OTel traces if being sent to Honeycomb. 
Setting this value via a flag may expose credentials - it is recommended to use the env var or a configuration file."` QueryAuthToken string `long:"query-auth-token" env:"REFINERY_QUERY_AUTH_TOKEN" description:"Token for debug/management queries. Setting this value via a flag may expose credentials - it is recommended to use the env var or a configuration file."` AvailableMemory MemorySize `long:"available-memory" env:"REFINERY_AVAILABLE_MEMORY" description:"The maximum memory available for Refinery to use (ex: 4GiB)."` diff --git a/config/config.go b/config/config.go index 224fe07d76..a709cf1b41 100644 --- a/config/config.go +++ b/config/config.go @@ -151,6 +151,8 @@ type Config interface { GetAddCountsToRoot() bool + GetSpanCounters() []SpanCounter + GetConfigMetadata() []ConfigMetadata GetSampleCacheConfig() SampleCacheConfig diff --git a/config/config_test.go b/config/config_test.go index 30fd468d4a..04f6faaf94 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -909,6 +909,23 @@ func TestAdditionalAttributes(t *testing.T) { assert.Equal(t, map[string]string{"name": "foo", "other": "bar", "another": "OneHundred"}, c.GetAdditionalAttributes()) } +func TestOTelMetricsAdditionalAttributes(t *testing.T) { + cm := makeYAML( + "General.ConfigurationVersion", 2, + "OTelMetrics.AdditionalAttributes", map[string]string{ + "cluster.id": "my-cluster", + "environment": "production", + }, + ) + rm := makeYAML("ConfigVersion", 2) + config, rules := createTempConfigs(t, cm, rm) + c, err := getConfig([]string{"--no-validate", "--config", config, "--rules_config", rules}) + assert.NoError(t, err) + + otelCfg := c.GetOTelMetricsConfig() + assert.Equal(t, map[string]string{"cluster.id": "my-cluster", "environment": "production"}, otelCfg.AdditionalAttributes) +} + func TestHoneycombIdFieldsConfig(t *testing.T) { cm := makeYAML( "General.ConfigurationVersion", 2, diff --git a/config/file_config.go b/config/file_config.go index 43206dae90..e2490d8bc7 100644 --- 
a/config/file_config.go +++ b/config/file_config.go @@ -92,16 +92,21 @@ type NetworkConfig struct { type AccessKeyConfig struct { ReceiveKeys []string `yaml:"ReceiveKeys" default:"[]"` + ReceiveKeyIDs []string `yaml:"ReceiveKeyIDs" default:"[]"` SendKey string `yaml:"SendKey" cmdenv:"SendKey"` SendKeyMode string `yaml:"SendKeyMode" default:"none"` AcceptOnlyListedKeys bool `yaml:"AcceptOnlyListedKeys"` } -// IsAccepted checks if the given key is in the list of received keys or a configured SendKey. -// if not, it returns an error with the key truncated to 8 characters for logging. -func (a *AccessKeyConfig) IsAccepted(key string) error { +// IsAccepted checks if the given key (or its associated key ID) is authorized. +// keyID is the Honeycomb ingest key ID returned by the /1/auth endpoint; it may +// be empty if the lookup has not yet occurred or if the key is a legacy key. +// If not accepted, it returns an error with the key truncated to 8 characters for logging. +func (a *AccessKeyConfig) IsAccepted(key, keyID string) error { if a.AcceptOnlyListedKeys { - if (len(a.SendKey) > 0 && key == a.SendKey) || slices.Contains(a.ReceiveKeys, key) { + if (len(a.SendKey) > 0 && key == a.SendKey) || + slices.Contains(a.ReceiveKeys, key) || + (keyID != "" && slices.Contains(a.ReceiveKeyIDs, keyID)) { return nil } @@ -110,10 +115,15 @@ func (a *AccessKeyConfig) IsAccepted(key string) error { return nil } +// HasKeyIDs returns true if ReceiveKeyIDs has been configured. +func (a *AccessKeyConfig) HasKeyIDs() bool { + return len(a.ReceiveKeyIDs) > 0 +} + // GetReplaceKey checks the given API key against the configuration // and possibly replaces it with the configured SendKey, if the settings so indicate. // It returns the key to use, or an error if the key is invalid given the settings. 
-func (a *AccessKeyConfig) GetReplaceKey(apiKey string) (string, error) { +func (a *AccessKeyConfig) GetReplaceKey(apiKey, keyID string) (string, error) { if a.SendKey != "" { overwriteWith := "" switch a.SendKeyMode { @@ -129,10 +139,10 @@ func (a *AccessKeyConfig) GetReplaceKey(apiKey string) (string, error) { overwriteWith = a.SendKey } case "listedonly": - // only replace keys that are listed in the `ReceiveKeys` list, + // only replace keys that are listed in the `ReceiveKeys` or `ReceiveKeyIDs` list, // otherwise use original key overwriteWith = apiKey - if slices.Contains(a.ReceiveKeys, apiKey) { + if slices.Contains(a.ReceiveKeys, apiKey) || (keyID != "" && slices.Contains(a.ReceiveKeyIDs, keyID)) { overwriteWith = a.SendKey } case "missingonly": @@ -143,11 +153,11 @@ func (a *AccessKeyConfig) GetReplaceKey(apiKey string) (string, error) { overwriteWith = a.SendKey } case "unlisted": - // only replace nonblank keys that are NOT listed in the `ReceiveKeys` list + // only replace nonblank keys that are NOT listed in the `ReceiveKeys` or `ReceiveKeyIDs` list // otherwise use original key if apiKey != "" { overwriteWith = apiKey - if !slices.Contains(a.ReceiveKeys, apiKey) { + if !slices.Contains(a.ReceiveKeys, apiKey) && !(keyID != "" && slices.Contains(a.ReceiveKeyIDs, keyID)) { overwriteWith = a.SendKey } } @@ -189,10 +199,10 @@ func (dt *DefaultTrue) UnmarshalText(text []byte) error { } type RefineryTelemetryConfig struct { - AddRuleReasonToTrace bool `yaml:"AddRuleReasonToTrace"` - AddSpanCountToRoot *DefaultTrue `yaml:"AddSpanCountToRoot" default:"true"` // Avoid pointer woe on access, use GetAddSpanCountToRoot() instead. - AddCountsToRoot bool `yaml:"AddCountsToRoot"` - AddHostMetadataToTrace *DefaultTrue `yaml:"AddHostMetadataToTrace" default:"true"` // Avoid pointer woe on access, use GetAddHostMetadataToTrace() instead. 
+ AddRuleReasonToTrace bool `yaml:"AddRuleReasonToTrace"` + AddSpanCountToRoot *DefaultTrue `yaml:"AddSpanCountToRoot" default:"true"` // Avoid pointer woe on access, use GetAddSpanCountToRoot() instead. + AddCountsToRoot bool `yaml:"AddCountsToRoot"` + AddHostMetadataToTrace *DefaultTrue `yaml:"AddHostMetadataToTrace" default:"true"` // Avoid pointer woe on access, use GetAddHostMetadataToTrace() instead. } type TracesConfig struct { @@ -268,12 +278,13 @@ type PrometheusMetricsConfig struct { } type OTelMetricsConfig struct { - Enabled bool `yaml:"Enabled" default:"false"` - APIHost string `yaml:"APIHost" default:"https://api.honeycomb.io" cmdenv:"TelemetryEndpoint"` - APIKey string `yaml:"APIKey" cmdenv:"OTelMetricsAPIKey,HoneycombAPIKey"` - Dataset string `yaml:"Dataset" default:"Refinery Metrics"` - Compression string `yaml:"Compression" default:"gzip"` - ReportingInterval Duration `yaml:"ReportingInterval" default:"30s"` + Enabled bool `yaml:"Enabled" default:"false"` + APIHost string `yaml:"APIHost" default:"https://api.honeycomb.io" cmdenv:"TelemetryEndpoint"` + APIKey string `yaml:"APIKey" cmdenv:"OTelMetricsAPIKey,HoneycombAPIKey"` + Dataset string `yaml:"Dataset" default:"Refinery Metrics"` + Compression string `yaml:"Compression" default:"gzip"` + ReportingInterval Duration `yaml:"ReportingInterval" default:"30s"` + AdditionalAttributes map[string]string `yaml:"AdditionalAttributes" default:"{}" cmdenv:"OTelMetricsAdditionalAttributes"` } type OTelTracingConfig struct { @@ -597,6 +608,13 @@ func writeYAMLToFile(data any, filename string) error { // nil, it uses the command line arguments. // It also dumps the config and rules to the given files, if specified, which // will cause the program to exit. +// +// Return values follow an intentional two-level contract: +// - (nil, err): fatal error — config could not be loaded or has hard validation +// errors; the caller should not proceed. 
+// - (cfg, err): non-fatal warning — config loaded successfully but has deprecation +// or advisory warnings; the caller may log err and proceed using cfg. +// - (cfg, nil): success. func NewConfig(opts *CmdEnv, currentVersion ...string) (Config, error) { cData, rData, err := newConfigAndRules(opts) if err != nil { @@ -604,8 +622,7 @@ func NewConfig(opts *CmdEnv, currentVersion ...string) (Config, error) { } cfg, err := newFileConfig(opts, cData, rData, currentVersion...) - // only exit if we have no config at all; if it fails validation, we'll - // do the rest and return it anyway + // only exit on fatal errors (cfg == nil); non-nil cfg with err means warnings only if err != nil && cfg == nil { return nil, err } @@ -1116,6 +1133,13 @@ func (f *fileConfig) GetAddCountsToRoot() bool { return f.mainConfig.Telemetry.AddCountsToRoot } +func (f *fileConfig) GetSpanCounters() []SpanCounter { + f.mux.RLock() + defer f.mux.RUnlock() + + return f.rulesConfig.SpanCounters +} + func (f *fileConfig) GetSampleCacheConfig() SampleCacheConfig { f.mux.RLock() defer f.mux.RUnlock() diff --git a/config/file_config_test.go b/config/file_config_test.go index d09e9e3ba6..0d6bd3945f 100644 --- a/config/file_config_test.go +++ b/config/file_config_test.go @@ -2,6 +2,7 @@ package config import ( "errors" + "fmt" "runtime" "testing" @@ -56,6 +57,7 @@ func Test_GetQueueSizesPerWorker(t *testing.T) { func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { type fields struct { ReceiveKeys []string + ReceiveKeyIDs []string SendKey string SendKeyMode string AcceptOnlyListedKeys bool @@ -71,6 +73,12 @@ func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { SendKey: "sendkey", SendKeyMode: "listedonly", } + fListedWithKeyIDs := fields{ + ReceiveKeys: []string{"key1", "key2"}, + ReceiveKeyIDs: []string{"kid1", "kid2"}, + SendKey: "sendkey", + SendKeyMode: "listedonly", + } fMissing := fields{ ReceiveKeys: []string{"key1", "key2"}, SendKey: "sendkey", @@ -81,36 +89,50 @@ func 
TestAccessKeyConfig_GetReplaceKey(t *testing.T) { SendKey: "sendkey", SendKeyMode: "unlisted", } + fUnlistedWithKeyIDs := fields{ + ReceiveKeys: []string{"key1", "key2"}, + ReceiveKeyIDs: []string{"kid1", "kid2"}, + SendKey: "sendkey", + SendKeyMode: "unlisted", + } tests := []struct { name string fields fields apiKey string + keyID string want string wantErr bool }{ - {"send all known", fSendAll, "key1", "sendkey", false}, - {"send all unknown", fSendAll, "userkey", "sendkey", false}, - {"send all missing", fSendAll, "", "sendkey", false}, - {"listed known", fListed, "key1", "sendkey", false}, - {"listed unknown", fListed, "userkey", "userkey", false}, - {"listed missing", fListed, "", "", true}, - {"missing known", fMissing, "key1", "key1", false}, - {"missing unknown", fMissing, "userkey", "userkey", false}, - {"missing missing", fMissing, "", "sendkey", false}, - {"unlisted known", fUnlisted, "key1", "key1", false}, - {"unlisted unknown", fUnlisted, "userkey", "sendkey", false}, - {"unlisted missing", fUnlisted, "", "", true}, + {"send all known", fSendAll, "key1", "", "sendkey", false}, + {"send all unknown", fSendAll, "userkey", "", "sendkey", false}, + {"send all missing", fSendAll, "", "", "sendkey", false}, + {"listed known", fListed, "key1", "", "sendkey", false}, + {"listed unknown", fListed, "userkey", "", "userkey", false}, + {"listed missing", fListed, "", "", "", true}, + {"listed by keyID known", fListedWithKeyIDs, "unknownkey", "kid1", "sendkey", false}, + {"listed by keyID unknown", fListedWithKeyIDs, "unknownkey", "unknownkid", "unknownkey", false}, + {"listed by keyID empty", fListedWithKeyIDs, "unknownkey", "", "unknownkey", false}, + {"missing known", fMissing, "key1", "", "key1", false}, + {"missing unknown", fMissing, "userkey", "", "userkey", false}, + {"missing missing", fMissing, "", "", "sendkey", false}, + {"unlisted known", fUnlisted, "key1", "", "key1", false}, + {"unlisted unknown", fUnlisted, "userkey", "", "sendkey", false}, + 
{"unlisted missing", fUnlisted, "", "", "", true}, + {"unlisted by keyID known", fUnlistedWithKeyIDs, "unknownkey", "kid1", "unknownkey", false}, + {"unlisted by keyID unknown", fUnlistedWithKeyIDs, "unknownkey", "unknownkid", "sendkey", false}, + {"unlisted by keyID empty", fUnlistedWithKeyIDs, "unknownkey", "", "sendkey", false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &AccessKeyConfig{ ReceiveKeys: tt.fields.ReceiveKeys, + ReceiveKeyIDs: tt.fields.ReceiveKeyIDs, SendKey: tt.fields.SendKey, SendKeyMode: tt.fields.SendKeyMode, AcceptOnlyListedKeys: tt.fields.AcceptOnlyListedKeys, } - got, err := a.GetReplaceKey(tt.apiKey) + got, err := a.GetReplaceKey(tt.apiKey, tt.keyID) if (err != nil) != tt.wantErr { t.Errorf("AccessKeyConfig.GetReplaceKey() error = %v, wantErr %v", err, tt.wantErr) return @@ -125,6 +147,7 @@ func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { func TestAccessKeyConfig_IsAccepted(t *testing.T) { type fields struct { ReceiveKeys []string + ReceiveKeyIDs []string SendKey string SendKeyMode string AcceptOnlyListedKeys bool @@ -133,24 +156,33 @@ func TestAccessKeyConfig_IsAccepted(t *testing.T) { name string fields fields key string + keyID string want error }{ - {"no keys", fields{}, "key1", nil}, - {"known key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key1", nil}, - {"unknown key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key2", errors.New("api key key2... not found in list of authorized keys")}, - {"reject missing key with sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "", errors.New("api key ... not found in list of authorized keys")}, - {"reject missing key without sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "", errors.New("api key ... 
not found in list of authorized keys")}, - {"accept sendkey", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "key2", nil}, + {"no keys", fields{}, "key1", "", nil}, + {"known key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key1", "", nil}, + {"unknown key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key2", "", errors.New("api key key2... not found in list of authorized keys")}, + {"reject missing key with sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "", "", errors.New("api key ... not found in list of authorized keys")}, + {"reject missing key without sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "", "", errors.New("api key ... not found in list of authorized keys")}, + {"accept sendkey", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "key2", "", nil}, + // ReceiveKeyIDs tests + {"known key id", fields{ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "anykey", "kid1", nil}, + {"unknown key id", fields{ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "anykey", "kid2", errors.New("api key anykey... not found in list of authorized keys")}, + {"key id with empty keyID param", fields{ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "anykey", "", errors.New("api key anykey... 
not found in list of authorized keys")}, + {"accept by key id when full key not listed", fields{ReceiveKeys: []string{"key1"}, ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "key2", "kid1", nil}, + {"accept by full key when key id not listed", fields{ReceiveKeys: []string{"key1"}, ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "key1", "kid2", nil}, + {"reject when neither full key nor key id match", fields{ReceiveKeys: []string{"key1"}, ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "key2", "kid2", errors.New("api key key2... not found in list of authorized keys")}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &AccessKeyConfig{ ReceiveKeys: tt.fields.ReceiveKeys, + ReceiveKeyIDs: tt.fields.ReceiveKeyIDs, SendKey: tt.fields.SendKey, SendKeyMode: tt.fields.SendKeyMode, AcceptOnlyListedKeys: tt.fields.AcceptOnlyListedKeys, } - err := a.IsAccepted(tt.key) + err := a.IsAccepted(tt.key, tt.keyID) if tt.want == nil { require.NoError(t, err) return @@ -160,6 +192,104 @@ func TestAccessKeyConfig_IsAccepted(t *testing.T) { } } +func BenchmarkAccessKeyConfig_IsAccepted(b *testing.B) { + // Generate realistic key lists + makeKeys := func(n int) []string { + keys := make([]string, n) + for i := range keys { + keys[i] = fmt.Sprintf("key-%06d", i) + } + return keys + } + + benchmarks := []struct { + name string + config AccessKeyConfig + key string + keyID string + }{ + { + name: "no_filtering", + config: AccessKeyConfig{AcceptOnlyListedKeys: false}, + key: "anykey", + keyID: "", + }, + { + name: "ReceiveKeys_10_match_last", + config: AccessKeyConfig{ + ReceiveKeys: makeKeys(10), + AcceptOnlyListedKeys: true, + }, + key: "key-000009", + keyID: "", + }, + { + name: "ReceiveKeys_100_match_last", + config: AccessKeyConfig{ + ReceiveKeys: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "key-000099", + keyID: "", + }, + { + name: "ReceiveKeys_100_no_match", + config: AccessKeyConfig{ + ReceiveKeys: 
makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "unknown-key", + keyID: "", + }, + { + name: "ReceiveKeyIDs_10_match_last", + config: AccessKeyConfig{ + ReceiveKeyIDs: makeKeys(10), + AcceptOnlyListedKeys: true, + }, + key: "anykey", + keyID: "key-000009", + }, + { + name: "ReceiveKeyIDs_100_match_last", + config: AccessKeyConfig{ + ReceiveKeyIDs: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "anykey", + keyID: "key-000099", + }, + { + name: "ReceiveKeyIDs_100_no_match", + config: AccessKeyConfig{ + ReceiveKeyIDs: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "anykey", + keyID: "unknown-kid", + }, + { + name: "both_100_match_by_keyID", + config: AccessKeyConfig{ + ReceiveKeys: makeKeys(100), + ReceiveKeyIDs: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "unknown-key", + keyID: "key-000050", + }, + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = bm.config.IsAccepted(bm.key, bm.keyID) + } + }) + } +} + func TestCalculateSamplerKey(t *testing.T) { testCases := []struct { name string diff --git a/config/metadata/configMeta.yaml b/config/metadata/configMeta.yaml index 289f53e905..32c4ff1aed 100644 --- a/config/metadata/configMeta.yaml +++ b/config/metadata/configMeta.yaml @@ -213,7 +213,7 @@ groups: valuetype: stringarray v1name: APIKeys example: "your-key-goes-here" - reload: false + reload: true validations: - type: elementType arg: string @@ -223,20 +223,45 @@ groups: will be proxied through to the upstream API directly without modifying keys. + - name: ReceiveKeyIDs + type: stringarray + valuetype: stringarray + example: "your-key-id-goes-here" + firstversion: v3.2 + reload: true + validations: + - type: elementType + arg: string + summary: is a set of Honeycomb Ingest Key IDs that the proxy will treat specially. 
+ description: > + When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose + Honeycomb ingest key ID matches an entry in this list will be accepted. + The key ID is the `id` field returned by the Honeycomb `/1/auth` + endpoint; it is distinct from the full API key value. + + This allows authorization based on key IDs rather than full key values, + which avoids storing secret key material in the configuration file. + Both `ReceiveKeys` and `ReceiveKeyIDs` may be used simultaneously. + + Note: This feature does not support legacy API keys. Only Honeycomb + Ingest Keys (which have a key ID) are compatible with this setting. + - name: AcceptOnlyListedKeys type: bool valuetype: conditional extra: nostar APIKeys default: false reload: true - validation: + validations: - type: requiredWith arg: ReceiveKeys + - type: requiredWith + arg: ReceiveKeyIDs summary: is a boolean flag that causes events arriving with API keys not in the `ReceiveKeys` list to be rejected. description: > - If `true`, then only traffic using the keys listed in `ReceiveKeys` is - accepted. Events arriving with API keys not in the `ReceiveKeys` list - will be rejected with an HTTP `401` error. + If `true`, then only traffic using the keys listed in `ReceiveKeys` or + whose key ID is listed in `ReceiveKeyIDs` is accepted. Events arriving + with API keys not in either list will be rejected with an HTTP `401` error. If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. @@ -673,17 +698,14 @@ groups: valuetype: map example: "pipeline.id:'12345',rollout.id:'67890'" reload: false - validations: - - type: elementType - arg: string summary: adds the provided attributes to all logs written by the Honeycomb logger. envvar: REFINERY_HONEYCOMB_LOGGER_ADDITIONAL_ATTRIBUTES commandline: logger-additional-attributes description: > - When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. 
- When supplying via the command line, the value should be a key value pair. - If multiple key-value pairs are needed, each should be supplied via its own command line flag. - The key-value pairs must use ':' as the separator. + When supplying via an environment variable, the value should be a string of comma-separated key-value pairs. + When supplying via the command line, the value should be a key-value pair. + If multiple key-value pairs are needed, each should be supplied via its own command line flag. + The key-value pairs must use ':' as the separator. - name: StdoutLogger title: "Stdout Logger" @@ -916,6 +938,29 @@ groups: compression costs may outweigh the benefits, in which case `none` may be used. + - name: AdditionalAttributes + type: map + valuetype: map + example: "pipeline.id:'12345',rollout.id:'67890'" + reload: false + firstversion: v3.2 + validations: + - type: elementType + arg: string + summary: adds the provided attributes as resource attributes on all OpenTelemetry metrics emitted by Refinery. + envvar: REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES + commandline: otel-metrics-additional-attributes + description: > + This is useful for injecting deployment-specific metadata (such as + a cluster ID or environment name) into metrics so they can be + filtered or grouped in the metrics backend. + Both keys and values must be strings. + + When supplying via an environment variable, the value should be a string of comma-separated key-value pairs. + When supplying via the command line, the value should be a key-value pair. + If multiple key-value pairs are needed, each should be supplied via its own command line flag. + The key-value pairs must use ':' as the separator. + - name: OTelTracing title: "OpenTelemetry Tracing" description: contains configuration for Refinery's own tracing. 
diff --git a/config/metadata/rulesMeta.yaml b/config/metadata/rulesMeta.yaml index b80e4ee983..a4a9a5c823 100644 --- a/config/metadata/rulesMeta.yaml +++ b/config/metadata/rulesMeta.yaml @@ -738,3 +738,32 @@ groups: The best practice is to always specify `Datatype`; this avoids ambiguity, allows for more accurate comparisons, and offers a minor performance improvement. + + - name: SpanCounters + title: "Custom Span Count Configuration" + sortorder: 80 + description: > + Defines a single custom span counter. Each counter has a Key that names + the field written to the root span, and an optional list of Conditions + that must all match for a span to be counted. Spans are counted when + all of the entry's Conditions match. If Conditions is empty, every span + in the trace is counted. The counter value is written to the root span + under the key specified by `Key`. If no root span exists when the trace + is sent, the counter is written to the first non-annotation span instead. + fields: + - name: Key + type: string + validations: + - type: notempty + summary: is the field name written to the root span with the counter value. + description: > + The name of the field that will be added to the root span. Must not + be empty. + + - name: Conditions + type: objectarray + summary: is the list of conditions a span must satisfy to be counted. + description: > + All conditions must match for a span to be counted. If empty, every + span in the trace is counted. Uses the same condition format as + rules-based sampler conditions. 
diff --git a/config/mock.go b/config/mock.go index 785197a795..58660a281f 100644 --- a/config/mock.go +++ b/config/mock.go @@ -52,6 +52,7 @@ type MockConfig struct { AdditionalErrorFields []string AddSpanCountToRoot bool AddCountsToRoot bool + SpanCounters []SpanCounter CacheOverrunStrategy string SampleCache SampleCacheConfig StressRelief StressReliefConfig @@ -415,6 +416,13 @@ func (f *MockConfig) GetAddCountsToRoot() bool { return f.AddSpanCountToRoot } +func (f *MockConfig) GetSpanCounters() []SpanCounter { + f.Mux.RLock() + defer f.Mux.RUnlock() + + return f.SpanCounters +} + func (f *MockConfig) GetSampleCacheConfig() SampleCacheConfig { f.Mux.RLock() defer f.Mux.RUnlock() diff --git a/config/sampler_config.go b/config/sampler_config.go index 2560d322e8..0cc4ce00b5 100644 --- a/config/sampler_config.go +++ b/config/sampler_config.go @@ -172,8 +172,9 @@ func (v *RulesBasedDownstreamSampler) NameMeaningfulRate() string { } type V2SamplerConfig struct { - RulesVersion int `json:"rulesversion" yaml:"RulesVersion" validate:"required,ge=2"` - Samplers map[string]*V2SamplerChoice `json:"samplers" yaml:"Samplers,omitempty" validate:"required"` + RulesVersion int `json:"rulesversion" yaml:"RulesVersion" validate:"required,ge=2"` + Samplers map[string]*V2SamplerChoice `json:"samplers" yaml:"Samplers,omitempty" validate:"required"` + SpanCounters []SpanCounter `json:"spancounters,omitempty" yaml:"SpanCounters,omitempty" toml:",omitempty"` } type GetSamplingFielder interface { diff --git a/config/span_counter_config.go b/config/span_counter_config.go new file mode 100644 index 0000000000..1eb6c1130d --- /dev/null +++ b/config/span_counter_config.go @@ -0,0 +1,217 @@ +package config + +import "strings" + +// SpanData is the interface required for matching span fields in a SpanCounter. +// It is satisfied by *types.Payload. 
+type SpanData interface { + Get(key string) any + Exists(key string) bool +} + +// SpanCounter defines a custom span count to be computed and added to +// the root span under Key. Spans are counted if they satisfy all Conditions. +type SpanCounter struct { + Key string `yaml:"Key"` + Conditions []*RulesBasedSamplerCondition `yaml:"Conditions,omitempty"` +} + +// Init initializes all conditions. Must be called before MatchesSpan. +func (c *SpanCounter) Init() error { + for _, cond := range c.Conditions { + if err := cond.Init(); err != nil { + return err + } + } + return nil +} + +// MatchesSpan returns true if the span satisfies all conditions. +// span is the span being tested; root is the root span's data (may be nil). +func (c *SpanCounter) MatchesSpan(span SpanData, root SpanData) bool { + for _, cond := range c.Conditions { + var value any + var exists bool + for _, field := range cond.Fields { + if strings.HasPrefix(field, RootPrefix) { + if root != nil { + f := field[len(RootPrefix):] + if root.Exists(f) { + value = root.Get(f) + exists = true + break + } + } + } else { + if span.Exists(field) { + value = span.Get(field) + exists = true + break + } + } + } + + if cond.Matches != nil { + if !cond.Matches(value, exists) { + return false + } + } else { + if !ConditionMatchesValue(cond, value, exists) { + return false + } + } + } + return true +} + +// ConditionMatchesValue evaluates a condition against a value when the +// condition's Matches function has not been set (i.e. Datatype is unspecified). +// This is exported so that sample/rules.go can share the implementation. 
+func ConditionMatchesValue(condition *RulesBasedSamplerCondition, value interface{}, exists bool) bool { + var match bool + switch exists { + case true: + switch condition.Operator { + case Exists: + match = exists + case NEQ: + if comparison, ok := compareValues(value, condition.Value); ok { + match = comparison != equal + } + case EQ: + if comparison, ok := compareValues(value, condition.Value); ok { + match = comparison == equal + } + case GT: + if comparison, ok := compareValues(value, condition.Value); ok { + match = comparison == more + } + case GTE: + if comparison, ok := compareValues(value, condition.Value); ok { + match = comparison == more || comparison == equal + } + case LT: + if comparison, ok := compareValues(value, condition.Value); ok { + match = comparison == less + } + case LTE: + if comparison, ok := compareValues(value, condition.Value); ok { + match = comparison == less || comparison == equal + } + } + case false: + switch condition.Operator { + case NotExists: + match = !exists + } + } + return match +} + +const ( + less = -1 + equal = 0 + more = 1 +) + +// compareValues compares two values of potentially mixed numeric types. +// a is the span field value (float64, int64, bool, or string). +// b is the condition value (float64, int64, int, bool, or string). 
+func compareValues(a, b interface{}) (int, bool) {
+	// nil sorts before every non-nil value; two nils are equal.
+	if a == nil {
+		if b == nil {
+			return equal, true
+		}
+		return less, true
+	}
+	if b == nil {
+		return more, true
+	}
+
+	// The second result is false when the two dynamic types cannot be
+	// compared (for example a number against a string); the first result is
+	// then meaningless.
+	switch at := a.(type) {
+	case int64:
+		switch bt := b.(type) {
+		case int:
+			// Widen b instead of narrowing a: int(at) would truncate on
+			// platforms where int is 32 bits, while int64(bt) is lossless
+			// everywhere.
+			return compareOrdered(at, int64(bt)), true
+		case int64:
+			return compareOrdered(at, bt), true
+		case float64:
+			return compareOrdered(float64(at), bt), true
+		}
+	case float64:
+		switch bt := b.(type) {
+		case int:
+			return compareOrdered(at, float64(bt)), true
+		case int64:
+			return compareOrdered(at, float64(bt)), true
+		case float64:
+			return compareOrdered(at, bt), true
+		}
+	case bool:
+		// false orders before true.
+		if bt, ok := b.(bool); ok {
+			switch {
+			case !at && bt:
+				return less, true
+			case at && !bt:
+				return more, true
+			default:
+				return equal, true
+			}
+		}
+	case string:
+		if bt, ok := b.(string); ok {
+			return strings.Compare(at, bt), true
+		}
+	}
+
+	return equal, false
+}
+
+// compareOrdered returns less, equal, or more for two numbers of the same
+// type. A NaN operand satisfies neither < nor >, so it is reported as equal.
+func compareOrdered[T int64 | float64](x, y T) int {
+	switch {
+	case x < y:
+		return less
+	case x > y:
+		return more
+	default:
+		return equal
+	}
+}
diff --git a/config/span_counter_config_test.go b/config/span_counter_config_test.go
new file mode 100644
index 0000000000..287e438aa2
--- /dev/null
+++ b/config/span_counter_config_test.go
@@ -0,0 +1,302 @@
+package config
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// spanData is a simple map-backed implementation of SpanData for tests.
+type spanData map[string]any
+
+func (s spanData) Get(key string) any     { return s[key] }
+func (s spanData) Exists(key string) bool { _, ok := s[key]; return ok }
+
+// cond builds an initialized RulesBasedSamplerCondition from a field name,
+// operator, and optional value. It calls Init() so that the Matches function
+// is set when Datatype is empty (the ConditionMatchesValue path).
+func cond(field, operator string, value any) *RulesBasedSamplerCondition {
+	c := &RulesBasedSamplerCondition{
+		Field:    field,
+		Operator: operator,
+		Value:    value,
+	}
+	if err := c.Init(); err != nil {
+		panic("cond Init: " + err.Error())
+	}
+	return c
+}
+
+// condTyped builds an initialized condition with an explicit Datatype, which
+// causes Init to set a type-coercing Matches function instead of falling
+// through to ConditionMatchesValue.
+func condTyped(field, operator string, value any, datatype string) *RulesBasedSamplerCondition {
+	c := &RulesBasedSamplerCondition{
+		Field:    field,
+		Operator: operator,
+		Value:    value,
+		Datatype: datatype,
+	}
+	if err := c.Init(); err != nil {
+		panic("condTyped Init: " + err.Error())
+	}
+	return c
+}
+
+// ----------------------------------------------------------------------------
+// compareValues
+// ----------------------------------------------------------------------------
+
+func TestCompareValues(t *testing.T) {
+	tests := []struct {
+		name   string
+		a, b   any
+		want   int
+		wantOK bool
+	}{
+		// nil handling: nil orders before any non-nil value
+		{"nil==nil", nil, nil, equal, true},
+		{"nil<non-nil", nil, int64(1), less, true},
+		{"non-nil>nil", int64(1), nil, more, true},
+
+		// int64 vs int64
+		{"i64 less", int64(1), int64(2), less, true},
+		{"i64 equal", int64(3), int64(3), equal, true},
+		{"i64 more", int64(5), int64(4), more, true},
+
+		// int64 vs int
+		{"i64 vs int less", int64(1), int(2), less, true},
+		{"i64 vs int equal", int64(3), int(3), equal, true},
+		{"i64 vs int more", int64(5), int(4), more, true},
+
+		// int64 vs float64
+		{"i64 vs f64 less", int64(1), float64(1.5), less, true},
+		{"i64 vs f64 equal", int64(2), float64(2.0), equal, true},
+		{"i64 vs f64 more", int64(3), float64(2.9), more, true},
+
+		// float64 vs float64
+		{"f64 less", float64(1.1), float64(1.2), less, true},
+		{"f64 equal", float64(2.5), float64(2.5), equal, true},
+		{"f64 more", float64(3.0), float64(2.0), more, true},
+
+		// float64 vs int
+		{"f64 vs int less", float64(0.5), int(1), less, true},
+		{"f64 vs int equal", float64(2.0), int(2), equal, true},
+		{"f64 vs int more", float64(2.1), int(2), more, true},
+
+		// float64 vs int64
+		{"f64 vs i64 less", float64(0.5), int64(1), less, true},
+		{"f64 vs i64 equal", float64(2.0), int64(2), equal, true},
+		{"f64 vs i64 more", float64(3.0), int64(2), more, true},
+
+		// bool: false orders before true
+		{"bool false<true", false, true, less, true},
+		{"bool true>false", true, false, more, true},
+		{"bool equal", true, true, equal, true},
+
+		// string
+		{"str less", "apple", "banana", less, true},
+		{"str equal", "foo", "foo", equal, true},
+		{"str more", "zoo", "ant", more, true},
+
+		// type mismatch → ok=false
+		{"mismatch int64 str", int64(1), "1", equal, false},
+		{"mismatch f64 str", float64(1.0), "1.0", equal, false},
+		{"mismatch bool str", true, "true", equal, false},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, ok := compareValues(tc.a, tc.b)
+			assert.Equal(t, tc.wantOK, ok, "ok")
+			if tc.wantOK {
+				assert.Equal(t, tc.want, got, "comparison result")
+			}
+		})
+	}
+}
+
+// ----------------------------------------------------------------------------
+// ConditionMatchesValue
+// ----------------------------------------------------------------------------
+
+func TestConditionMatchesValue(t *testing.T) {
+	tests := []struct {
+		name     string
+		operator string
+		condVal  any
+		spanVal  any
+		exists   bool
+		want     bool
+	}{
+		// Exists / NotExists
+		{"exists true", Exists, nil, "anything", true, true},
+		{"exists false", Exists, nil, nil, false, false},
+		{"not-exists true", NotExists, nil, nil, false, true},
+		{"not-exists false", NotExists, nil, "x", true, false},
+
+		// EQ
+		{"eq string match", EQ, "foo", "foo", true, true},
+		{"eq string no-match", EQ, "foo", "bar", true, false},
+		{"eq int64 match", EQ, int64(42), int64(42), true, true},
+		{"eq int64 no-match", EQ, int64(42), int64(0), true, false},
+		{"eq type mismatch", EQ, "1", int64(1), true, false}, // compareValues returns ok=false → no match
+
+		// NEQ
+		{"neq match", NEQ, "foo", "bar", true, true},
+		{"neq no-match", NEQ, "foo", "foo", true, false},
+
+		// GT / GTE / LT / LTE
+		{"gt true", GT, int64(1), int64(2), true, true},
+		{"gt false eq", GT, int64(1), int64(1), true, false},
+		{"gte equal", GTE, int64(1), int64(1), true, true},
+		{"gte more", GTE, int64(1), int64(2), true, true},
+		{"gte less", GTE, int64(2), int64(1), true, false},
+		{"lt true", LT, int64(2), int64(1), true, true},
+		{"lt false", LT, int64(1), int64(2), true, false},
+		{"lte equal", LTE, int64(2), int64(2), true, true},
+		{"lte less", LTE, int64(3), int64(2), true, true},
+		{"lte more", LTE, int64(1), int64(2), true, false},
+
+		// field does not exist with non-NotExists operator → no match
+		{"eq field missing", EQ, "foo", nil, false, false},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			c := &RulesBasedSamplerCondition{
+				Operator: tc.operator,
+				Value:    tc.condVal,
+			}
+			got := ConditionMatchesValue(c, tc.spanVal, tc.exists)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// ----------------------------------------------------------------------------
+// SpanCounter.MatchesSpan
+// ----------------------------------------------------------------------------
+
+func TestMatchesSpan_NoConditions(t *testing.T) {
+	// A counter with no conditions matches every span.
+	counter := SpanCounter{Key: "all"}
+	assert.True(t, counter.MatchesSpan(spanData{"foo": "bar"}, nil))
+	assert.True(t, counter.MatchesSpan(spanData{}, nil))
+}
+
+func TestMatchesSpan_SingleCondition(t *testing.T) {
+	counter := SpanCounter{
+		Key:        "errors",
+		Conditions: []*RulesBasedSamplerCondition{cond("error", EQ, true)},
+	}
+
+	assert.True(t, counter.MatchesSpan(spanData{"error": true}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{"error": false}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{}, nil))
+}
+
+func TestMatchesSpan_MultipleConditionsAllMustMatch(t *testing.T) {
+	counter := SpanCounter{
+		Key: "slow-errors",
+		Conditions: []*RulesBasedSamplerCondition{
+			cond("error", EQ, true),
+			cond("duration_ms", GT, int64(500)),
+		},
+	}
+
+	assert.True(t, counter.MatchesSpan(spanData{"error": true, "duration_ms": int64(1000)}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{"error": true, "duration_ms": int64(100)}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{"error": false, "duration_ms": int64(1000)}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{}, nil))
+}
+
+func TestMatchesSpan_RootPrefixedField(t *testing.T) {
+	// "root.service.name" reads from the root span data, not the span itself.
+	counter := SpanCounter{
+		Key:        "svc-db",
+		Conditions: []*RulesBasedSamplerCondition{cond("root.service.name", EQ, "database")},
+	}
+
+	root := spanData{"service.name": "database"}
+	span := spanData{"duration_ms": int64(5)}
+
+	assert.True(t, counter.MatchesSpan(span, root))
+	assert.False(t, counter.MatchesSpan(span, spanData{"service.name": "api"}))
+}
+
+func TestMatchesSpan_RootPrefixedField_NilRoot(t *testing.T) {
+	// When root is nil a root-prefixed field is never found → field is absent.
+	counter := SpanCounter{
+		Key:        "svc",
+		Conditions: []*RulesBasedSamplerCondition{cond("root.service.name", EQ, "database")},
+	}
+	assert.False(t, counter.MatchesSpan(spanData{}, nil))
+}
+
+func TestMatchesSpan_MultiFieldFallback(t *testing.T) {
+	// When multiple fields are listed, the first one found is used.
+	c := &RulesBasedSamplerCondition{
+		Fields:   []string{"trace.trace_id", "traceId"},
+		Operator: Exists,
+	}
+	if err := c.Init(); err != nil {
+		t.Fatal(err)
+	}
+	counter := SpanCounter{Key: "has-trace", Conditions: []*RulesBasedSamplerCondition{c}}
+
+	assert.True(t, counter.MatchesSpan(spanData{"trace.trace_id": "abc"}, nil))
+	assert.True(t, counter.MatchesSpan(spanData{"traceId": "abc"}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{}, nil))
+}
+
+func TestMatchesSpan_MultiFieldFallback_FirstWins(t *testing.T) {
+	// If the first field exists but evaluates to a non-match, the second field
+	// is not consulted — only the first found field is used.
+	c := &RulesBasedSamplerCondition{
+		Fields:   []string{"a", "b"},
+		Operator: EQ,
+		Value:    "yes",
+	}
+	if err := c.Init(); err != nil {
+		t.Fatal(err)
+	}
+	counter := SpanCounter{Key: "k", Conditions: []*RulesBasedSamplerCondition{c}}
+
+	// "a" is found with wrong value; "b" has the right value but is not checked.
+	assert.False(t, counter.MatchesSpan(spanData{"a": "no", "b": "yes"}, nil))
+	// Only "b" exists → fallback to "b" → match.
+	assert.True(t, counter.MatchesSpan(spanData{"b": "yes"}, nil))
+}
+
+func TestMatchesSpan_TypedCondition(t *testing.T) {
+	// When Datatype is set, Init wires up a type-coercing Matches function.
+	// Verify that MatchesSpan delegates to it correctly.
+	counter := SpanCounter{
+		Key:        "count-int",
+		Conditions: []*RulesBasedSamplerCondition{condTyped("code", EQ, 200, "int")},
+	}
+
+	// span value arrives as string "200"; the typed matcher coerces it.
+	assert.True(t, counter.MatchesSpan(spanData{"code": "200"}, nil))
+	assert.False(t, counter.MatchesSpan(spanData{"code": "404"}, nil))
+}
+
+func TestMatchesSpan_ExistsAndNotExists(t *testing.T) {
+	exists := SpanCounter{
+		Key:        "has-field",
+		Conditions: []*RulesBasedSamplerCondition{cond("db.query", Exists, nil)},
+	}
+	notExists := SpanCounter{
+		Key:        "no-field",
+		Conditions: []*RulesBasedSamplerCondition{cond("db.query", NotExists, nil)},
+	}
+
+	withField := spanData{"db.query": "SELECT 1"}
+	without := spanData{}
+
+	assert.True(t, exists.MatchesSpan(withField, nil))
+	assert.False(t, exists.MatchesSpan(without, nil))
+	assert.False(t, notExists.MatchesSpan(withField, nil))
+	assert.True(t, notExists.MatchesSpan(without, nil))
+}
diff --git a/config/validate.go b/config/validate.go
index 41b0c135b7..92f89f5c33 100644
--- a/config/validate.go
+++ b/config/validate.go
@@ -653,6 +653,31 @@ func (m *Metadata) ValidateRules(data map[string]any) ValidationResults {
 				}
 			}
 			hasSamplers = true
+		case "SpanCounters":
+			if arr, ok := v.([]any); !ok {
+				results = append(results, ValidationResult{
+					Message:  fmt.Sprintf("SpanCounters must be an array, but %v is %T", v, v),
+					Severity: Error,
+				})
+			} else {
+				for i, entry := range arr {
+					if entryMap, ok := entry.(map[string]any); ok {
+						rulesmap := map[string]any{"SpanCounters": entryMap}
+						subresults := m.Validate(rulesmap)
+						for _, result := range subresults {
+							results = append(results, ValidationResult{
+								Message:  fmt.Sprintf("Within SpanCounters[%d]: %s", i, result.Message),
+								Severity: result.Severity,
+							})
+						}
+					} else {
+						results = append(results, ValidationResult{
+							Message:  fmt.Sprintf("SpanCounters[%d] must be an object, but %v is %T", i, entry, entry),
+							Severity: Error,
+						})
+					}
+				}
+			}
 		default:
 			results = append(results, ValidationResult{
 				Message:  fmt.Sprintf("unknown top-level key %s", k),
diff --git a/config_complete.yaml b/config_complete.yaml
index beb7eaf4ba..21e1a24751 100644
--- a/config_complete.yaml
+++ b/config_complete.yaml @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created on 2026-02-25 at 20:49:27 UTC from ../../config.yaml using a template generated on 2026-02-25 at 20:49:24 UTC +# created on 2026-04-09 at 22:21:32 UTC from ../../config.yaml using a template generated on 2026-04-09 at 22:21:28 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -166,16 +166,35 @@ AccessKeys: ## will be proxied through to the upstream API directly without modifying ## keys. ## - ## Not eligible for live reload. + ## Eligible for live reload. # ReceiveKeys: # - your-key-goes-here + ## ReceiveKeyIDs is a set of Honeycomb Ingest Key IDs that the proxy will + ## treat specially. + ## + ## When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose + ## Honeycomb ingest key ID matches an entry in this list will be + ## accepted. The key ID is the `id` field returned by the Honeycomb + ## `/1/auth` endpoint; it is distinct from the full API key value. + ## This allows authorization based on key IDs rather than full key + ## values, which avoids storing secret key material in the configuration + ## file. Both `ReceiveKeys` and `ReceiveKeyIDs` may be used + ## simultaneously. + ## Note: This feature does not support legacy API keys. Only Honeycomb + ## Ingest Keys (which have a key ID) are compatible with this setting. + ## + ## Eligible for live reload. + # ReceiveKeyIDs: + # - your-key-id-goes-here + ## AcceptOnlyListedKeys is a boolean flag that causes events arriving ## with API keys not in the `ReceiveKeys` list to be rejected. ## - ## If `true`, then only traffic using the keys listed in `ReceiveKeys` is - ## accepted. Events arriving with API keys not in the `ReceiveKeys` list - ## will be rejected with an HTTP `401` error. 
+ ## If `true`, then only traffic using the keys listed in `ReceiveKeys` or + ## whose key ID is listed in `ReceiveKeyIDs` is accepted. Events arriving + ## with API keys not in either list will be rejected with an HTTP `401` + ## error. ## If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. ## This setting is applied **before** the `SendKey` and `SendKeyMode` ## settings. @@ -695,6 +714,24 @@ OTelMetrics: ## Options: none gzip # Compression: gzip + ## AdditionalAttributes adds the provided attributes as resource + ## attributes on all OpenTelemetry metrics emitted by Refinery. + ## + ## This is useful for injecting deployment-specific metadata (such as a + ## cluster ID or environment name) into metrics so they can be filtered + ## or grouped in the metrics backend. Both keys and values must be + ## strings. + ## When supplying via an environment variable, the value should be a + ## string of comma-separated key-value pairs. When supplying via the + ## command line, the value should be a key-value pair. If multiple + ## key-value pairs are needed, each should be supplied via its own + ## command line flag. The key-value pairs must use ':' as the separator. + ## + ## Not eligible for live reload. 
+ # AdditionalAttributes: + # pipeline.id: '12345' + # rollout.id: '67890' + ########################### ## OpenTelemetry Tracing ## ########################### diff --git a/go.mod b/go.mod index 6cbe637eef..eb1c581408 100644 --- a/go.mod +++ b/go.mod @@ -15,68 +15,105 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/honeycombio/dynsampler-go v0.6.4 github.com/honeycombio/hpsf v0.14.0 - github.com/honeycombio/husky v0.41.0 - github.com/honeycombio/libhoney-go v1.26.0 + github.com/honeycombio/husky v0.43.1 + github.com/honeycombio/libhoney-go v1.27.1 github.com/jessevdk/go-flags v1.6.1 github.com/jonboulle/clockwork v0.5.0 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.18.2 - github.com/open-telemetry/opamp-go v0.22.0 - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.145.0 + github.com/klauspost/compress v1.18.6 + github.com/open-telemetry/opamp-go v0.23.0 + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.151.0 github.com/panmari/cuckoofilter v1.0.6 - github.com/pelletier/go-toml/v2 v2.2.4 + github.com/pelletier/go-toml/v2 v2.3.0 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.23.2 github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9 - github.com/redis/go-redis/v9 v9.17.3 + github.com/redis/go-redis/v9 v9.19.0 github.com/sirupsen/logrus v1.9.4 github.com/sourcegraph/conc v0.3.0 github.com/stretchr/testify v1.11.1 + github.com/testcontainers/testcontainers-go/modules/redis v0.42.0 github.com/tidwall/gjson v1.18.0 - github.com/tinylib/msgp v1.6.2 - github.com/valyala/fastjson v1.6.7 + github.com/tinylib/msgp v1.6.4 + github.com/valyala/fastjson v1.6.10 github.com/vmihailenco/msgpack/v5 v5.4.1 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 - go.opentelemetry.io/otel v1.40.0 - 
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.40.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0 - go.opentelemetry.io/otel/metric v1.40.0 - go.opentelemetry.io/otel/sdk v1.40.0 - go.opentelemetry.io/otel/sdk/metric v1.40.0 - go.opentelemetry.io/otel/trace v1.40.0 - go.opentelemetry.io/proto/otlp v1.9.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + go.opentelemetry.io/otel/metric v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/sdk/metric v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + go.opentelemetry.io/proto/otlp v1.10.0 golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b - google.golang.org/grpc v1.78.0 + google.golang.org/grpc v1.80.0 google.golang.org/protobuf v1.36.11 gopkg.in/yaml.v3 v3.0.1 ) require ( + dario.cat/mergo v1.0.2 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/containerd/platforms v0.2.1 // indirect + github.com/cpuguy83/dockercfg v0.3.2 // indirect + github.com/distribution/reference v0.6.0 // indirect + github.com/docker/go-connections v0.6.0 // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/ebitengine/purego v0.10.0 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/golang/groupcache 
v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/go-licenses/v2 v2.0.1 // indirect github.com/google/licenseclassifier/v2 v2.0.0 // indirect github.com/gorilla/websocket v1.5.3 // indirect - github.com/hashicorp/go-version v1.8.0 // indirect + github.com/hashicorp/go-version v1.9.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/magiconair/properties v1.8.10 // indirect + github.com/mdelapenya/tlscert v0.2.0 // indirect github.com/michel-laterman/proxy-connect-dialer-go v0.1.0 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.145.0 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/go-archive v0.2.0 // indirect + github.com/moby/moby/api v1.54.1 // indirect + github.com/moby/moby/client v0.4.0 // indirect + github.com/moby/patternmatcher v0.6.1 // indirect + github.com/moby/sys/sequential v0.6.0 // indirect + github.com/moby/sys/user v0.4.0 // indirect + github.com/moby/sys/userns v0.1.0 // indirect + github.com/moby/term v0.5.2 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.151.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.142.0 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.1.1 // indirect github.com/otiai10/copy v1.10.0 // indirect github.com/philhofer/fwd v1.2.0 // indirect + github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/sergi/go-diff v1.2.0 // indirect + github.com/shirou/gopsutil/v4 v4.26.3 // indirect github.com/spf13/cobra v1.7.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/objx v0.5.2 // indirect + github.com/stretchr/objx v0.5.3 // indirect + github.com/testcontainers/testcontainers-go v0.42.0 // indirect + github.com/tklauser/go-sysconf 
v0.3.16 // indirect + github.com/tklauser/numcpus v0.11.0 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/collector/featuregate v1.51.0 // indirect + go.opentelemetry.io/collector/featuregate v1.57.0 // indirect + go.opentelemetry.io/collector/pdata/xpdata v0.151.0 // indirect go.opentelemetry.io/proto/otlp/collector/profiles/v1development v0.2.0 // indirect go.opentelemetry.io/proto/otlp/profiles/v1development v0.2.0 // indirect + go.uber.org/atomic v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/sync v0.19.0 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/sync v0.20.0 // indirect gopkg.in/alexcesaro/statsd.v2 v2.0.0 // indirect k8s.io/klog/v2 v2.90.1 // indirect ) @@ -86,7 +123,6 @@ require ( github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 // indirect - github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01 // indirect github.com/facebookgo/muster v0.0.0-20150708232844-fd3d7953fd52 // indirect @@ -95,7 +131,7 @@ require ( github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/google/uuid v1.6.0 - github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect @@ -107,17 +143,15 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect 
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect - go.opentelemetry.io/collector/pdata v1.51.0 + go.opentelemetry.io/collector/pdata v1.57.0 go.uber.org/multierr v1.11.0 // indirect - golang.org/x/mod v0.31.0 - golang.org/x/net v0.49.0 // indirect - golang.org/x/sys v0.40.0 // indirect - golang.org/x/text v0.33.0 // indirect - golang.org/x/tools v0.40.0 - google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect + golang.org/x/mod v0.35.0 + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + golang.org/x/tools v0.43.0 + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d // indirect ) tool github.com/google/go-licenses/v2 - -replace go.opentelemetry.io/proto/otlp => github.com/honeycombio/opentelemetry-proto-go/otlp v1.9.0-compat diff --git a/go.sum b/go.sum index 3eb28ed3bf..ebf3dece30 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,15 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= +dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/zstd v1.5.7 
h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= @@ -21,7 +29,19 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= +github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= +github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA= +github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 
+github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= +github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/creasty/defaults v1.8.0 h1:z27FJxCAa0JKt3utc0sCImAEb+spPucmKoOdLHvHYKk= github.com/creasty/defaults v1.8.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -30,12 +50,18 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM= github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= -github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= -github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dgryski/go-wyhash v0.0.0-20191203203029-c4841ae36371 h1:bz5ApY1kzFBvw3yckuyRBCtqGvprWrKswYK468nm+Gs= github.com/dgryski/go-wyhash v0.0.0-20191203203029-c4841ae36371/go.mod h1:/ENMIO1SQeJ5YQeUWWpbX8f+bS8INHrrhFjXgEqi4LA= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= +github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= +github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= +github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= +github.com/docker/go-units v0.5.0 
h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= +github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -66,6 +92,8 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -91,6 +119,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -112,22 +141,20 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/pyroscope-go/godeltaprof v0.1.9 h1:c1Us8i6eSmkW+Ez05d3co8kasnuOY813tbMN8i/a3Og= github.com/grafana/pyroscope-go/godeltaprof v0.1.9/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= -github.com/hashicorp/go-version v1.8.0 h1:KAkNb1HAiZd1ukkxDFGmokVZe1Xy9HG6NUp+bPle2i4= -github.com/hashicorp/go-version v1.8.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/hashicorp/go-version v1.9.0 h1:CeOIz6k+LoN3qX9Z0tyQrPtiB1DFYRPfCIBtaXPSCnA= +github.com/hashicorp/go-version v1.9.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/honeycombio/dynsampler-go v0.6.4 h1:EM3FXN2Lfmso41MRMmSvRynMrz+AHiRffaWHPf4ZHDs= github.com/honeycombio/dynsampler-go v0.6.4/go.mod h1:M5YYNOfxRrBlEWDatTlHMYo5F7GjwVnptx5z+uXIVMo= github.com/honeycombio/hpsf v0.14.0 h1:LeQbDuT+aVmiJnWp9Kqb9Qqz5OZcjDk85RMzzwKtCKI= github.com/honeycombio/hpsf v0.14.0/go.mod h1:VyPjyn1GViOiCrpBbPZCkEJnuDuSTUpU8LV5CWVTQm4= -github.com/honeycombio/husky v0.41.0 
h1:6iuC3FJpU2xZUveLFGAWvDP/Xp9Vnt1vMgwu2UCeQfA= -github.com/honeycombio/husky v0.41.0/go.mod h1:kgwFQfPCC82f5BxuBb8BAuuC1Q7e5NK7EVsjcjztuXo= -github.com/honeycombio/libhoney-go v1.26.0 h1:fdwS7c/5h6ifJqQZ178nm4UEZha04GTbwJMZ7xkShhk= -github.com/honeycombio/libhoney-go v1.26.0/go.mod h1:cR+t7pq9heP00+1/+TNWCrAfjSA74xKWI8YGOANlzYY= -github.com/honeycombio/opentelemetry-proto-go/otlp v1.9.0-compat h1:g6pUF6IZVLG93vZbUefK0qF20CGx0zf0q3n3Fw4gv1s= -github.com/honeycombio/opentelemetry-proto-go/otlp v1.9.0-compat/go.mod h1:ZyEcAltAA7tCBVo5o+5klmG2l+43E1fjpxGxvOIskic= +github.com/honeycombio/husky v0.43.1 h1:HRaSO59KujOsYNQO1Qkn8YFboizheTJcKlBvVhClDe8= +github.com/honeycombio/husky v0.43.1/go.mod h1:lQ1VzGZxeYPCr4zxmak1lVe29HJFqJ6bQXWCl0ZqlNg= +github.com/honeycombio/libhoney-go v1.27.1 h1:79FR19fVpaeDMqTDfpXtMxd90vzsxhZnIOSysMrUSQQ= +github.com/honeycombio/libhoney-go v1.27.1/go.mod h1:qLZO8Q3ep/hISEoVC7m8N9ZOvn2eqaGdoJg9XXXasqM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4= @@ -136,8 +163,10 @@ github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbd github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= -github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/compress v1.18.6 h1:2jupLlAwFm95+YDR+NwD2MEfFO9d4z4Prjl1XXDjuao= +github.com/klauspost/compress v1.18.6/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= +github.com/klauspost/cpuid/v2 v2.2.10 
h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -147,8 +176,32 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= +github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI= +github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o= github.com/michel-laterman/proxy-connect-dialer-go v0.1.0 h1:Q8asukpmyrEheocd+R+6YEI4jcm62sHHalgTMG+LoLw= github.com/michel-laterman/proxy-connect-dialer-go v0.1.0/go.mod h1:HTlVkRAqzTRPYbWxgAiwMT9HRZMOqP3Mx7+toa3yJjc= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= +github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8= +github.com/moby/go-archive v0.2.0/go.mod h1:mNeivT14o8xU+5q1YnNrkQVpK+dnNe/K6fHqnTg4qPU= +github.com/moby/moby/api v1.54.1 h1:TqVzuJkOLsgLDDwNLmYqACUuTehOHRGKiPhvH8V3Nn4= 
+github.com/moby/moby/api v1.54.1/go.mod h1:+RQ6wluLwtYaTd1WnPLykIDPekkuyD/ROWQClE83pzs= +github.com/moby/moby/client v0.4.0 h1:S+2XegzHQrrvTCvF6s5HFzcrywWQmuVnhOXe2kiWjIw= +github.com/moby/moby/client v0.4.0/go.mod h1:QWPbvWchQbxBNdaLSpoKpCdf5E+WxFAgNHogCWDoa7g= +github.com/moby/patternmatcher v0.6.1 h1:qlhtafmr6kgMIJjKJMDmMWq7WLkKIo23hsrpR3x084U= +github.com/moby/patternmatcher v0.6.1/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= +github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= +github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= +github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= +github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -157,30 +210,36 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFd github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/open-telemetry/opamp-go v0.22.0 
h1:7UnsQgFFS7ffM09JQk+9aGVBAAlsLfcooZ9xvSYwxWM= -github.com/open-telemetry/opamp-go v0.22.0/go.mod h1:339N71soCPrhHywbAcKUZJDODod581ZOxCpTkrl3zYQ= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.145.0 h1:lbxy2bYh3v0YIyqd/JVttEwYlC7yU5o3JU2N/m5Qnq8= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.145.0/go.mod h1:kGlLjX8CJSE+9SfLARgaXTFBuAvNadjLvPsHO7fcVeE= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.145.0 h1:0ithmsGyVtjzODmAPp9pkxA4IlnYpyeXmDWrryTkHNo= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.145.0/go.mod h1:r+K/aCWpUCDDM5Gisznf9ZQjpZcyFr84CuATA9486JQ= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.145.0 h1:sB4yuYx45zig1ceQ+kmrEYy0xMZ+mGagwYIFtJkkU1w= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.145.0/go.mod h1:uLhceuH7ZtiVxk+B0MHI0vhJG2Y4aOzT/hrV6c5KjVU= +github.com/open-telemetry/opamp-go v0.23.0 h1:k7h7w/muprut9/DAhUC4anX4v7hIdgO02gIsSjV4uq0= +github.com/open-telemetry/opamp-go v0.23.0/go.mod h1:DIIVdkLefdqPW5L+4I2twmAicVrTB0Bp5XJAfedZzAM= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.151.0 h1:M+d61Wo6zhJoAWKDVUJeeZa46hepah1s+zKgfPlD0ng= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.151.0/go.mod h1:UjELBH4CzaY+y3fHR4RpenHJ3277jBYxTC4xEa5Sxfk= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.151.0 h1:JbnrAMGHqSW+jvJRL9RS7JGMrWpXqGPXdkAk6JoMHV4= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.151.0/go.mod h1:xoSnCUue2dtnuMyJd/1xz7JaQ2G7eweNxM0Laj1uuVc= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.151.0 h1:c8+upXGwDxokINkuChSD7INYHlpcCAyQs2aXpx4rzSs= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.151.0/go.mod h1:Ln3K9yJgPAwEUXqCoR8htVs6bk3cyj6zIPOyM/LhiPo= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling 
v0.142.0 h1:lFowWhr/qx5Gm2X8H0BbG87xZh/e+4S0PQw8HQO5D4Y= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.142.0/go.mod h1:JybcaNLHHzJQh690eSp+KDbLrxB1+AhKNLlibqrogt4= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= +github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/otiai10/copy v1.10.0 h1:znyI7l134wNg/wDktoVQPxPkgvhDfGCYUasey+h0rDQ= github.com/otiai10/copy v1.10.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/panmari/cuckoofilter v1.0.6 h1:WKb1aSj16h22x0CKVtTCaRkJiCnVGPLEMGbNY8xwXf8= github.com/panmari/cuckoofilter v1.0.6/go.mod h1:bKADbQPGbN6TxUvo/IbMEIUbKuASnpsOvrLTgpSX0aU= -github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= -github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM= +github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat 
v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -194,14 +253,16 @@ github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9 h1:bsUq1dX0N8A github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rdleal/go-priorityq v0.0.0-20240324224830-28716009213d h1:OuC714/HtVeMJo6Y1mRkeuDmu3t+F0cgh6qPDGqLmqI= github.com/rdleal/go-priorityq v0.0.0-20240324224830-28716009213d/go.mod h1:X4AAZOixX/7z5rgQkIkMa72A0++MLRke9nipxYUg+8E= -github.com/redis/go-redis/v9 v9.17.3 h1:fN29NdNrE17KttK5Ndf20buqfDZwGNgoUr9qjl1DQx4= -github.com/redis/go-redis/v9 v9.17.3/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370= +github.com/redis/go-redis/v9 v9.19.0 h1:XPVaaPSnG6RhYf7p+rmSa9zZfeVAnWsH5h3lxthOm/k= +github.com/redis/go-redis/v9 v9.19.0/go.mod h1:v/M13XI1PVCDcm01VtPFOADfZtHf8YW3baQf57KlIkA= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/shirou/gopsutil/v4 v4.26.3 h1:2ESdQt90yU3oXF/CdOlRCJxrP+Am1aBYubTMTfxJ1qc= +github.com/shirou/gopsutil/v4 
v4.26.3/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ= github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= @@ -213,8 +274,8 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= -github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4= +github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -222,6 +283,10 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/testcontainers/testcontainers-go v0.42.0 h1:He3IhTzTZOygSXLJPMX7n44XtK+qhjat1nI9cneBbUY= +github.com/testcontainers/testcontainers-go v0.42.0/go.mod h1:vZjdY1YmUA1qEForxOIOazfsrdyORJAbhi0bp8plN30= +github.com/testcontainers/testcontainers-go/modules/redis v0.42.0 h1:id/6LH8ZeDrtAUVSuNvZUAJ1kVpb82y1pr9yweAWsRg= 
+github.com/testcontainers/testcontainers-go/modules/redis v0.42.0/go.mod h1:uF0jI8FITagQpBNOgweGBmPf6rP4K0SeL1XFPbsZSSY= github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -229,56 +294,70 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= -github.com/tinylib/msgp v1.6.2 h1:D40LN895O9HJpN8n5Ksqk+abl7zw6RtizDwgRCE7hXk= -github.com/tinylib/msgp v1.6.2/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA= -github.com/valyala/fastjson v1.6.7 h1:ZE4tRy0CIkh+qDc5McjatheGX2czdn8slQjomexVpBM= -github.com/valyala/fastjson v1.6.7/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= +github.com/tinylib/msgp v1.6.4 h1:mOwYbyYDLPj35mkA2BjjYejgJk9BuHxDdvRnb6v2ZcQ= +github.com/tinylib/msgp v1.6.4/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA= +github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= +github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI= +github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw= +github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ= +github.com/valyala/fastjson v1.6.10 h1:/yjJg8jaVQdYR3arGxPE2X5z89xrlhS0eGXdv+ADTh4= +github.com/valyala/fastjson v1.6.10/go.mod h1:e6FubmQouUNP73jtMLmcbxS6ydWIpOfhz34TSfO3JaE= github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= github.com/vmihailenco/tagparser/v2 
v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= +github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/collector/featuregate v1.51.0 h1:dxJuv/3T84dhNKp7fz5+8srHz1dhquGzDpLW4OZTFBw= -go.opentelemetry.io/collector/featuregate v1.51.0/go.mod h1:/1bclXgP91pISaEeNulRxzzmzMTm4I5Xih2SnI4HRSo= -go.opentelemetry.io/collector/internal/testutil v0.145.0 h1:H/KL0GH3kGqSMKxZvnQ0B0CulfO9xdTg4DZf28uV7fY= -go.opentelemetry.io/collector/internal/testutil v0.145.0/go.mod h1:YAD9EAkwh/l5asZNbEBEUCqEjoL1OKMjAMoPjPqH76c= -go.opentelemetry.io/collector/pdata v1.51.0 h1:DnDhSEuDXNdzGRB7f6oOfXpbDApwBX3tY+3K69oUrDA= -go.opentelemetry.io/collector/pdata v1.51.0/go.mod h1:GoX1bjKDR++mgFKdT7Hynv9+mdgQ1DDXbjs7/Ww209Q= -go.opentelemetry.io/collector/pdata/pprofile v0.145.0 h1:ASMKpoqokf8HhzjoeMKZf0K6UXLhufVwNXH0sSuUn5w= -go.opentelemetry.io/collector/pdata/pprofile v0.145.0/go.mod h1:a60GC7wQPhLAixWzKbbP51QLwwc+J0Cmp4SurOlhGUk= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 h1:XmiuHzgJt067+a6kwyAzkhXooYVv3/TOw9cM2VfJgUM= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0/go.mod h1:KDgtbWKTQs4bM+VPUr6WlL9m/WXcmkCcBlIzqxPGzmI= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 
h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0= -go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= -go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.40.0 h1:9y5sHvAxWzft1WQ4BwqcvA+IFVUJ1Ya75mSAUnFEVwE= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.40.0/go.mod h1:eQqT90eR3X5Dbs1g9YSM30RavwLF725Ris5/XSXWvqE= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 h1:QKdN8ly8zEMrByybbQgv8cWBcdAarwmIPZ6FThrWXJs= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0/go.mod h1:bTdK1nhqF76qiPoCCdyFIV+N/sRHYXYCTQc+3VCi3MI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0 h1:wVZXIWjQSeSmMoxF74LzAnpVQOAFDo3pPji9Y4SOFKc= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0/go.mod h1:khvBS2IggMFNwZK/6lEeHg/W57h/IX6J4URh57fuI40= -go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= -go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= -go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= -go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= -go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= -go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= -go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= -go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +go.opentelemetry.io/collector/featuregate v1.57.0 h1:KPDSUKYn6MHwgyGRSGPPcW/G96HH93pxuvvPwM+R8nY= +go.opentelemetry.io/collector/featuregate v1.57.0/go.mod 
h1:4ga1QBMPEejXXmpyJS8lmaRpknJ3Lb9Bvk6e420bUFU= +go.opentelemetry.io/collector/internal/testutil v0.151.0 h1:CFjDItLuqzblItOsnK6IPSdrsOaZCaDjYpB8qWG+XHI= +go.opentelemetry.io/collector/internal/testutil v0.151.0/go.mod h1:Jkjs6rkqs973LqgZ0Fe3zrokQRKULYXPIf4HuqStiEE= +go.opentelemetry.io/collector/pdata v1.57.0 h1:oDWBMjEIqyJO3GJEB+iwqxj47rxDK19OKzwaFEaE4sg= +go.opentelemetry.io/collector/pdata v1.57.0/go.mod h1:wZojinP6mNhLXudH8QXx/bjWzOsKMxi/FXwnk+12G/w= +go.opentelemetry.io/collector/pdata/pprofile v0.151.0 h1:hsU0+DpkvhJh3xL1Y8CX2vAPdLMoJLiw+C+rAMsaxZc= +go.opentelemetry.io/collector/pdata/pprofile v0.151.0/go.mod h1:5zfGTQqRuaKyh2SRaZi4SV4nSD8TzY1kYoOjniOD3uk= +go.opentelemetry.io/collector/pdata/xpdata v0.151.0 h1:trsLPS6jCkwVwJyKxbPqQerAiMpKkQrQLEGIEcyC6yM= +go.opentelemetry.io/collector/pdata/xpdata v0.151.0/go.mod h1:0vID3D52DGVoypLa8S7izv41ElTBEgtAbc0HmB4KF60= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 h1:0Qx7VGBacMm9ZENQ7TnNObTYI4ShC+lHI16seduaxZo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0/go.mod h1:Sje3i3MjSPKTSPvVWCaL8ugBzJwik3u4smCjUeuupqg= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 h1:w1K+pCJoPpQifuVpsKamUdn9U0zM3xUziVOqsGksUrY= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0/go.mod h1:HBy4BjzgVE8139ieRI75oXm3EcDN+6GhD88JT1Kjvxg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod 
h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= go.opentelemetry.io/proto/otlp/collector/profiles/v1development v0.2.0 h1:40vBjolEOioNBl8zPj1wxqlA7kJ82RxR4HnUv7W8zRI= go.opentelemetry.io/proto/otlp/collector/profiles/v1development v0.2.0/go.mod h1:4wAsc1dEVb4D1ZykBNC9AriTU9uLYtmziLrB+7G4lb4= go.opentelemetry.io/proto/otlp/profiles/v1development v0.2.0 h1:yXinc284C6bmzA1r9jk7MxAhrBIIOH3qwmqwBmylZrA= go.opentelemetry.io/proto/otlp/profiles/v1development v0.2.0/go.mod h1:ygxocDWPB6Y6bySAjxmHyTebjAJ8jcEUAZc03gu1pxk= -go.opentelemetry.io/proto/slim/otlp v1.9.0 h1:fPVMv8tP3TrsqlkH1HWYUpbCY9cAIemx184VGkS6vlE= -go.opentelemetry.io/proto/slim/otlp v1.9.0/go.mod h1:xXdeJJ90Gqyll+orzUkY4bOd2HECo5JofeoLpymVqdI= -go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.2.0 
h1:o13nadWDNkH/quoDomDUClnQBpdQQ2Qqv0lQBjIXjE8= -go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.2.0/go.mod h1:Gyb6Xe7FTi/6xBHwMmngGoHqL0w29Y4eW8TGFzpefGA= -go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.2.0 h1:EiUYvtwu6PMrMHVjcPfnsG3v+ajPkbUeH+IL93+QYyk= -go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.2.0/go.mod h1:mUUHKFiN2SST3AhJ8XhJxEoeVW12oqfXog0Bo8W3Ec4= +go.opentelemetry.io/proto/slim/otlp v1.10.0 h1:iR97Vs/ZDR+y9TfuP9b1XBtdPWeC+OMslIBmhcLU7jM= +go.opentelemetry.io/proto/slim/otlp v1.10.0/go.mod h1:lV9250stpjYLPNA5viFabIgP2QlUGRT1GdTgAf8SIUk= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0 h1:RUF5rO0hAlgiJt1fzQVzcVs3vZVNHIcMLgOgG4rWNcQ= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0/go.mod h1:I89cynRj8y+383o7tEQVg2SVA6SRgDVIouWPUVXjx0U= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0 h1:CQvJSldHRUN6Z8jsUeYv8J0lXRvygALXIzsmAeCcZE0= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0/go.mod h1:xSQ+mEfJe/GjK1LXEyVOoSI1N9JV9ZI923X5kup43W4= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -287,64 +366,71 @@ go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto 
v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= -golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM= +golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= -golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= golang.org/x/oauth2 
v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= -golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= -golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= +golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= +golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= -gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod 
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 h1:merA0rdPeUV3YIIfHHcH4qBkiQAc1nfCKSI7lB4cV2M= -google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409/go.mod h1:fl8J1IvUjCilwZzQowmw2b7HQB2eAuYBabMXzWurF+I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d h1:wT2n40TBqFY6wiwazVK9/iTWbsQrgk5ZfCSVFLO9LQA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= 
+google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -367,7 +453,11 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= +gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/klog/v2 v2.90.1 h1:m4bYOKall2MmOiRaR1J+We67Do7vm9KiQVlT96lnHUw= k8s.io/klog/v2 v2.90.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk= +pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= diff --git a/internal/peer/peers_test.go b/internal/peer/peers_test.go index 344c794eb8..3fca0e9f3c 100644 --- a/internal/peer/peers_test.go +++ b/internal/peer/peers_test.go @@ -12,6 +12,7 @@ import ( "github.com/facebookgo/inject" "github.com/facebookgo/startstop" "github.com/honeycombio/refinery/config" + "github.com/honeycombio/refinery/internal/redistest" "github.com/honeycombio/refinery/logger" "github.com/honeycombio/refinery/metrics" "github.com/honeycombio/refinery/pubsub" @@ -87,10 +88,14 @@ func newPeers(c 
config.Config) (Peers, error) { } func TestPeerShutdown(t *testing.T) { + host, port := redistest.Endpoint(t) c := &config.MockConfig{ GetPeerListenAddrVal: "0.0.0.0:8081", PeerManagementType: "redis", PeerTimeout: 5 * time.Second, + GetRedisPeerManagementVal: config.RedisPeerManagementConfig{ + Host: host + ":" + port, + }, } p, err := newPeers(c) diff --git a/internal/redistest/redistest.go b/internal/redistest/redistest.go new file mode 100644 index 0000000000..c67463935e --- /dev/null +++ b/internal/redistest/redistest.go @@ -0,0 +1,64 @@ +// Package redistest provides a shared Redis testcontainer for tests that need +// a real Redis instance. One container is started per test binary on first +// call and reused across tests; the testcontainers Reaper cleans it up when +// the process exits. +package redistest + +import ( + "context" + "net" + "net/url" + "sync" + "testing" + "time" + + "github.com/testcontainers/testcontainers-go/modules/redis" +) + +const image = "redis:6.2" + +var ( + once sync.Once + sharedHost string + sharedPort string + startup error +) + +// Endpoint returns the host and port of a shared Redis container, starting it +// on first call. The container lives for the duration of the test process. 
+func Endpoint(t testing.TB) (host, port string) { + t.Helper() + once.Do(start) + if startup != nil { + t.Fatalf("redistest: failed to start Redis container: %v", startup) + } + return sharedHost, sharedPort +} + +func start() { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + c, err := redis.Run(ctx, image) + if err != nil { + startup = err + return + } + conn, err := c.ConnectionString(ctx) + if err != nil { + startup = err + return + } + u, err := url.Parse(conn) + if err != nil { + startup = err + return + } + h, p, err := net.SplitHostPort(u.Host) + if err != nil { + startup = err + return + } + sharedHost = h + sharedPort = p +} diff --git a/metrics.md b/metrics.md index 4be5fcf988..2b39f3f9f8 100644 --- a/metrics.md +++ b/metrics.md @@ -3,7 +3,7 @@ # Honeycomb Refinery Metrics Documentation This document contains the description of various metrics used in Refinery. -It was automatically generated on 2026-02-25 at 20:49:27 UTC. +It was automatically generated on 2026-04-09 at 22:21:31 UTC. Note: This document does not include metrics defined in the dynsampler-go dependency, as those metrics are generated dynamically at runtime. As a result, certain metrics may be missing or incomplete in this document, but they will still be available during execution with their full names. @@ -34,11 +34,14 @@ This table includes metrics with fully defined names. 
| trace_span_count | Histogram | Dimensionless | number of spans in a trace | | collector_incoming_queue | Histogram | Dimensionless | number of spans currently in the incoming queue | | collector_peer_queue_length | Gauge | Dimensionless | number of spans in the peer queue | +| collector_peer_queue_capacity | Gauge | Dimensionless | configured maximum number of spans in the peer queue | | collector_incoming_queue_length | Gauge | Dimensionless | number of spans in the incoming queue | +| collector_incoming_queue_capacity | Gauge | Dimensionless | configured maximum number of spans in the incoming queue | | collector_peer_queue | Histogram | Dimensionless | number of spans currently in the peer queue | | collector_cache_size | Gauge | Dimensionless | number of traces currently stored in the trace cache | | collect_cache_entries | Histogram | Dimensionless | Total number of traces currently stored in the cache from all workers | | memory_heap_allocation | Gauge | Bytes | current heap allocation | +| memory_limit | Gauge | Bytes | configured maximum memory allocation for the collector (derived from MaxAlloc or AvailableMemory * MaxMemoryPercentage) | | span_received | Counter | Dimensionless | number of spans received by the collector | | span_processed | Counter | Dimensionless | number of spans processed by the collector | | spans_waiting | UpDown | Dimensionless | number of spans waiting to be processed by the collector | @@ -56,6 +59,7 @@ This table includes metrics with fully defined names. 
| trace_send_late_span | Counter | Dimensionless | number of spans that are sent due to late span arrival | | dropped_from_stress | Counter | Dimensionless | number of spans dropped due to stress relief | | kept_from_stress | Counter | Dimensionless | number of spans kept due to stress relief | +| events_dropped | Counter | Dimensionless | number of events dropped | | trace_kept_sample_rate | Histogram | Dimensionless | sample rate of kept traces | | trace_aggregate_sample_rate | Histogram | Dimensionless | aggregate sample rate of both kept and dropped traces | | collector_collect_loop_duration_ms | Histogram | Milliseconds | duration of the collect loop, the primary event processing goroutine | diff --git a/metrics/otel_metrics.go b/metrics/otel_metrics.go index 6d969e1046..34d2f56ad4 100644 --- a/metrics/otel_metrics.go +++ b/metrics/otel_metrics.go @@ -118,13 +118,19 @@ func (o *OTelMetrics) Start() error { hostname = hn } - res, err := resource.New(ctx, + // Build resource attributes: start with defaults, then add user-defined additional attributes + resourceOpts := []resource.Option{ resource.WithAttributes(resource.Default().Attributes()...), resource.WithAttributes(attribute.KeyValue{Key: "service.name", Value: attribute.StringValue("refinery")}), resource.WithAttributes(attribute.KeyValue{Key: "service.version", Value: attribute.StringValue(o.Version)}), resource.WithAttributes(attribute.KeyValue{Key: "host.name", Value: attribute.StringValue(hostname)}), resource.WithAttributes(attribute.KeyValue{Key: "hostname", Value: attribute.StringValue(hostname)}), - ) + } + for k, v := range cfg.AdditionalAttributes { + resourceOpts = append(resourceOpts, resource.WithAttributes(attribute.KeyValue{Key: attribute.Key(k), Value: attribute.StringValue(v)})) + } + + res, err := resource.New(ctx, resourceOpts...) 
if err != nil { return err diff --git a/metrics/otel_metrics_test.go b/metrics/otel_metrics_test.go index 96043bf86a..cc0f7ffd44 100644 --- a/metrics/otel_metrics_test.go +++ b/metrics/otel_metrics_test.go @@ -124,6 +124,46 @@ func Test_OTelMetrics_Raciness(t *testing.T) { metricdatatest.AssertEqual(t, want, got, metricdatatest.IgnoreTimestamp()) } +func Test_OTelMetrics_AdditionalAttributes(t *testing.T) { + rdr := sdkmetric.NewManualReader() + + o := &OTelMetrics{ + Logger: &logger.MockLogger{}, + Config: &config.MockConfig{ + GetOTelMetricsConfigVal: config.OTelMetricsConfig{ + AdditionalAttributes: map[string]string{ + "cluster.id": "test-cluster-123", + "environment": "staging", + }, + }, + }, + testReader: rdr, + } + + err := o.Start() + defer o.Stop() + require.NoError(t, err) + + // Emit a metric so we can collect resource data + o.Register(Metadata{Name: "test_attr", Type: Counter}) + o.Increment("test_attr") + + rm := metricdata.ResourceMetrics{} + err = rdr.Collect(t.Context(), &rm) + require.NoError(t, err) + + // Check that the additional attributes are present as resource attributes + attrs := rm.Resource.Attributes() + attrMap := make(map[string]string) + for _, attr := range attrs { + attrMap[string(attr.Key)] = attr.Value.AsString() + } + + assert.Equal(t, "test-cluster-123", attrMap["cluster.id"], "cluster.id resource attribute should be set") + assert.Equal(t, "staging", attrMap["environment"], "environment resource attribute should be set") + assert.Equal(t, "refinery", attrMap["service.name"], "service.name should still be present") +} + func Benchmark_OTelMetrics_ConcurrentAccess(b *testing.B) { o := &OTelMetrics{ Logger: &logger.NullLogger{}, diff --git a/pubsub/pubsub_test.go b/pubsub/pubsub_test.go index 333ce78b68..32dbaa6a95 100644 --- a/pubsub/pubsub_test.go +++ b/pubsub/pubsub_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/honeycombio/refinery/config" + "github.com/honeycombio/refinery/internal/redistest" 
"github.com/honeycombio/refinery/logger" "github.com/honeycombio/refinery/metrics" "github.com/honeycombio/refinery/pubsub" @@ -22,17 +23,20 @@ var types = []string{ "local", } -func newPubSub(typ string) pubsub.PubSub { +func newPubSub(t testing.TB, typ string) pubsub.PubSub { + t.Helper() var ps pubsub.PubSub m := &metrics.NullMetrics{} m.Start() tracer := noop.NewTracerProvider().Tracer("test") switch typ { case "goredis": + host, port := redistest.Endpoint(t) ps = &pubsub.GoRedisPubSub{ Config: &config.MockConfig{ GetRedisPeerManagementVal: config.RedisPeerManagementConfig{ ClusterName: "test", + Host: host + ":" + port, }, }, Metrics: m, @@ -71,7 +75,7 @@ func TestPubSubBasics(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) l1 := &pubsubListener{} @@ -105,7 +109,7 @@ func TestPubSubMultiSubscriber(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) l1 := &pubsubListener{} l2 := &pubsubListener{} topic := ps.FormatTopic("topic") @@ -138,7 +142,7 @@ func TestPubSubMultiTopic(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) time.Sleep(500 * time.Millisecond) topics := make([]string, topicCount) listeners := make([]*pubsubListener, topicCount) @@ -190,7 +194,7 @@ func TestPubSubLatency(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) var count, total, tmin, tmax int64 mut := sync.Mutex{} @@ -252,7 +256,7 @@ func BenchmarkPubSub(b *testing.B) { ctx := context.Background() for _, typ := range types { b.Run(typ, func(b *testing.B) { - ps := newPubSub(typ) + ps := newPubSub(b, typ) time.Sleep(100 * time.Millisecond) li := &pubsubListener{} diff --git 
a/refinery_config.md b/refinery_config.md index fc53f1da5e..47d67bd338 100644 --- a/refinery_config.md +++ b/refinery_config.md @@ -158,16 +158,31 @@ Not intended or supported for customer use. This list only applies to span traffic - other Honeycomb API actions will be proxied through to the upstream API directly without modifying keys. -- Not eligible for live reload. +- Eligible for live reload. - Type: `stringarray` - Example: `your-key-goes-here` +### `ReceiveKeyIDs` + +`ReceiveKeyIDs` is a set of Honeycomb Ingest Key IDs that the proxy will treat specially. + +When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose Honeycomb ingest key ID matches an entry in this list will be accepted. +The key ID is the `id` field returned by the Honeycomb `/1/auth` endpoint; it is distinct from the full API key value. +This allows authorization based on key IDs rather than full key values, which avoids storing secret key material in the configuration file. +Both `ReceiveKeys` and `ReceiveKeyIDs` may be used simultaneously. +Note: This feature does not support legacy API keys. +Only Honeycomb Ingest Keys (which have a key ID) are compatible with this setting. + +- Eligible for live reload. +- Type: `stringarray` +- Example: `your-key-id-goes-here` + ### `AcceptOnlyListedKeys` `AcceptOnlyListedKeys` is a boolean flag that causes events arriving with API keys not in the `ReceiveKeys` list to be rejected. -If `true`, then only traffic using the keys listed in `ReceiveKeys` is accepted. -Events arriving with API keys not in the `ReceiveKeys` list will be rejected with an HTTP `401` error. +If `true`, then only traffic using the keys listed in `ReceiveKeys` or whose key ID is listed in `ReceiveKeyIDs` is accepted. +Events arriving with API keys not in either list will be rejected with an HTTP `401` error. If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. This setting is applied **before** the `SendKey` and `SendKeyMode` settings. 
@@ -644,6 +659,22 @@ In rare circumstances, compression costs may outweigh the benefits, in which cas - Default: `gzip` - Options: `none`, `gzip` +### `AdditionalAttributes` + +`AdditionalAttributes` adds the provided attributes as resource attributes on all OpenTelemetry metrics emitted by Refinery. + +This is useful for injecting deployment-specific metadata (such as a cluster ID or environment name) into metrics so they can be filtered or grouped in the metrics backend. +Both keys and values must be strings. +When supplying via an environment variable, the value should be a string of comma-separated key-value pairs. +When supplying via the command line, the value should be a key-value pair. +If multiple key-value pairs are needed, each should be supplied via its own command line flag. +The key-value pairs must use ':' as the separator. + +- Not eligible for live reload. +- Type: `map` +- Example: `pipeline.id:'12345',rollout.id:'67890'` +- Environment variable: `REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES` + ## OpenTelemetry Tracing `OTelTracing` contains configuration for Refinery's own tracing. diff --git a/refinery_rules.md b/refinery_rules.md index 5fcd2b9592..6a22a5d98c 100644 --- a/refinery_rules.md +++ b/refinery_rules.md @@ -671,3 +671,27 @@ If your traces are consistent lengths and changes in trace length is a useful in - Type: `bool` +## Custom Span Count Configuration + +Defines a single custom span counter. +Each counter has a Key that names the field written to the root span, and an optional list of Conditions that must all match for a span to be counted. +Spans are counted when all of the entry's Conditions match. +If Conditions is empty, every span in the trace is counted. +The counter value is written to the root span under the key specified by `Key`. +If no root span exists when the trace is sent, the counter is written to the first non-annotation span instead. + +### `Key` + +The name of the field that will be added to the root span.
+Must not be empty. + +- Type: `string` + +### `Conditions` + +All conditions must match for a span to be counted. +If empty, every span in the trace is counted. +Uses the same condition format as rules-based sampler conditions. + +- Type: `objectarray` + diff --git a/route/middleware.go b/route/middleware.go index 3b1d0081ed..6a86f47156 100644 --- a/route/middleware.go +++ b/route/middleware.go @@ -45,12 +45,16 @@ func (r *Router) apiKeyProcessor(next http.Handler) http.Handler { } keycfg := r.Config.GetAccessKeyConfig() - if err := keycfg.IsAccepted(apiKey); err != nil { + keyID := "" + if keycfg.HasKeyIDs() { + keyID = r.getKeyID(apiKey) + } + if err := keycfg.IsAccepted(apiKey, keyID); err != nil { r.handlerReturnWithError(w, ErrAuthInvalid, err) return } - replacement, err := keycfg.GetReplaceKey(apiKey) + replacement, err := keycfg.GetReplaceKey(apiKey, keyID) if err != nil { r.handlerReturnWithError(w, ErrAuthInvalid, err) return diff --git a/route/otlp_logs.go b/route/otlp_logs.go index be23128b3e..6f2e39669f 100644 --- a/route/otlp_logs.go +++ b/route/otlp_logs.go @@ -18,11 +18,15 @@ func (r *Router) postOTLPLogs(w http.ResponseWriter, req *http.Request) { ri := huskyotlp.GetRequestInfoFromHttpHeaders(req.Header) apicfg := r.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = r.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { r.handleOTLPFailureResponse(w, req, huskyotlp.OTLPError{Message: err.Error(), HTTPStatusCode: http.StatusUnauthorized}) return } - keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey) + keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err := ri.ValidateLogsHeaders(); err != nil { switch err { @@ -79,10 +83,14 @@ func (l *LogsServer) Export(ctx context.Context, req *collectorlogs.ExportLogsSe l.router.Metrics.Increment(l.router.metricsNames.routerOtlpLogGrpc) ri := huskyotlp.GetRequestInfoFromGrpcMetadata(ctx) apicfg 
:= l.router.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = l.router.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { return nil, status.Error(codes.Unauthenticated, err.Error()) } - keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey) + keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err := ri.ValidateLogsHeaders(); err != nil && err != huskyotlp.ErrMissingAPIKeyHeader { return nil, huskyotlp.AsGRPCError(err) diff --git a/route/otlp_logs_test.go b/route/otlp_logs_test.go index 5a8d868033..7c97fbf16f 100644 --- a/route/otlp_logs_test.go +++ b/route/otlp_logs_test.go @@ -603,8 +603,8 @@ func TestLogsOTLPHandler(t *testing.T) { }, } { t.Run(fmt.Sprintf("ApiKey %s SendKeyMode %s SendKey %s", tt.apiKey, tt.mode, tt.sendKey), func(t *testing.T) { - router.environmentCache.addItem(tt.apiKey, "local", time.Minute) - router.environmentCache.addItem(tt.sendKey, "local", time.Minute) + router.environmentCache.addItem(tt.apiKey, authData{environment: "local"}, time.Minute) + router.environmentCache.addItem(tt.sendKey, authData{environment: "local"}, time.Minute) // HTTP request, _ := http.NewRequest("POST", "/v1/logs", bytes.NewReader(body)) diff --git a/route/otlp_trace.go b/route/otlp_trace.go index 2e8fddf177..ff1d4df5dd 100644 --- a/route/otlp_trace.go +++ b/route/otlp_trace.go @@ -26,11 +26,15 @@ func (r *Router) postOTLPTrace(w http.ResponseWriter, req *http.Request) { ri := huskyotlp.GetRequestInfoFromHttpHeaders(req.Header) apicfg := r.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = r.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { r.handleOTLPFailureResponse(w, req, huskyotlp.OTLPError{Message: err.Error(), HTTPStatusCode: http.StatusUnauthorized}) return } - keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey) + keyToUse, _ := 
apicfg.GetReplaceKey(ri.ApiKey, keyID) if err := ri.ValidateTracesHeaders(); err != nil { switch err { @@ -137,7 +141,11 @@ func (t *TraceServer) ExportTraceData( // Perform final authentication check (key processing already done in handler) apicfg := t.router.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = t.router.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { return nil, status.Error(codes.Unauthenticated, err.Error()) } @@ -205,7 +213,11 @@ func customTraceExportHandler( // Handle SendKeyMode logic before validation, similar to HTTP handler apicfg := traceServer.router.Config.GetAccessKeyConfig() - keyToUse, err := apicfg.GetReplaceKey(ri.ApiKey) + keyID := "" + if apicfg.HasKeyIDs() { + keyID = traceServer.router.getKeyID(ri.ApiKey) + } + keyToUse, err := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err != nil { return nil, status.Error(codes.Unauthenticated, err.Error()) } diff --git a/route/otlp_trace_test.go b/route/otlp_trace_test.go index a57b36ac64..dd5653a986 100644 --- a/route/otlp_trace_test.go +++ b/route/otlp_trace_test.go @@ -505,7 +505,7 @@ func TestOTLPHandler(t *testing.T) { apiKey := "my-api-key" // add cached environment lookup - router.environmentCache.addItem(apiKey, "local", time.Minute) + router.environmentCache.addItem(apiKey, authData{environment: "local"}, time.Minute) req := &collectortrace.ExportTraceServiceRequest{ ResourceSpans: []*trace.ResourceSpans{{ @@ -633,7 +633,7 @@ func TestOTLPHandler(t *testing.T) { event := events[0] // Note: GRPC clients override the user-agent header with their own value. // This is expected behavior and differs from HTTP where custom user-agents are preserved. 
- assert.Equal(t, "grpc-go/1.78.0", event.Data.MetaRefineryIncomingUserAgent) + assert.Equal(t, "grpc-go/1.80.0", event.Data.MetaRefineryIncomingUserAgent) }) t.Run("spans record incoming user agent - HTTP", func(t *testing.T) { @@ -920,8 +920,8 @@ func TestOTLPHandler(t *testing.T) { }, } { t.Run(fmt.Sprintf("ApiKey %s SendKeyMode %s SendKey %s", tt.apiKey, tt.mode, tt.sendKey), func(t *testing.T) { - router.environmentCache.addItem(tt.apiKey, "local", time.Minute) - router.environmentCache.addItem(tt.sendKey, "local", time.Minute) + router.environmentCache.addItem(tt.apiKey, authData{environment: "local"}, time.Minute) + router.environmentCache.addItem(tt.sendKey, authData{environment: "local"}, time.Minute) // HTTP request, _ := http.NewRequest("POST", "/v1/traces", bytes.NewReader(body)) diff --git a/route/route.go b/route/route.go index 2f0c436a26..cf1a0d6a7e 100644 --- a/route/route.go +++ b/route/route.go @@ -994,18 +994,32 @@ func getFirstValueFromMetadata(key string, md metadata.MD) string { return "" } +// authData holds the information retrieved from the Honeycomb /1/auth endpoint +// and stored in the environment cache. +type authData struct { + environment string + keyID string +} + type environmentCache struct { mutex sync.RWMutex items map[string]*cacheItem ttl time.Duration - getFn func(string) (string, error) + getFn func(string) (authData, error) } +// SetEnvironmentCache replaces the environment cache with a new one using the +// provided TTL and lookup function. The lookup function returns only the +// environment name, and the key ID will be empty in the cached authData. +// This method exists for backward compatibility with tests. 
func (r *Router) SetEnvironmentCache(ttl time.Duration, getFn func(string) (string, error)) { - r.environmentCache = newEnvironmentCache(ttl, getFn) + r.environmentCache = newEnvironmentCache(ttl, func(key string) (authData, error) { + env, err := getFn(key) + return authData{environment: env}, err + }) } -func newEnvironmentCache(ttl time.Duration, getFn func(string) (string, error)) *environmentCache { +func newEnvironmentCache(ttl time.Duration, getFn func(string) (authData, error)) *environmentCache { return &environmentCache{ items: make(map[string]*cacheItem), ttl: ttl, @@ -1015,13 +1029,13 @@ func newEnvironmentCache(ttl time.Duration, getFn func(string) (string, error)) type cacheItem struct { expiresAt time.Time - value string + value authData } // get queries the cached items, returning cache hits that have not expired. // Cache missed use the configured getFn to populate the cache. -func (c *environmentCache) get(key string) (string, error) { - var val string +func (c *environmentCache) get(key string) (authData, error) { + var val authData // get read lock so that we don't attempt to read from the map // while another routine has a write lock and is actively writing // to the map. @@ -1032,7 +1046,7 @@ func (c *environmentCache) get(key string) (string, error) { } } c.mutex.RUnlock() - if val != "" { + if val.environment != "" { return val, nil } @@ -1051,7 +1065,7 @@ func (c *environmentCache) get(key string) (string, error) { val, err := c.getFn(key) if err != nil { - return "", err + return authData{}, err } c.addItem(key, val, c.ttl) @@ -1060,7 +1074,7 @@ func (c *environmentCache) get(key string) (string, error) { // addItem create a new cache entry in the environment cache. 
// This is not thread-safe, and should only be used in tests -func (c *environmentCache) addItem(key string, value string, ttl time.Duration) { +func (c *environmentCache) addItem(key string, value authData, ttl time.Duration) { c.items[key] = &cacheItem{ expiresAt: time.Now().Add(ttl), value: value, @@ -1080,6 +1094,7 @@ type AuthInfo struct { APIKeyAccess map[string]bool `json:"api_key_access"` Team TeamInfo `json:"team"` Environment EnvironmentInfo `json:"environment"` + ID string `json:"id"` } func (r *Router) getEnvironmentName(apiKey string) (string, error) { @@ -1087,24 +1102,36 @@ func (r *Router) getEnvironmentName(apiKey string) (string, error) { return "", nil } - env, err := r.environmentCache.get(apiKey) + data, err := r.environmentCache.get(apiKey) if err != nil { return "", err } - return env, nil + return data.environment, nil } -func (r *Router) lookupEnvironment(apiKey string) (string, error) { +// getKeyID returns the Honeycomb ingest key ID associated with the given API +// key. It uses the environment cache, so no additional API call is made if the +// key has already been looked up. Returns an empty string for legacy keys, +// blank keys, or if the lookup fails. +func (r *Router) getKeyID(apiKey string) string { + if apiKey == "" || config.IsLegacyAPIKey(apiKey) { + return "" + } + data, _ := r.environmentCache.get(apiKey) + return data.keyID +} + +func (r *Router) lookupEnvironment(apiKey string) (authData, error) { apiEndpoint := r.Config.GetHoneycombAPI() authURL, err := url.Parse(apiEndpoint) if err != nil { - return "", fmt.Errorf("failed to parse Honeycomb API URL config value. %w", err) + return authData{}, fmt.Errorf("failed to parse Honeycomb API URL config value. %w", err) } authURL.Path = "/1/auth" req, err := http.NewRequest("GET", authURL.String(), nil) if err != nil { - return "", fmt.Errorf("failed to create AuthInfo request. %w", err) + return authData{}, fmt.Errorf("failed to create AuthInfo request. 
%w", err) } req.Header.Set("x-Honeycomb-team", apiKey) @@ -1112,23 +1139,26 @@ func (r *Router) lookupEnvironment(apiKey string) (string, error) { r.Logger.Debug().WithString("endpoint", authURL.String()).Logf("Attempting to get environment name using API key") resp, err := r.proxyClient.Do(req) if err != nil { - return "", fmt.Errorf("failed sending AuthInfo request to Honeycomb API. %w", err) + return authData{}, fmt.Errorf("failed sending AuthInfo request to Honeycomb API. %w", err) } defer resp.Body.Close() switch { case resp.StatusCode == http.StatusUnauthorized: - return "", fmt.Errorf("received 401 response for AuthInfo request from Honeycomb API - check your API key") + return authData{}, fmt.Errorf("received 401 response for AuthInfo request from Honeycomb API - check your API key") case resp.StatusCode > 299: - return "", fmt.Errorf("received %d response for AuthInfo request from Honeycomb API", resp.StatusCode) + return authData{}, fmt.Errorf("received %d response for AuthInfo request from Honeycomb API", resp.StatusCode) } authinfo := AuthInfo{} if err := json.NewDecoder(resp.Body).Decode(&authinfo); err != nil { - return "", fmt.Errorf("failed to JSON decode of AuthInfo response from Honeycomb API") + return authData{}, fmt.Errorf("failed to JSON decode of AuthInfo response from Honeycomb API") } r.Logger.Debug().WithString("environment", authinfo.Environment.Name).Logf("Got environment") - return authinfo.Environment.Name, nil + return authData{ + environment: authinfo.Environment.Name, + keyID: authinfo.ID, + }, nil } func (r *Router) Check(ctx context.Context, req *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) { diff --git a/route/route_test.go b/route/route_test.go index 81781a4ec6..1e47db3beb 100644 --- a/route/route_test.go +++ b/route/route_test.go @@ -741,53 +741,53 @@ func TestDependencyInjection(t *testing.T) { func TestEnvironmentCache(t *testing.T) { t.Run("calls getFn on cache miss", func(t *testing.T) { - 
cache := newEnvironmentCache(time.Second, func(key string) (string, error) { + cache := newEnvironmentCache(time.Second, func(key string) (authData, error) { if key != "key" { t.Errorf("expected %s - got %s", "key", key) } - return "test", nil + return authData{environment: "test"}, nil }) val, err := cache.get("key") if err != nil { t.Errorf("got error calling getOrSet - %e", err) } - if val != "test" { - t.Errorf("expected %s - got %s", "test", val) + if val.environment != "test" { + t.Errorf("expected %s - got %s", "test", val.environment) } }) t.Run("does not call getFn on cache hit", func(t *testing.T) { - cache := newEnvironmentCache(time.Second, func(key string) (string, error) { + cache := newEnvironmentCache(time.Second, func(key string) (authData, error) { t.Errorf("should not have called getFn") - return "", nil + return authData{}, nil }) - cache.addItem("key", "value", time.Second) + cache.addItem("key", authData{environment: "value"}, time.Second) val, err := cache.get("key") if err != nil { t.Errorf("got error calling getOrSet - %e", err) } - if val != "value" { - t.Errorf("expected %s - got %s", "value", val) + if val.environment != "value" { + t.Errorf("expected %s - got %s", "value", val.environment) } }) t.Run("ignores expired items", func(t *testing.T) { called := false - cache := newEnvironmentCache(time.Millisecond, func(key string) (string, error) { + cache := newEnvironmentCache(time.Millisecond, func(key string) (authData, error) { called = true - return "value", nil + return authData{environment: "value"}, nil }) - cache.addItem("key", "value", time.Millisecond) + cache.addItem("key", authData{environment: "value"}, time.Millisecond) time.Sleep(time.Millisecond * 5) val, err := cache.get("key") if err != nil { t.Errorf("got error calling getOrSet - %e", err) } - if val != "value" { - t.Errorf("expected %s - got %s", "value", val) + if val.environment != "value" { + t.Errorf("expected %s - got %s", "value", val.environment) } if !called { 
t.Errorf("expected to call getFn") @@ -796,8 +796,8 @@ func TestEnvironmentCache(t *testing.T) { t.Run("errors returned from getFn are propagated", func(t *testing.T) { expectedErr := errors.New("error") - cache := newEnvironmentCache(time.Second, func(key string) (string, error) { - return "", expectedErr + cache := newEnvironmentCache(time.Second, func(key string) (authData, error) { + return authData{}, expectedErr }) _, err := cache.get("key") @@ -1206,7 +1206,7 @@ func newBatchRouter(t testing.TB) *Router { Sharder: mockSharder, routerType: types.RouterTypeIncoming, iopLogger: iopLogger{Logger: &logger.NullLogger{}, incomingOrPeer: types.RouterTypeIncoming.String()}, - environmentCache: newEnvironmentCache(time.Second, func(key string) (string, error) { return "test", nil }), + environmentCache: newEnvironmentCache(time.Second, func(key string) (authData, error) { return authData{environment: "test"}, nil }), Tracer: noop.Tracer{}, } var err error diff --git a/rules.md b/rules.md index ee6023bc0e..a401c882d1 100644 --- a/rules.md +++ b/rules.md @@ -3,7 +3,7 @@ # Honeycomb Refinery Rules Documentation This is the documentation for the rules configuration for Honeycomb's Refinery. -It was automatically generated on 2026-02-25 at 20:49:27 UTC. +It was automatically generated on 2026-04-09 at 22:21:32 UTC. 
## The Rules file diff --git a/sample/dynamic.go b/sample/dynamic.go index 7bb9022a81..7be6816e5a 100644 --- a/sample/dynamic.go +++ b/sample/dynamic.go @@ -41,7 +41,7 @@ type DynamicSampler struct { keyFields, nonRootFields []string dynsampler dynsampler.Sampler - metricsRecorder dynsamplerMetricsRecorder + metricsRecorder *dynsamplerMetricsRecorder } func (d *DynamicSampler) Start() error { @@ -56,12 +56,13 @@ func (d *DynamicSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics from the dynsampler-go package - d.metricsRecorder = dynsamplerMetricsRecorder{ - met: d.Metrics, - prefix: "dynamic", + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + met: d.Metrics, + prefix: "dynamic", + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/dynamic_ema.go b/sample/dynamic_ema.go index 8372923c09..0dd19bf811 100644 --- a/sample/dynamic_ema.go +++ b/sample/dynamic_ema.go @@ -56,12 +56,13 @@ func (d *EMADynamicSampler) Start() error { d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "emadynamic", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "emadynamic", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/ema_throughput.go b/sample/ema_throughput.go index e881933e2c..7af96e9bdb 100644 --- a/sample/ema_throughput.go +++ b/sample/ema_throughput.go @@ -58,12 +58,13 @@ func (d *EMAThroughputSampler) Start() error { d.key = 
newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "emathroughput", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "emathroughput", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/rules.go b/sample/rules.go index 3f907f3ad1..cf25d547dd 100644 --- a/sample/rules.go +++ b/sample/rules.go @@ -308,158 +308,8 @@ func extractValueFromSpan( return nil, false, false } -// This only gets called when we're using one of the basic operators, and -// there is no datatype specified (meaning that the Matches function has not -// been set). In this case, we need to do some type conversion and comparison -// to determine whether the condition matches the value. +// conditionMatchesValue delegates to config.ConditionMatchesValue. +// It is called when condition.Matches is nil (Datatype was not specified). 
func conditionMatchesValue(condition *config.RulesBasedSamplerCondition, value interface{}, exists bool) bool { - var match bool - switch exists { - case true: - switch condition.Operator { - case config.Exists: - match = exists - case config.NEQ: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison != equal - } - case config.EQ: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == equal - } - case config.GT: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == more - } - case config.GTE: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == more || comparison == equal - } - case config.LT: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == less - } - case config.LTE: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == less || comparison == equal - } - } - case false: - switch condition.Operator { - case config.NotExists: - match = !exists - } - } - return match -} - -const ( - less = -1 - equal = 0 - more = 1 -) - -func compare(a, b interface{}) (int, bool) { - // a is the tracing data field value. This can be: float64, int64, bool, or string - // b is the Rule condition value. 
This can be: float64, int64, int, bool, or string - // Note: in YAML config parsing, the Value may be returned as int - // When comparing numeric values, we need to check across the 3 types: float64, int64, and int - - if a == nil { - if b == nil { - return equal, true - } - - return less, true - } - - if b == nil { - return more, true - } - - switch at := a.(type) { - case int64: - switch bt := b.(type) { - case int: - i := int(at) - switch { - case i < bt: - return less, true - case i > bt: - return more, true - default: - return equal, true - } - case int64: - switch { - case at < bt: - return less, true - case at > bt: - return more, true - default: - return equal, true - } - case float64: - f := float64(at) - switch { - case f < bt: - return less, true - case f > bt: - return more, true - default: - return equal, true - } - } - case float64: - switch bt := b.(type) { - case int: - f := float64(bt) - switch { - case at < f: - return less, true - case at > f: - return more, true - default: - return equal, true - } - case int64: - f := float64(bt) - switch { - case at < f: - return less, true - case at > f: - return more, true - default: - return equal, true - } - case float64: - switch { - case at < bt: - return less, true - case at > bt: - return more, true - default: - return equal, true - } - } - case bool: - switch bt := b.(type) { - case bool: - switch { - case !at && bt: - return less, true - case at && !bt: - return more, true - default: - return equal, true - } - } - case string: - switch bt := b.(type) { - case string: - return strings.Compare(at, bt), true - } - } - - return equal, false + return config.ConditionMatchesValue(condition, value, exists) } diff --git a/sample/sample.go b/sample/sample.go index e90b172e4b..eff69f3abd 100644 --- a/sample/sample.go +++ b/sample/sample.go @@ -3,6 +3,7 @@ package sample import ( "fmt" "os" + "slices" "strings" "sync" @@ -24,6 +25,15 @@ type CanSetGoalThroughputPerSec interface { SetGoalThroughputPerSec(int) } 
+type sharedDynsamplerEntry struct { + dynsampler any + recorder *dynsamplerMetricsRecorder +} + +var samplerFactoryMetrics = []metrics.Metadata{ + {Name: "unique_dynsampler_count", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "Number of unique dynsampler-go samplers created"}, +} + // SamplerFactory is used to create new samplers with common (injected) resources type SamplerFactory struct { Config config.Config `inject:""` @@ -33,8 +43,8 @@ type SamplerFactory struct { peerCount int mutex sync.Mutex - // Shared dynsampler instances to maintain global throughput tracking - sharedDynsamplers map[string]any + // Shared dynsampler instances and their metrics recorders, keyed identically to avoid Nร—overcounting + sharedDynsamplers map[string]sharedDynsamplerEntry // Store original GoalThroughputPerSec values for cluster size calculations. // We need this to recalculate goal throughput values when the cluster size @@ -55,8 +65,8 @@ func (s *SamplerFactory) updatePeerCounts() { } // Update goal throughput for all throughput-based dynsamplers - for dynsamplerKey, dynsamplerInstance := range s.sharedDynsamplers { - if hasThroughput, ok := dynsamplerInstance.(CanSetGoalThroughputPerSec); ok { + for dynsamplerKey, entry := range s.sharedDynsamplers { + if hasThroughput, ok := entry.dynsampler.(CanSetGoalThroughputPerSec); ok { if cfg, ok := s.goalThroughputConfigs[dynsamplerKey]; ok { // Calculate new throughput based on cluster size newThroughput := max(cfg/s.peerCount, 1) @@ -68,30 +78,46 @@ func (s *SamplerFactory) updatePeerCounts() { func (s *SamplerFactory) Start() error { s.peerCount = 1 - s.sharedDynsamplers = make(map[string]any) + s.sharedDynsamplers = make(map[string]sharedDynsamplerEntry) s.goalThroughputConfigs = make(map[string]int) if s.Peers != nil { s.Peers.RegisterUpdatedPeersCallback(s.updatePeerCounts) } + for _, metric := range samplerFactoryMetrics { + s.Metrics.Register(metric) + } return nil } -func getSharedDynsampler[ST any, CT 
any]( +func getSharedDynsamplerAndRecorder[ST dynsampler.Sampler, CT any]( s *SamplerFactory, dynsamplerKey string, + prefix string, config CT, create func(config CT) ST, -) ST { +) (ST, *dynsamplerMetricsRecorder) { s.mutex.Lock() defer s.mutex.Unlock() - var ok bool - var dynsamplerInstance ST - if dynsamplerInstance, ok = s.sharedDynsamplers[dynsamplerKey].(ST); !ok { - dynsamplerInstance = create(config) - s.sharedDynsamplers[dynsamplerKey] = dynsamplerInstance + if entry, ok := s.sharedDynsamplers[dynsamplerKey]; ok { + if existing, ok := entry.dynsampler.(ST); ok { + return existing, entry.recorder + } } - return dynsamplerInstance + dynsamplerInstance := create(config) + r := &dynsamplerMetricsRecorder{prefix: prefix, met: s.Metrics} + r.RegisterMetrics(dynsamplerInstance) + s.sharedDynsamplers[dynsamplerKey] = sharedDynsamplerEntry{dynsampler: dynsamplerInstance, recorder: r} + return dynsamplerInstance, r +} + +// makeDynsamplerKey builds a dynsampler map key with a sorted copy of fieldList so that +// configs with the same fields in different order always map to the same instance. +func makeDynsamplerKey(prefix, samplerType string, rate int64, fieldList []string) string { + sorted := make([]string, len(fieldList)) + copy(sorted, fieldList) + slices.Sort(sorted) + return fmt.Sprintf("%s:%s:%d:%v", prefix, samplerType, rate, sorted) } // createSampler creates a sampler with shared dynsamplers based on the config type. 
@@ -107,45 +133,45 @@ func (s *SamplerFactory) createSampler(c any, keyPrefix string) Sampler { case *config.DeterministicSamplerConfig: sampler = &DeterministicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics} case *config.DynamicSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:dynamic:%d:%v", keyPrefix, c.SampleRate, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForDynamicSampler) - sampler = &DynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + dynsamplerKey := makeDynsamplerKey(keyPrefix, "dynamic", c.SampleRate, c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "dynamic", c, createDynForDynamicSampler) + sampler = &DynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.EMADynamicSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:emadynamic:%d:%v", keyPrefix, c.GoalSampleRate, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForEMADynamicSampler) - sampler = &EMADynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + dynsamplerKey := makeDynsamplerKey(keyPrefix, "emadynamic", int64(c.GoalSampleRate), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "emadynamic", c, createDynForEMADynamicSampler) + sampler = &EMADynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.RulesBasedSamplerConfig: sampler = &RulesBasedSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, SamplerFactory: s, samplerPrefix: keyPrefix} case *config.TotalThroughputSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:totalthroughput:%d:%v", keyPrefix, c.GoalThroughputPerSec, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, 
createDynForTotalThroughputSampler) + dynsamplerKey := makeDynsamplerKey(keyPrefix, "totalthroughput", int64(c.GoalThroughputPerSec), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "totalthroughput", c, createDynForTotalThroughputSampler) // only track goal throughput config if we need to recalculate it later based on cluster size if c.UseClusterSize { s.mutex.Lock() s.goalThroughputConfigs[dynsamplerKey] = c.GoalThroughputPerSec s.mutex.Unlock() } - sampler = &TotalThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + sampler = &TotalThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.EMAThroughputSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:emathroughput:%d:%v", keyPrefix, c.GoalThroughputPerSec, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForEMAThroughputSampler) + dynsamplerKey := makeDynsamplerKey(keyPrefix, "emathroughput", int64(c.GoalThroughputPerSec), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "emathroughput", c, createDynForEMAThroughputSampler) // only track goal throughput config if we need to recalculate it later based on cluster size if c.UseClusterSize { s.mutex.Lock() s.goalThroughputConfigs[dynsamplerKey] = c.GoalThroughputPerSec s.mutex.Unlock() } - sampler = &EMAThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + sampler = &EMAThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.WindowedThroughputSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:windowedthroughput:%d:%v", keyPrefix, c.GoalThroughputPerSec, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForWindowedThroughputSampler) + 
dynsamplerKey := makeDynsamplerKey(keyPrefix, "windowedthroughput", int64(c.GoalThroughputPerSec), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "windowedthroughput", c, createDynForWindowedThroughputSampler) // only track goal throughput config if we need to recalculate it later based on cluster size if c.UseClusterSize { s.mutex.Lock() s.goalThroughputConfigs[dynsamplerKey] = c.GoalThroughputPerSec s.mutex.Unlock() } - sampler = &WindowedThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + sampler = &WindowedThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} default: s.Logger.Error().Logf("unknown sampler type %T. Exiting.", c) os.Exit(1) @@ -161,6 +187,7 @@ func (s *SamplerFactory) createSampler(c any, keyPrefix string) Sampler { s.Logger.Debug().WithField("dataset", keyPrefix).Logf("created implementation for sampler type %T", c) // Update peer counts after creating a sampler s.updatePeerCounts() + s.Metrics.Gauge("unique_dynsampler_count", float64(len(s.sharedDynsamplers))) return sampler } @@ -211,8 +238,8 @@ func (s *SamplerFactory) ClearDynsamplers() { defer s.mutex.Unlock() // Stop all shared dynsamplers - for _, dynSampler := range s.sharedDynsamplers { - if stopper, ok := dynSampler.(interface{ Stop() }); ok { + for _, entry := range s.sharedDynsamplers { + if stopper, ok := entry.dynsampler.(interface{ Stop() }); ok { stopper.Stop() } } @@ -247,6 +274,7 @@ type internalDysamplerMetric struct { } type dynsamplerMetricsRecorder struct { + mu sync.Mutex prefix string dynPrefix string // Used for accessing metrics from dynsampler-go // Stores the last recorded internal metrics produced by dynsampler-go @@ -258,8 +286,8 @@ type dynsamplerMetricsRecorder struct { // RegisterMetrics registers the metrics that will be recorded by this package. 
// It initializes the necessary metrics and prepares them for recording. // It MUST be called before any calls to RecordMetrics. +// This function is not concurrency safe. func (d *dynsamplerMetricsRecorder) RegisterMetrics(sampler dynsampler.Sampler) { - // Register statistics this package will produce d.dynPrefix = d.prefix + "_" d.lastMetrics = make(map[string]internalDysamplerMetric) dynInternalMetrics := sampler.GetMetrics(d.dynPrefix) @@ -274,6 +302,7 @@ func (d *dynsamplerMetricsRecorder) RegisterMetrics(sampler dynsampler.Sampler) } func (d *dynsamplerMetricsRecorder) RecordMetrics(sampler dynsampler.Sampler, kept bool, rate uint, numTraceKey int) { + d.mu.Lock() for name, val := range sampler.GetMetrics(d.dynPrefix) { m := d.lastMetrics[name] switch m.metricType { @@ -286,6 +315,7 @@ func (d *dynsamplerMetricsRecorder) RecordMetrics(sampler dynsampler.Sampler, ke d.met.Gauge(name, float64(val)) } } + d.mu.Unlock() if kept { d.met.Increment(d.metricNames.numKept) diff --git a/sample/sample_test.go b/sample/sample_test.go index 7afbca5ad8..5124e06f91 100644 --- a/sample/sample_test.go +++ b/sample/sample_test.go @@ -462,6 +462,73 @@ func TestDifferentDatasetsShouldNotShareDynsampler(t *testing.T) { assert.Equal(t, prodImpl.dynsampler.GoalThroughputPerSec, dogfoodImpl.dynsampler.GoalThroughputPerSec) } +// TestFieldListOrderDoesNotAffectDynsamplerSharing verifies that two sampler configs with identical +// FieldList entries in different order share the same dynsampler instance. 
+func TestFieldListOrderDoesNotAffectDynsamplerSharing(t *testing.T) { + fields1 := []string{"service.name", "http.method", "status.code"} + fields2 := []string{"status.code", "service.name", "http.method"} + + newFactory := func() *SamplerFactory { + factory := &SamplerFactory{ + Logger: &logger.NullLogger{}, + Metrics: &metrics.NullMetrics{}, + } + factory.Start() + t.Cleanup(factory.Stop) + return factory + } + + t.Run("DynamicSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.DynamicSamplerConfig{SampleRate: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.DynamicSamplerConfig{SampleRate: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*DynamicSampler).dynsampler, s2.(*DynamicSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("EMADynamicSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.EMADynamicSamplerConfig{GoalSampleRate: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.EMADynamicSamplerConfig{GoalSampleRate: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*EMADynamicSampler).dynsampler, s2.(*EMADynamicSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("TotalThroughputSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.TotalThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.TotalThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*TotalThroughputSampler).dynsampler, s2.(*TotalThroughputSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("EMAThroughputSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.EMAThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields1}, 
"env") + s2 := f.createSampler(&config.EMAThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*EMAThroughputSampler).dynsampler, s2.(*EMAThroughputSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("WindowedThroughputSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.WindowedThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.WindowedThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*WindowedThroughputSampler).dynsampler, s2.(*WindowedThroughputSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) +} + // TestClusterSizeUpdatesSamplers verifies that the SamplerFactory properly handles dynamic peer updates // and their impact on throughput-based sampling behavior. func TestClusterSizeUpdatesSamplers(t *testing.T) { diff --git a/sample/totalthroughput.go b/sample/totalthroughput.go index c69294a835..686d176eae 100644 --- a/sample/totalthroughput.go +++ b/sample/totalthroughput.go @@ -57,12 +57,13 @@ func (d *TotalThroughputSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "totalthroughput", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "totalthroughput", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/trace_key.go b/sample/trace_key.go index 66c42b2eb4..44f617532a 100644 --- a/sample/trace_key.go +++ b/sample/trace_key.go @@ -28,10 +28,13 @@ type traceKey 
struct { func newTraceKey(fields []string, useTraceLength bool) *traceKey { // always put the field list in sorted order for easier comparison - sort.Strings(fields) - rootOnlyFields := make([]string, 0, len(fields)/2) - nonRootFields := make([]string, 0, len(fields)/2) - for _, field := range fields { + copiedFields := make([]string, len(fields)) + copy(copiedFields, fields) + sort.Strings(copiedFields) + + rootOnlyFields := make([]string, 0, len(copiedFields)/2) + nonRootFields := make([]string, 0, len(copiedFields)/2) + for _, field := range copiedFields { if strings.HasPrefix(field, config.RootPrefix) { rootOnlyFields = append(rootOnlyFields, field[len(config.RootPrefix):]) continue diff --git a/sample/windowed_throughput.go b/sample/windowed_throughput.go index adaf019be4..11a35e49d0 100644 --- a/sample/windowed_throughput.go +++ b/sample/windowed_throughput.go @@ -54,12 +54,13 @@ func (d *WindowedThroughputSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "windowedthroughput", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "windowedthroughput", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/tools/convert/configDataNames.txt b/tools/convert/configDataNames.txt index 793d4bb1e7..06cdaaa88e 100644 --- a/tools/convert/configDataNames.txt +++ b/tools/convert/configDataNames.txt @@ -1,5 +1,5 @@ # Names of groups and fields in the new config file format. -# Automatically generated on 2026-02-25 at 20:49:25 UTC. +# Automatically generated on 2026-04-09 at 22:21:29 UTC. 
General: - ConfigurationVersion @@ -34,6 +34,8 @@ OpAMP: AccessKeys: - ReceiveKeys (originally APIKeys) + - ReceiveKeyIDs + - AcceptOnlyListedKeys - SendKey @@ -136,6 +138,8 @@ OTelMetrics: - Compression + - AdditionalAttributes + OTelTracing: - Enabled diff --git a/tools/convert/metricsMeta.yaml b/tools/convert/metricsMeta.yaml index 67fadc5de3..032c6d9025 100644 --- a/tools/convert/metricsMeta.yaml +++ b/tools/convert/metricsMeta.yaml @@ -87,10 +87,18 @@ complete: type: Gauge unit: Dimensionless description: number of spans in the peer queue + - name: collector_peer_queue_capacity + type: Gauge + unit: Dimensionless + description: configured maximum number of spans in the peer queue - name: collector_incoming_queue_length type: Gauge unit: Dimensionless description: number of spans in the incoming queue + - name: collector_incoming_queue_capacity + type: Gauge + unit: Dimensionless + description: configured maximum number of spans in the incoming queue - name: collector_peer_queue type: Histogram unit: Dimensionless @@ -107,6 +115,10 @@ complete: type: Gauge unit: Bytes description: current heap allocation + - name: memory_limit + type: Gauge + unit: Bytes + description: configured maximum memory allocation for the collector (derived from MaxAlloc or AvailableMemory * MaxMemoryPercentage) - name: span_received type: Counter unit: Dimensionless @@ -175,6 +187,10 @@ complete: type: Counter unit: Dimensionless description: number of spans kept due to stress relief + - name: events_dropped + type: Counter + unit: Dimensionless + description: number of events dropped - name: trace_kept_sample_rate type: Histogram unit: Dimensionless diff --git a/tools/convert/minimal_config.yaml b/tools/convert/minimal_config.yaml index eb49635629..db18ce37f7 100644 --- a/tools/convert/minimal_config.yaml +++ b/tools/convert/minimal_config.yaml @@ -1,5 +1,5 @@ # sample uncommented config file containing all possible fields -# automatically generated on 2026-02-25 at 20:49:25 UTC +# 
automatically generated on 2026-04-09 at 22:21:30 UTC General: ConfigurationVersion: 2 MinRefineryVersion: "v2.0" @@ -18,6 +18,9 @@ AccessKeys: ReceiveKeys: - "your-key-goes-here" + ReceiveKeyIDs: + - "your-key-id-goes-here" + AcceptOnlyListedKeys: false SendKey: SetThisToAHoneycombKey SendKeyMode: none @@ -68,6 +71,10 @@ OTelMetrics: Dataset: "Refinery Metrics" ReportingInterval: 30s Compression: gzip + AdditionalAttributes: + "pipeline.id": "'12345'" + "rollout.id": "'67890'" + OTelTracing: Enabled: false APIHost: "https://api.honeycomb.io" diff --git a/tools/convert/templates/configV2.tmpl b/tools/convert/templates/configV2.tmpl index 7f91a2f2a5..4546348b92 100644 --- a/tools/convert/templates/configV2.tmpl +++ b/tools/convert/templates/configV2.tmpl @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created {{ now }} from {{ .Input }} using a template generated on 2026-02-25 at 20:49:24 UTC +# created {{ now }} from {{ .Input }} using a template generated on 2026-04-09 at 22:21:28 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -165,15 +165,33 @@ AccessKeys: ## will be proxied through to the upstream API directly without modifying ## keys. ## - ## Not eligible for live reload. + ## Eligible for live reload. {{ renderStringarray .Data "ReceiveKeys" "APIKeys" "your-key-goes-here" }} + ## ReceiveKeyIDs is a set of Honeycomb Ingest Key IDs that the proxy will + ## treat specially. + ## + ## When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose + ## Honeycomb ingest key ID matches an entry in this list will be + ## accepted. The key ID is the `id` field returned by the Honeycomb + ## `/1/auth` endpoint; it is distinct from the full API key value. 
+ ## This allows authorization based on key IDs rather than full key + ## values, which avoids storing secret key material in the configuration + ## file. Both `ReceiveKeys` and `ReceiveKeyIDs` may be used + ## simultaneously. + ## Note: This feature does not support legacy API keys. Only Honeycomb + ## Ingest Keys (which have a key ID) are compatible with this setting. + ## + ## Eligible for live reload. + {{ renderStringarray .Data "ReceiveKeyIDs" "ReceiveKeyIDs" "your-key-id-goes-here" }} + ## AcceptOnlyListedKeys is a boolean flag that causes events arriving ## with API keys not in the `ReceiveKeys` list to be rejected. ## - ## If `true`, then only traffic using the keys listed in `ReceiveKeys` is - ## accepted. Events arriving with API keys not in the `ReceiveKeys` list - ## will be rejected with an HTTP `401` error. + ## If `true`, then only traffic using the keys listed in `ReceiveKeys` or + ## whose key ID is listed in `ReceiveKeyIDs` is accepted. Events arriving + ## with API keys not in either list will be rejected with an HTTP `401` + ## error. ## If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. ## This setting is applied **before** the `SendKey` and `SendKeyMode` ## settings. @@ -690,6 +708,22 @@ OTelMetrics: ## Options: none gzip {{ choice .Data "Compression" "Compression" (makeSlice "none" "gzip") "gzip" }} + ## AdditionalAttributes adds the provided attributes as resource + ## attributes on all OpenTelemetry metrics emitted by Refinery. + ## + ## This is useful for injecting deployment-specific metadata (such as a + ## cluster ID or environment name) into metrics so they can be filtered + ## or grouped in the metrics backend. Both keys and values must be + ## strings. + ## When supplying via a environment variable, the value should be a + ## string of comma-separated key-value pairs. When supplying via the + ## command line, the value should be a key value pair. 
If multiple + ## key-value pairs are needed, each should be supplied via its own + ## command line flag. The key-value pairs must use ':' as the separator. + ## + ## Not eligible for live reload. + {{ renderMap .Data "AdditionalAttributes" "AdditionalAttributes" "pipeline.id:'12345',rollout.id:'67890'" }} + ########################### ## OpenTelemetry Tracing ## ########################### diff --git a/transmit/direct_transmit.go b/transmit/direct_transmit.go index 46d36ec572..607250bab1 100644 --- a/transmit/direct_transmit.go +++ b/transmit/direct_transmit.go @@ -316,24 +316,19 @@ func (d *DirectTransmission) Stop() error { return nil } -// handleBatchFailure handles metrics updates when the entire batch fails -func (d *DirectTransmission) handleBatchFailure(batch []*types.Event) { - d.Metrics.Increment(d.metricKeys.counterSendErrors) - for range batch { - d.Metrics.Down(d.metricKeys.updownQueuedItems) - } -} - -// handleEventError logs an error and updates metrics for a single event -func (d *DirectTransmission) handleEventError(ev *types.Event, statusCode int, queueTime int64, errorMsg string, responseBody []byte) { +// handleError logs an error with common fields and custom message +func (d *DirectTransmission) handleError(ev *types.Event, statusCode int, queueTime int64, errorMsg string, responseBody []byte, logMessage string) { log := d.Logger.Error().WithFields(map[string]any{ - "status_code": statusCode, "api_host": ev.APIHost, "dataset": ev.Dataset, "environment": ev.Environment, "roundtrip_usec": queueTime, }) + if statusCode > 0 { + log = log.WithField("status_code", statusCode) + } + if errorMsg != "" { log = log.WithField("error", errorMsg) } @@ -350,7 +345,30 @@ func (d *DirectTransmission) handleEventError(ev *types.Event, statusCode int, q } } - log.Logf("error when sending event") + log.Logf(logMessage) +} + +// handleBatchFailure handles metrics updates when the entire batch fails +func (d *DirectTransmission) handleBatchFailure(batch 
[]*types.Event, errorMsg string, logMessage string) { + d.Metrics.Increment(d.metricKeys.counterSendErrors) + now := time.Now().UnixMicro() + if len(batch) > 0 { + queueTime := now - batch[0].EnqueuedUnixMicro + d.handleError(batch[0], 0, queueTime, errorMsg, nil, logMessage) + } + + for _, ev := range batch { + d.Metrics.Histogram(d.metricKeys.histogramQueueTime, float64(now-ev.EnqueuedUnixMicro)) + d.Metrics.Down(d.metricKeys.updownQueuedItems) + } +} + +// handleEventError logs an error and updates metrics for a single event +func (d *DirectTransmission) handleEventError(ev *types.Event, statusCode int, queueTime int64, errorMsg string, responseBody []byte, logMessage string) { + if logMessage == "" { + logMessage = "error when sending event" + } + d.handleError(ev, statusCode, queueTime, errorMsg, responseBody, logMessage) d.Metrics.Increment(d.metricKeys.counterResponseErrors) d.Metrics.Down(d.metricKeys.updownQueuedItems) d.Metrics.Histogram(d.metricKeys.histogramQueueTime, float64(queueTime)) @@ -407,9 +425,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { if err != nil { // Skip this message and remove it from the list, so we don't // try to account for it again. 
- d.Logger.Error().WithField("err", err.Error()).Logf("failed to marshal event") - d.Metrics.Down(d.metricKeys.updownQueuedItems) - d.Metrics.Increment(d.metricKeys.counterResponseErrors) + d.handleEventError(wholeBatch[i], 0, time.Now().UnixMicro()-wholeBatch[i].EnqueuedUnixMicro, err.Error(), nil, "failed to marshal event") continue } if len(newPacked) > apiMaxBatchSize { @@ -440,8 +456,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { apiURL, err := buildRequestURL(apiHost, dataset) if err != nil { - d.Logger.Error().WithField("err", err.Error()).Logf("failed to create request URL") - d.handleBatchFailure(subBatch) + d.handleBatchFailure(subBatch, err.Error(), "failed to create request URL") continue } @@ -471,8 +486,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { req, err = http.NewRequest("POST", apiURL, readerPtr) if err != nil { - d.Logger.Error().WithField("err", err.Error()).Logf("failed to create request") - d.handleBatchFailure(subBatch) + d.handleBatchFailure(subBatch, err.Error(), "failed to create request") break } @@ -523,13 +537,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { dequeuedAt := d.Clock.Now() if err != nil { - d.Logger.Error().WithField("err", err.Error()).Logf("http POST failed") - - // Network/connection error - affects all events in batch - for _, ev := range subBatch { - queueTime := dequeuedAt.UnixMicro() - ev.EnqueuedUnixMicro - d.handleEventError(ev, 0, queueTime, err.Error(), nil) - } + d.handleBatchFailure(subBatch, err.Error(), "") continue } @@ -544,15 +552,18 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { if resp.Header.Get("Content-Type") == "application/msgpack" { err = msgpack.NewDecoder(resp.Body).Decode(&batchResponses) if err != nil { + // This is an error from processing response body, not an error from sending events. 
No need to include event information here d.Logger.Error().WithField("err", err.Error()).Logf("failed to decode msgpack batch response") } } else { bodyBytes, err := io.ReadAll(resp.Body) if err != nil { + // This is an error from processing response body, not an error from sending events. No need to include event information here d.Logger.Error().WithField("err", err.Error()).Logf("failed to read response body") } else { err = json.Unmarshal(bodyBytes, &batchResponses) if err != nil { + // This is an error from processing response body, not an error from sending events. No need to include event information here d.Logger.Error().WithField("err", err.Error()).Logf("failed to decode JSON batch response") } } @@ -569,12 +580,12 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { // Check if we have a response for this event if i >= len(batchResponses) { // Missing response - treat as server error - d.handleEventError(ev, http.StatusInternalServerError, queueTime, "insufficient responses from server", nil) + d.handleEventError(ev, http.StatusInternalServerError, queueTime, "insufficient responses from server", nil, "insufficient responses from server") continue } if batchResponses[i].Status != http.StatusAccepted { - d.handleEventError(ev, batchResponses[i].Status, queueTime, "", nil) + d.handleEventError(ev, batchResponses[i].Status, queueTime, "", nil, "") } else { // Success d.Metrics.Increment(d.metricKeys.counterResponse20x) @@ -610,7 +621,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { for _, ev := range subBatch { queueTime := dequeuedAt.UnixMicro() - ev.EnqueuedUnixMicro - d.handleEventError(ev, resp.StatusCode, queueTime, "", bodyBytes) + d.handleEventError(ev, resp.StatusCode, queueTime, "", bodyBytes, "") } } } diff --git a/transmit/direct_transmit_test.go b/transmit/direct_transmit_test.go index 3cbe9aa9c7..4013d356b6 100644 --- a/transmit/direct_transmit_test.go +++ b/transmit/direct_transmit_test.go @@ -239,6 
+239,12 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { defer errorServer.Close() dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + // Send 4 events to ensure we get 2 successes and 2 errors sendTestEvents(dt, errorServer.URL, 4, "test-api-key") err := dt.Stop() @@ -267,6 +273,9 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, "test-dataset", errorEvent.Fields["dataset"]) assert.Equal(t, "test", errorEvent.Fields["environment"]) assert.Contains(t, errorEvent.Fields, "roundtrip_usec") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") } }) @@ -280,6 +289,12 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { defer errorServer.Close() dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, errorServer.URL, 2, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -303,6 +318,9 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, "error when sending event", errorEvent.Fields["error"]) assert.Equal(t, http.StatusInternalServerError, errorEvent.Fields["status_code"]) assert.Contains(t, errorEvent.Fields, "response_body") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") } }) @@ -365,7 +383,13 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { })) defer msgpackServer.Close() - dt, mockMetrics, _ := setupDirectTransmissionTest(t) + dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, msgpackServer.URL, 2, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -380,6 +404,21 @@ 
func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(1), errors) assert.Equal(t, float64(1), batchesSent) // Single batch containing 2 events assert.Equal(t, float64(2), messagesSent) + + // Verify error log has all expected fields + errorEvents := getErrorEvents(mockLogger) + require.Len(t, errorEvents, 1, "Expected one error log for rejected event") + + errorEvent := errorEvents[0] + assert.Equal(t, "error when sending event", errorEvent.Fields["error"]) + assert.Equal(t, http.StatusBadRequest, errorEvent.Fields["status_code"]) + assert.Equal(t, msgpackServer.URL, errorEvent.Fields["api_host"]) + assert.Equal(t, "test-dataset", errorEvent.Fields["dataset"]) + assert.Equal(t, "test", errorEvent.Fields["environment"]) + assert.Contains(t, errorEvent.Fields, "roundtrip_usec") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") }) t.Run("insufficient responses from server", func(t *testing.T) { @@ -393,6 +432,12 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { defer insufficientServer.Close() dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, insufficientServer.URL, 2, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -408,14 +453,20 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(1), batchesSent) // Single batch containing 2 events assert.Equal(t, float64(2), messagesSent) - // Verify error log message mentions insufficient responses + // Verify error log has all expected fields errorEvents := getErrorEvents(mockLogger) require.Len(t, errorEvents, 1, "Expected exactly one error log for the missing response") errorEvent := errorEvents[0] - assert.Equal(t, "error when sending event", errorEvent.Fields["error"]) + assert.Equal(t, "insufficient responses from server", errorEvent.Fields["error"]) 
assert.Equal(t, http.StatusInternalServerError, errorEvent.Fields["status_code"]) + assert.Equal(t, insufficientServer.URL, errorEvent.Fields["api_host"]) + assert.Equal(t, "test-dataset", errorEvent.Fields["dataset"]) + assert.Equal(t, "test", errorEvent.Fields["environment"]) assert.Contains(t, errorEvent.Fields, "roundtrip_usec") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") }) t.Run("response decode errors", func(t *testing.T) { @@ -427,7 +478,13 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { })) defer decodeErrorServer.Close() - dt, mockMetrics, _ := setupDirectTransmissionTest(t) + dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, decodeErrorServer.URL, 1, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -440,6 +497,16 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(1), decodeErrors) assert.Equal(t, float64(1), batchesSent) assert.Equal(t, float64(1), messagesSent) + + // Verify decode error log has context fields + var foundErrorLog bool + for _, event := range mockLogger.Events { + if msg, ok := event.Fields["error"].(string); ok && strings.Contains(msg, "failed to decode msgpack batch response") { + foundErrorLog = true + break + } + } + require.True(t, foundErrorLog, "Expected decode error log") }) t.Run("event over 1M size", func(t *testing.T) { @@ -453,8 +520,14 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + // Configure AdditionalErrorFields + mockCfg := &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + dt.Config = mockCfg + // Create an event with data over 1M - eventData := types.NewPayload(&config.MockConfig{}, map[string]any{ + eventData := types.NewPayload(mockCfg, map[string]any{ "large_field": 
strings.Repeat("a", 1024*1024+1000), "event_id": 1, }) @@ -480,15 +553,21 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(0), success) assert.Equal(t, float64(1), errors) - // Verify error log message about oversized event - var oversizedFound bool + // Verify error log has all expected fields + var oversizedLog *logger.MockLoggerEvent for _, event := range mockLogger.Events { - if errorMsg, ok := event.Fields["err"].(string); ok && strings.Contains(errorMsg, "exceeds max event size") { - oversizedFound = true + if msg, ok := event.Fields["error"].(string); ok && strings.Contains(msg, "failed to marshal event") { + oversizedLog = event break } } - require.True(t, oversizedFound, "Expected error log for oversized event") + require.NotNil(t, oversizedLog, "Expected error log for oversized event") + + assert.Equal(t, server.URL, oversizedLog.Fields["api_host"]) + assert.Equal(t, "test-dataset", oversizedLog.Fields["dataset"]) + assert.Equal(t, "test", oversizedLog.Fields["environment"]) + assert.Contains(t, oversizedLog.Fields, "roundtrip_usec") + assert.Contains(t, oversizedLog.Fields, "error") }) } @@ -728,7 +807,7 @@ func TestDirectTransmission(t *testing.T) { // Verify all events were queued and dequeued, net = 0 assert.Equal(t, float64(0), queuedItems) // Verify queue time histogram was updated for all events - assert.Equal(t, expectedEvents, mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) + assert.Equal(t, len(allEvents), mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) // Verify batch and message counts // Dataset A: 5 events -> 2 batches (3+2) @@ -815,7 +894,7 @@ func TestDirectTransmissionBatchSizeLimit(t *testing.T) { assert.Equal(t, float64(expectedEvents), success) assert.Equal(t, float64(len(allEvents)-expectedEvents), errors) assert.Equal(t, float64(0), queuedItems) - assert.Equal(t, expectedEvents, mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) + assert.Equal(t, 
len(allEvents), mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) // Verify batch and message counts - events are large so batches will be smaller assert.Greater(t, batchesSent, float64(0), "Should have sent at least one batch") @@ -1129,6 +1208,9 @@ func TestDirectTransmissionRetryLogic(t *testing.T) { if tt.expectSuccess { assert.Contains(t, mockMetrics.CounterIncrements, "libhoney_upstream_response_20x") + } else if tt.statusCode == 0 { + // Network/timeout error: whole batch failed before any response + assert.Contains(t, mockMetrics.CounterIncrements, "libhoney_upstream_send_errors") } else { assert.Contains(t, mockMetrics.CounterIncrements, "libhoney_upstream_response_errors") }