Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,10 @@ Vue 3 + TypeScript, Vite, Tailwind, Pinia. Alerting UI present (`src/views/Alert
Single-node Grafana Mimir with S3-compatible backend and multi-tenant Alertmanager. Containerfile, Makefile, docker-compose.yml + docker-compose.local.yml. `scripts/` contains Python helpers (`alert.py`, `alerting_config.py`) for manual testing.

**Alerting integration**:
- Backend (`backend/services/alerting/`) renders Alertmanager YAML from `AlertingConfig` models and pushes via `POST /api/v1/alerts` per tenant. Email templates are Go `html/template`-embedded, en/it locales, firing + resolved variants.
- Backend (`backend/services/alerting/`) holds one `AlertingConfigLayer` per organization in `alert_config_layers` (flat recipient-based shape: `enabled`, `email_recipients[]`, `webhook_recipients[]`, `telegram_recipients[]`, each recipient carries its own `severities[]`; email also `language` + `format`). The effective per-tenant Mimir YAML is the server-side merge of every layer from Owner down to the tenant (union dedup, additive-only). `/alerts/config` only ever returns the caller's own layer — the merged view is internal and never leaves the backend. Templates are Go `html/template`-embedded, en/it locales, firing + resolved variants; both languages ship with every tenant push and the renderer picks per email recipient via per-language dispatchers (`alert_<lang>.html|txt|subject`).
- Collect proxies requests from systems to the Alertmanager `alerts`/`silences` endpoints, setting `X-Scope-OrgID` from the authenticated system's org.
- Alertmanager sends resolved alerts back to collect via webhook (`/api/alert_history`), which persists them scoped by `organization_id` (column on `alert_history`, populated from the DB via `system_key` lookup — never trusted from the payload).
- RBAC: `/alerts/config*` is gated on a dedicated `alerts` resource (`read:alerts` for GET, `manage:alerts` for POST/DELETE) — admin/super only. The list/silence endpoints (`/alerts`, `/alerts/history`, `/alerts/silences*`, `/alerts/activity/:fingerprint`, `/systems/:id/alerts*`) stay on `read:systems`/`manage:systems`. The cross-system `/alerts/silences*` set mirrors `/systems/:id/alerts/silences*` 1:1 — same backend `buildSystemAlertSilenceRequest` builds the Mimir payload, so a silence created via either route is interoperable with the other.

### 3.6 Proxy (`proxy/`)

Expand Down Expand Up @@ -370,7 +371,9 @@ Authoritative: `backend/openapi.yaml` (also `make docs` / redocly). High-level r
/api/users/* CRUD + avatar + import/export + password reset + suspend/reactivate
/api/systems/* CRUD + inventory + alerts + regenerate-secret + reachability + export
/api/applications/* CRUD + assign/unassign org + totals/summary/trend
/api/alerts, /api/alerts/{totals,trend,config} active alerts + config + aggregates
/api/alerts, /api/alerts/{totals,trend,stats,history,config} active alerts + config + aggregates + history
/api/alerts/silences/* cross-system silences (mute/unmute) — parallel to /systems/:id/alerts/silences
/api/alerts/activity/:fingerprint per-alert audit timeline (silence created/updated/removed)
/api/filters/{systems,applications,users} UI filter aggregation
/api/rebranding/* per-org per-product asset management
/api/organizations, /api/roles, /api/organization-roles metadata
Expand Down
41 changes: 41 additions & 0 deletions backend/database/migrations/023_add_alert_activity.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Migration 023: Add alert_activity table
-- Append-only timeline of operator actions performed on a single alert
-- (silence created/updated/deleted). The UI renders this in the alert-detail
-- drawer ("Activity" section). Per-alert scoped via (organization_id,
-- fingerprint). Operator "notes" are not a separate concept: they are stored
-- as the comment of the silence, so a note edit is recorded here as a
-- silence_updated event whose details payload includes the comment change.

-- One row per operator action on a single alert. Rows are appended, never
-- rewritten, so the table reads as a timeline.
CREATE TABLE IF NOT EXISTS alert_activity (
    -- Surrogate key, assigned automatically (BIGSERIAL).
    id BIGSERIAL PRIMARY KEY,

    -- Scope of the event: the owning organization plus the Alertmanager
    -- fingerprint of the alert the action targets. Together they select one
    -- alert's timeline.
    organization_id VARCHAR(255) NOT NULL,
    fingerprint VARCHAR(255) NOT NULL,

    -- Action identifier. Open-ended so new event types don't require a schema
    -- change; current values: 'silenced', 'silence_updated', 'unsilenced'.
    action VARCHAR(50) NOT NULL,

    -- Actor identity (denormalized for cheap render). actor_user_id may be
    -- NULL for system-driven events (none today, kept for future).
    actor_user_id VARCHAR(255),
    actor_name VARCHAR(255),

    -- Optional silence reference, set on silence-related actions so the
    -- DELETE handler can resolve the originating fingerprint without a
    -- separate mapping table.
    silence_id VARCHAR(255),

    -- Free-form structured payload (e.g. comment, end_at, note excerpt).
    -- Never NULL: defaults to an empty JSON object.
    details JSONB NOT NULL DEFAULT '{}',

    -- Time of the event; defaults to the moment the row is inserted.
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- Descriptions stored in the database catalog for the table and for the
-- columns whose meaning is not obvious from the name alone.
COMMENT ON TABLE alert_activity IS 'Append-only audit timeline of operator actions on individual alerts';
COMMENT ON COLUMN alert_activity.fingerprint IS 'Alertmanager fingerprint (hex hash of labels) of the alert the action targets';
COMMENT ON COLUMN alert_activity.action IS 'Event kind: silenced | silence_updated | unsilenced. Note changes are silence_updated events.';
COMMENT ON COLUMN alert_activity.silence_id IS 'Silence ID associated with the event. Lets DELETE silence resolve the fingerprint.';

-- Timeline reads: filter on (organization_id, fingerprint) and return the
-- newest events first.
CREATE INDEX IF NOT EXISTS idx_alert_activity_org_fp_created_at ON alert_activity(organization_id, fingerprint, created_at DESC);
-- Silence -> alert lookup within an organization. Partial index: rows that
-- carry no silence_id are left out of it.
CREATE INDEX IF NOT EXISTS idx_alert_activity_silence_lookup ON alert_activity(organization_id, silence_id) WHERE silence_id IS NOT NULL;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Rollback: remove the alert_activity table. IF EXISTS makes the statement a
-- no-op instead of an error when the table is already gone.
DROP TABLE IF EXISTS alert_activity;
49 changes: 49 additions & 0 deletions backend/database/migrations/024_add_alert_config_layers.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- Migration 024: alert_config_layers
--
-- One row per organization carrying that org's alerting configuration as a
-- flat recipient-based JSON blob. The effective per-tenant Mimir YAML is
-- the server-side merge of all rows walking up the org hierarchy from the
-- tenant to the Owner:
--
-- Owner.layer → Distributor.layer → Reseller.layer → Customer.layer
--
-- The merge is internal — /alerts/config exposes only the caller's own
-- row, never the merged effective view or anyone else's row.
--
-- Merge rules (additive-only for security-relevant fields):
-- - bool channel toggles (enabled.{email,webhook,telegram}): OR. A
-- descendant cannot disable a channel an ancestor enabled. Non-Owner
-- layers cannot store an explicit false (normalised to null on save).
-- - recipient lists (email/webhook/telegram): union with stable dedup.
-- Dedup keys: email→address, webhook→url, telegram→(bot_token,chat_id).
-- - per-recipient severities[]: union; if any contributor uses [] ("all
-- severities") the merged copy widens back to [].
--
-- Mimir sees a flat YAML per tenant; the layered model is server-internal
-- and invisible to Alertmanager.

-- Storage for the per-organization alerting layer.
CREATE TABLE IF NOT EXISTS alert_config_layers (
    -- Owning organization. As the primary key it guarantees at most one
    -- layer row per organization.
    organization_id VARCHAR(255) PRIMARY KEY,

    -- Serialized AlertingConfigLayer (Go struct):
    --   {
    --     "enabled": {"email": *bool, "webhook": *bool, "telegram": *bool},
    --     "email_recipients": [{address, severities[], language, format}],
    --     "webhook_recipients": [{name, url, severities[]}],
    --     "telegram_recipients": [{bot_token, chat_id, severities[]}]
    --   }
    -- Channel toggles are tri-state (null = "no opinion at this layer,
    -- inherit from above"). Per-recipient severities=[] means "all".
    config_json JSONB NOT NULL,

    -- Audit fields. updated_by_user_id stores the logto_id of the user who
    -- last saved this layer. updated_by_name is denormalised for cheap UI
    -- rendering of "who set this".
    updated_by_user_id VARCHAR(255),
    updated_by_name VARCHAR(255),
    -- Both timestamps default to the insertion time.
    -- NOTE(review): nothing in this migration refreshes updated_at on UPDATE,
    -- so the writer presumably sets it explicitly on save — confirm.
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- Descriptions stored in the database catalog, so the layering rules and the
-- JSON shape can be read from the schema itself.
COMMENT ON TABLE alert_config_layers IS 'Per-organization alerting config layer. Effective Mimir YAML for a tenant is the merge of all layers from Owner down to that tenant; merge is server-side only and never exposed via API.';
COMMENT ON COLUMN alert_config_layers.config_json IS 'Serialized AlertingConfigLayer: { enabled:{email,webhook,telegram}, email_recipients[], webhook_recipients[], telegram_recipients[] }. Each recipient carries its own severities[]; email recipients additionally carry language+format. Channel toggles are nullable tri-state.';
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Rollback: remove the alert_config_layers table. IF EXISTS makes the
-- statement a no-op instead of an error when the table is already gone.
DROP TABLE IF EXISTS alert_config_layers;
138 changes: 138 additions & 0 deletions backend/entities/local_alert_activity.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
Copyright (C) 2026 Nethesis S.r.l.
SPDX-License-Identifier: AGPL-3.0-or-later
*/

package entities

import (
"database/sql"
"encoding/json"
"fmt"
"time"

"github.com/nethesis/my/backend/database"
)

// Action values stored in the alert_activity.action column.
//
// The set is open-ended: adding a value needs no schema change. There is
// deliberately no dedicated "note" action — an operator note is the silence
// comment itself, so editing a note is recorded as AlertActivitySilenceUpdated.
const (
	// AlertActivitySilenced is logged when a silence is created for an alert.
	AlertActivitySilenced = "silenced"

	// AlertActivitySilenceUpdated is logged when an existing silence changes,
	// including a change to its comment (the operator note).
	AlertActivitySilenceUpdated = "silence_updated"

	// AlertActivityUnsilenced is logged when a silence is removed.
	AlertActivityUnsilenced = "unsilenced"
)

// AlertActivityEntry is a single event in the audit timeline of one alert,
// mirroring a row of the alert_activity table.
type AlertActivityEntry struct {
	// ID is the row identifier.
	ID int64 `json:"id"`

	// OrganizationID and Fingerprint scope the event to one alert of one
	// organization.
	OrganizationID string `json:"organization_id"`
	Fingerprint    string `json:"fingerprint"`

	// Action is the event kind (see the AlertActivity* constants).
	Action string `json:"action"`

	// ActorUserID, ActorName and SilenceID are optional; a nil pointer is
	// dropped from the JSON output.
	ActorUserID *string `json:"actor_user_id,omitempty"`
	ActorName   *string `json:"actor_name,omitempty"`
	SilenceID   *string `json:"silence_id,omitempty"`

	// Details is the free-form structured payload attached to the event.
	Details map[string]interface{} `json:"details"`

	// CreatedAt is the time the event was recorded.
	CreatedAt time.Time `json:"created_at"`
}

// LocalAlertActivityRepository is the persistence gateway for the
// alert_activity timeline: it appends events and reads them back.
type LocalAlertActivityRepository struct {
	// db is the handle every statement of this repository runs on.
	db *sql.DB
}

// NewLocalAlertActivityRepository returns a repository bound to the
// package-level handle database.DB, read at the time of the call.
func NewLocalAlertActivityRepository() *LocalAlertActivityRepository {
	repo := new(LocalAlertActivityRepository)
	repo.db = database.DB
	return repo
}

// Log writes one event to the alert_activity timeline.
//
// orgID, fingerprint and action are stored as given. actorUserID, actorName
// and silenceID are optional: an empty string is stored as NULL. A nil details
// map is stored as an empty JSON object.
//
// The call is meant to be best-effort. A caller whose primary action already
// succeeded should log a warning when Log returns an error rather than fail
// that action.
func (r *LocalAlertActivityRepository) Log(orgID, fingerprint, action, actorUserID, actorName, silenceID string, details map[string]interface{}) error {
	payload := details
	if payload == nil {
		// Encode "no details" as {} instead of JSON null.
		payload = map[string]interface{}{}
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encode details: %w", err)
	}

	// NULLIF turns the empty-string placeholders into SQL NULLs.
	const insertSQL = `INSERT INTO alert_activity (organization_id, fingerprint, action, actor_user_id, actor_name, silence_id, details)
VALUES ($1, $2, $3, NULLIF($4,''), NULLIF($5,''), NULLIF($6,''), $7::jsonb)`

	if _, execErr := r.db.Exec(insertSQL, orgID, fingerprint, action, actorUserID, actorName, silenceID, string(encoded)); execErr != nil {
		return fmt.Errorf("insert alert_activity: %w", execErr)
	}
	return nil
}

// ListByFingerprint returns the activity timeline of one alert, newest first.
//
// orgID and fingerprint select the alert. limit caps the number of rows
// returned; values <= 0 fall back to 100.
//
// The returned slice is never nil, and every entry carries a non-nil Details
// map. An error is returned when the query fails, when a row cannot be
// scanned, or when iteration stops early (for example because the connection
// dropped); in that case no partial result is returned.
func (r *LocalAlertActivityRepository) ListByFingerprint(orgID, fingerprint string, limit int) ([]AlertActivityEntry, error) {
	if limit <= 0 {
		limit = 100
	}
	rows, err := r.db.Query(
		`SELECT id, organization_id, fingerprint, action, actor_user_id, actor_name, silence_id, details, created_at
FROM alert_activity
WHERE organization_id = $1 AND fingerprint = $2
ORDER BY created_at DESC, id DESC
LIMIT $3`,
		orgID, fingerprint, limit,
	)
	if err != nil {
		return nil, fmt.Errorf("query alert_activity: %w", err)
	}
	// A Close error carries no additional information for the caller here.
	defer func() { _ = rows.Close() }()

	// Non-nil even when empty: encodes as [] (not null) if the caller
	// serializes the result to JSON.
	out := make([]AlertActivityEntry, 0)
	for rows.Next() {
		var e AlertActivityEntry
		var actorUserID, actorName, silenceID sql.NullString
		var detailsRaw []byte
		if err := rows.Scan(&e.ID, &e.OrganizationID, &e.Fingerprint, &e.Action, &actorUserID, &actorName, &silenceID, &detailsRaw, &e.CreatedAt); err != nil {
			return nil, fmt.Errorf("scan alert_activity: %w", err)
		}

		// Nullable columns become nil pointers so they are omitted from JSON.
		if actorUserID.Valid {
			e.ActorUserID = &actorUserID.String
		}
		if actorName.Valid {
			e.ActorName = &actorName.String
		}
		if silenceID.Valid {
			e.SilenceID = &silenceID.String
		}

		// Best-effort decode: a payload that cannot be decoded is shown as
		// empty instead of failing the whole listing.
		if len(detailsRaw) > 0 {
			if err := json.Unmarshal(detailsRaw, &e.Details); err != nil {
				e.Details = nil
			}
		}
		if e.Details == nil {
			// Covers empty input, undecodable input and a JSON null payload.
			e.Details = map[string]interface{}{}
		}
		out = append(out, e)
	}

	// rows.Next also returns false when iteration is cut short by an error;
	// without this check a truncated timeline would be reported as complete.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate alert_activity: %w", err)
	}
	return out, nil
}

// FindFingerprintBySilenceID resolves a silence back to the alert it was
// created against. It looks at the most recent "silenced" event recorded for
// silenceID inside orgID and returns that event's fingerprint.
//
// When no such event exists it returns an empty string and a nil error; any
// other database failure is returned wrapped. DeleteSystemAlertSilence relies
// on this to log the unsilence event under the right alert without the caller
// having to supply the fingerprint.
func (r *LocalAlertActivityRepository) FindFingerprintBySilenceID(orgID, silenceID string) (string, error) {
	row := r.db.QueryRow(
		`SELECT fingerprint FROM alert_activity
WHERE organization_id = $1 AND silence_id = $2 AND action = $3
ORDER BY created_at DESC LIMIT 1`,
		orgID, silenceID, AlertActivitySilenced,
	)

	var fingerprint string
	switch scanErr := row.Scan(&fingerprint); {
	case scanErr == nil:
		return fingerprint, nil
	case scanErr == sql.ErrNoRows:
		// Unknown silence: not an error, just nothing to report.
		return "", nil
	default:
		return "", fmt.Errorf("lookup fingerprint by silence_id: %w", scanErr)
	}
}
Loading
Loading