Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions backend/docs/data-enrichment-pipeline.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Data Enrichment Pipeline

Incoming records are enriched before persistence so stored data carries source metadata, rule-based tags, and derived fields that downstream search, alerting, and review workflows can use.

## Flow

1. Ingestion normalizes an incoming payload into the service-specific record shape.
2. The enrichment pipeline selects provider adapters that support the record type.
3. Each adapter returns a patch containing `metadata`, `tags`, and/or `derivedFields`.
4. The pipeline runs each selected adapter (retrying transient adapter failures), merges the returned patches, and validates the final enrichment output.
5. The enriched record is stored with explicit enrichment columns and embedded source attribution.

For bridge incidents, enrichment runs in `IncidentIngestionService.ingest()` before duplicate checks, incident inserts, review queue writes, and ingestion history writes.

## Incident Enrichment

The default incident adapters add:

- Metadata: provider, record type, source type, source external ID, source host, receipt timestamp, asset presence, severity weight, and follow-up action count.
- Tags: source, severity, bridge, asset, stablecoin classification, source host, and manual-review workflow tags.
- Derived fields: normalized asset code, source host, occurred-at ISO value, priority score, risk band, and age in milliseconds.

Persisted incident fields:

- `bridge_incidents.enrichment_metadata`
- `bridge_incidents.enrichment_tags`
- `bridge_incidents.derived_fields`
- `bridge_incidents.enrichment_validation`
- `bridge_incident_review_queue.enriched_payload`
- `bridge_incident_ingestion_history.enrichment_metadata`
- `bridge_incident_ingestion_history.enrichment_tags`
- `bridge_incident_ingestion_history.derived_fields`

## Provider Adapters

Adapters implement `EnrichmentProviderAdapter` and can be added to `createDefaultEnrichmentAdapters()`:

```ts
export interface EnrichmentProviderAdapter {
name: string;
supports(record: EnrichmentRecord): boolean;
enrich(record: EnrichmentRecord): Promise<EnrichmentPatch> | EnrichmentPatch;
}
```

Adapters should keep external lookups narrow and return only enrichment patches. The pipeline owns retries, merging, and validation.

## Validation And Retries

Validation requires:

- `metadata` is an object.
- `tags` is an array of normalized strings matching `^[a-z0-9:_-]+$`.
- `derivedFields` is an object.

Adapter failures are classified by `RetryPolicyService`. Transient, timeout, and rate-limit failures are retried with exponential backoff and jitter; permanent failures are surfaced immediately.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import type { Knex } from "knex";

/**
 * Adds the enrichment columns to the incident, review-queue, and
 * ingestion-history tables, then creates a GIN index on
 * `bridge_incidents.enrichment_tags`.
 *
 * Every new column is NOT NULL with an empty-object / empty-array default.
 */
export async function up(knex: Knex): Promise<void> {
  type ColumnKind = "jsonb" | "text[]";

  // Tables are altered in this order; columns are added in the listed order.
  const additions: Array<[string, Array<[string, ColumnKind]>]> = [
    [
      "bridge_incidents",
      [
        ["enrichment_metadata", "jsonb"],
        ["enrichment_tags", "text[]"],
        ["derived_fields", "jsonb"],
        ["enrichment_validation", "jsonb"],
      ],
    ],
    ["bridge_incident_review_queue", [["enriched_payload", "jsonb"]]],
    [
      "bridge_incident_ingestion_history",
      [
        ["enrichment_metadata", "jsonb"],
        ["enrichment_tags", "text[]"],
        ["derived_fields", "jsonb"],
      ],
    ],
  ];

  for (const [tableName, columns] of additions) {
    await knex.schema.alterTable(tableName, (table) => {
      for (const [column, kind] of columns) {
        if (kind === "jsonb") {
          table.jsonb(column).notNullable().defaultTo(knex.raw("'{}'::jsonb"));
        } else {
          table.specificType(column, "text[]").notNullable().defaultTo(knex.raw("'{}'::text[]"));
        }
      }
    });
  }

  // IF NOT EXISTS keeps the index creation safe to repeat.
  await knex.schema.raw(`
CREATE INDEX IF NOT EXISTS bridge_incidents_enrichment_tags_idx
ON bridge_incidents USING GIN (enrichment_tags)
`);
}

/**
 * Reverses `up`: drops the tags index first, then removes the enrichment
 * columns from the ingestion-history, review-queue, and incident tables.
 */
export async function down(knex: Knex): Promise<void> {
  await knex.schema.raw("DROP INDEX IF EXISTS bridge_incidents_enrichment_tags_idx");

  // Tables are processed in the reverse of the order used by `up`.
  const removals: Array<[string, string[]]> = [
    ["bridge_incident_ingestion_history", ["enrichment_metadata", "enrichment_tags", "derived_fields"]],
    ["bridge_incident_review_queue", ["enriched_payload"]],
    ["bridge_incidents", ["enrichment_metadata", "enrichment_tags", "derived_fields", "enrichment_validation"]],
  ];

  for (const [tableName, columns] of removals) {
    await knex.schema.alterTable(tableName, (table) => {
      for (const column of columns) {
        table.dropColumn(column);
      }
    });
  }
}
123 changes: 123 additions & 0 deletions backend/src/services/enrichment/enrichmentPipeline.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import { retryPolicyService, type RetryPolicyService } from "../retryPolicy.service.js";
import type {
EnrichmentPatch,
EnrichmentProviderAdapter,
EnrichmentRecord,
EnrichmentResult,
EnrichmentValidationResult,
} from "./types.js";
import { createDefaultEnrichmentAdapters } from "./providerAdapters.js";

/**
 * Returns a new, sorted array holding each non-empty tag exactly once.
 * The input array is not modified.
 */
function uniqueTags(tags: string[]): string[] {
  const seen = new Set<string>();
  for (const tag of tags) {
    // Falsy entries (e.g. empty strings) are dropped.
    if (tag) seen.add(tag);
  }
  return [...seen].sort();
}

/**
 * Combines two enrichment patches into a new patch.
 *
 * `metadata` and `derivedFields` are shallow-merged with keys from `patch`
 * overriding keys from `target`; tags from both are concatenated and passed
 * through `uniqueTags`. Missing sections are treated as empty.
 */
function mergePatch(target: EnrichmentPatch, patch: EnrichmentPatch): EnrichmentPatch {
  const metadata = { ...(target.metadata ?? {}), ...(patch.metadata ?? {}) };
  const combinedTags = [...(target.tags ?? []), ...(patch.tags ?? [])];
  const derivedFields = { ...(target.derivedFields ?? {}), ...(patch.derivedFields ?? {}) };

  return { metadata, tags: uniqueTags(combinedTags), derivedFields };
}

/**
 * Thrown when a merged enrichment result fails validation.
 *
 * The full validation outcome is exposed on `validation` so callers can
 * inspect the individual issues.
 */
export class EnrichmentValidationError extends Error {
  constructor(public readonly validation: EnrichmentValidationResult) {
    super("Enrichment validation failed");
    // Without this the error reports the generic name "Error" in logs and
    // in name-based checks.
    this.name = "EnrichmentValidationError";
  }
}

/**
 * Runs the enrichment adapters that support a record, merges the patches
 * they return, and validates the merged result.
 */
export class EnrichmentPipelineService {
  /** Pattern a tag must match in full; compiled once instead of once per tag. */
  private static readonly TAG_PATTERN = /^[a-z0-9:_-]+$/;

  constructor(
    private readonly adapters: EnrichmentProviderAdapter[] = createDefaultEnrichmentAdapters(),
    private readonly retryPolicy: RetryPolicyService = retryPolicyService,
  ) {}

  /**
   * Enriches a record with every adapter whose `supports()` accepts it.
   *
   * Adapters run sequentially in registration order; patches are merged in
   * that order, so a later adapter's keys override an earlier adapter's.
   *
   * @param record - The record to enrich.
   * @returns The record together with merged metadata, tags, derived fields,
   *   the validation outcome, and the total number of adapter attempts.
   * @throws EnrichmentValidationError when the merged result fails `validate`.
   * @throws Whatever an adapter throws once its failure is not retried.
   */
  async enrich<TData extends Record<string, unknown>>(record: EnrichmentRecord<TData>): Promise<EnrichmentResult<TData>> {
    const adapters = this.adapters.filter((adapter) => adapter.supports(record));
    let patch: EnrichmentPatch = { metadata: {}, tags: [], derivedFields: {} };
    let attempts = 0;

    for (const adapter of adapters) {
      const adapterPatch = await this.runAdapterWithRetry(adapter, record);
      attempts += adapterPatch.attempts;
      patch = mergePatch(patch, adapterPatch.patch);
    }

    const result: EnrichmentResult<TData> = {
      record,
      metadata: patch.metadata ?? {},
      tags: uniqueTags(patch.tags ?? []),
      derivedFields: patch.derivedFields ?? {},
      validation: { valid: true, issues: [] },
      attempts,
    };

    result.validation = this.validate(result);
    if (!result.validation.valid) {
      throw new EnrichmentValidationError(result.validation);
    }

    return result;
  }

  /**
   * Checks the shape of an enrichment result.
   *
   * `metadata` and `derivedFields` must be non-array objects; `tags` must be
   * an array whose entries are strings matching `^[a-z0-9:_-]+$`.
   *
   * @returns `valid: true` with no issues, or `valid: false` with one issue
   *   per violation (each offending tag is reported by index).
   */
  validate(result: Pick<EnrichmentResult, "metadata" | "tags" | "derivedFields">): EnrichmentValidationResult {
    const issues: EnrichmentValidationResult["issues"] = [];

    if (!result.metadata || typeof result.metadata !== "object" || Array.isArray(result.metadata)) {
      issues.push({ field: "metadata", code: "invalid_metadata", message: "Metadata must be an object" });
    }

    if (!Array.isArray(result.tags)) {
      issues.push({ field: "tags", code: "invalid_tags", message: "Tags must be an array" });
    } else {
      result.tags.forEach((tag, index) => {
        if (typeof tag !== "string" || !EnrichmentPipelineService.TAG_PATTERN.test(tag)) {
          issues.push({ field: `tags.${index}`, code: "invalid_tag", message: "Tags must be normalized strings" });
        }
      });
    }

    if (!result.derivedFields || typeof result.derivedFields !== "object" || Array.isArray(result.derivedFields)) {
      issues.push({ field: "derivedFields", code: "invalid_derived_fields", message: "Derived fields must be an object" });
    }

    return { valid: issues.length === 0, issues };
  }

  /**
   * Calls `adapter.enrich(record)`, retrying failures the retry policy
   * reports as retryable, up to the policy's `maxRetries` extra attempts.
   *
   * The adapter is always invoked at least once: a negative, missing, or
   * non-finite `maxRetries` is treated as "no retries" rather than silently
   * skipping the adapter and returning an empty patch.
   *
   * @returns The adapter's patch and the number of attempts it took.
   * @throws The adapter's error when it is not retryable or retries are used up.
   */
  private async runAdapterWithRetry<TData extends Record<string, unknown>>(
    adapter: EnrichmentProviderAdapter<TData>,
    record: EnrichmentRecord<TData>,
  ): Promise<{ patch: EnrichmentPatch; attempts: number }> {
    const operation = `enrichment.${adapter.name}`;
    const policy = this.retryPolicy.getPolicy({ operation, maxRetries: 2, baseDelayMs: 25 });
    const maxRetries = Number.isFinite(policy.maxRetries) ? Math.max(0, policy.maxRetries) : 0;

    for (let attempt = 1; ; attempt += 1) {
      try {
        return { patch: await adapter.enrich(record), attempts: attempt };
      } catch (error) {
        const failureClass = this.retryPolicy.classifyFailure(error);
        const exhausted = attempt > maxRetries || !this.retryPolicy.isRetryable(error);
        this.retryPolicy.recordRetryMetric(operation, exhausted ? "exhausted" : "scheduled", attempt, failureClass);

        if (exhausted) throw error;
        // Wait for the policy-provided delay before the next attempt.
        await new Promise((resolve) => setTimeout(resolve, this.retryPolicy.getDelayMs(attempt, policy)));
      }
    }
  }
}

/** Module-level pipeline instance constructed with the constructor's default adapters and retry policy. */
export const enrichmentPipelineService = new EnrichmentPipelineService();
3 changes: 3 additions & 0 deletions backend/src/services/enrichment/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export * from "./types.js";
export * from "./providerAdapters.js";
export * from "./enrichmentPipeline.service.js";
148 changes: 148 additions & 0 deletions backend/src/services/enrichment/providerAdapters.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import type { EnrichmentProviderAdapter, EnrichmentRecord } from "./types.js";

/**
 * Returns the trimmed text of `value` when it is a string with at least one
 * non-whitespace character; returns `null` for anything else.
 */
function normalizeString(value: unknown): string | null {
  if (typeof value === "string") {
    const text = value.trim();
    if (text !== "") return text;
  }
  return null;
}

/**
 * Converts free-form text into a tag: trimmed, lowercased, with every run of
 * characters outside `a-z0-9:_-` replaced by a single dash and a dash at
 * either end removed. May return an empty string.
 */
function normalizeTag(value: string): string {
  const lowered = value.trim().toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9:_-]+/g, "-");
  // Collapse dash runs first so at most one dash remains at each end.
  const collapsed = dashed.replace(/-+/g, "-");
  return collapsed.replace(/^-|-$/g, "");
}

/**
 * Extracts the lowercased hostname from a URL string.
 *
 * Returns `null` when `value` is not a non-blank string or cannot be parsed
 * by the `URL` constructor.
 */
function hostFromUrl(value: unknown): string | null {
  const candidate = normalizeString(value);
  if (candidate === null) return null;

  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    // Unparseable URLs are treated as "no host" rather than an error.
    return null;
  }
  return parsed.hostname.toLowerCase();
}

/**
 * Maps a severity label (case-insensitive, surrounding whitespace ignored)
 * to a numeric weight. Unknown, blank, or non-string severities get 50, the
 * same weight as "medium".
 */
function severityWeight(severity: unknown): number {
  const weights = new Map<string, number>([
    ["critical", 100],
    ["high", 80],
    ["medium", 50],
    ["low", 20],
  ]);

  const label = normalizeString(severity)?.toLowerCase();
  if (label === undefined) return 50;
  return weights.get(label) ?? 50;
}

/**
 * Buckets a severity weight: 90 and above is "critical", 70 up to (but not
 * including) 90 is "elevated", and everything else is "standard".
 */
function riskBand(weight: number): "critical" | "elevated" | "standard" {
  // Negated comparison so NaN also lands in "standard".
  if (!(weight >= 70)) return "standard";
  return weight >= 90 ? "critical" : "elevated";
}

/**
 * Adds source metadata and basic derived fields to incident records.
 */
export class IncidentMetadataAdapter implements EnrichmentProviderAdapter {
  name = "incident-metadata";

  /** Only records whose `recordType` is "incident" are handled. */
  supports(record: EnrichmentRecord): boolean {
    return record.recordType === "incident";
  }

  /**
   * Builds the metadata / derived-fields patch for an incident.
   *
   * `derivedFields.occurredAtIso` is the record's `occurredAt` re-serialized
   * with `Date.prototype.toISOString()`, or `null` when `occurredAt` is
   * missing or cannot be parsed as a date, so the field never carries a raw,
   * non-ISO input string.
   *
   * @returns A patch with `metadata` and `derivedFields`; no tags.
   */
  enrich(record: EnrichmentRecord) {
    const occurredAt = normalizeString(record.data.occurredAt);
    const occurredMs = occurredAt === null ? Number.NaN : new Date(occurredAt).getTime();
    const sourceHost = hostFromUrl(record.data.sourceUrl);
    const assetCode = normalizeString(record.data.assetCode);
    // Records without a usable source type are attributed to "webhook".
    const sourceType = normalizeString(record.data.sourceType) ?? "webhook";

    return {
      metadata: {
        provider: record.provider,
        recordType: record.recordType,
        sourceType,
        sourceExternalId: normalizeString(record.data.sourceExternalId),
        sourceHost,
        // Stamped with the time at which this adapter runs.
        receivedAt: new Date().toISOString(),
        hasAssetCode: Boolean(assetCode),
      },
      derivedFields: {
        occurredAtIso: Number.isNaN(occurredMs) ? null : new Date(occurredMs).toISOString(),
        sourceHost,
        normalizedAssetCode: assetCode?.toUpperCase() ?? null,
      },
    };
  }
}

/**
 * Produces rule-based tags for incident records.
 */
export class IncidentTaggingAdapter implements EnrichmentProviderAdapter {
  name = "incident-tagging";

  /** Only records whose `recordType` is "incident" are handled. */
  supports(record: EnrichmentRecord): boolean {
    return record.recordType === "incident";
  }

  /**
   * Builds a tags-only patch.
   *
   * Source and severity tags are always emitted (defaulting to "webhook" and
   * "medium"); bridge, asset, stablecoin, source-host, and manual-review tags
   * are added only when the corresponding data is present. Every tag is
   * passed through `normalizeTag`, and tags that normalize to an empty string
   * are dropped.
   */
  enrich(record: EnrichmentRecord) {
    const data = record.data;
    const sourceType = normalizeString(data.sourceType) ?? "webhook";
    const severity = normalizeString(data.severity) ?? "medium";
    const bridgeId = normalizeString(data.bridgeId);
    const assetCode = normalizeString(data.assetCode);
    const sourceHost = hostFromUrl(data.sourceUrl);

    const rawTags: string[] = [`source:${sourceType}`, `severity:${severity}`];

    if (bridgeId !== null) {
      rawTags.push(`bridge:${bridgeId}`);
    }
    if (assetCode !== null) {
      rawTags.push(`asset:${assetCode}`);
      const stablecoins = ["USDC", "USDT", "EURC", "DAI"];
      if (stablecoins.includes(assetCode.toUpperCase())) {
        rawTags.push("asset:stablecoin");
      }
    }
    if (sourceHost !== null) {
      rawTags.push(`source-host:${sourceHost}`);
    }
    if (data.requiresManualReview === true) {
      rawTags.push("workflow:manual-review");
    }

    const tags: string[] = [];
    for (const raw of rawTags) {
      const tag = normalizeTag(raw);
      if (tag) tags.push(tag);
    }

    return { tags };
  }
}

/**
 * Computes severity-based scoring fields for incident records.
 */
export class IncidentDerivedFieldsAdapter implements EnrichmentProviderAdapter {
  name = "incident-derived-fields";

  /** Only records whose `recordType` is "incident" are handled. */
  supports(record: EnrichmentRecord): boolean {
    return record.recordType === "incident";
  }

  /**
   * Builds a patch with the severity weight and follow-up action count as
   * metadata, plus `priorityScore`, `riskBand`, and `ageMs` derived fields.
   *
   * `priorityScore` is the severity weight plus 5 per follow-up action (bonus
   * capped at 20), capped at 100. `ageMs` is the non-negative time elapsed
   * since `occurredAt`, or `null` when `occurredAt` is missing or unparseable.
   */
  enrich(record: EnrichmentRecord) {
    const data = record.data;
    const weight = severityWeight(data.severity);
    const actionCount = Array.isArray(data.followUpActions) ? data.followUpActions.length : 0;

    const occurredAt = normalizeString(data.occurredAt);
    const occurredMs = occurredAt === null ? Number.NaN : new Date(occurredAt).getTime();

    const followUpBonus = Math.min(20, actionCount * 5);
    let ageMs: number | null = null;
    if (!Number.isNaN(occurredMs)) {
      // Timestamps in the future clamp to an age of zero.
      ageMs = Math.max(0, Date.now() - occurredMs);
    }

    return {
      metadata: {
        severityWeight: weight,
        followUpActionCount: actionCount,
      },
      derivedFields: {
        priorityScore: Math.min(100, weight + followUpBonus),
        riskBand: riskBand(weight),
        ageMs,
      },
    };
  }
}

/**
 * Creates a fresh list of the built-in incident adapters, in the order
 * metadata, tagging, derived fields.
 */
export function createDefaultEnrichmentAdapters(): EnrichmentProviderAdapter[] {
  const adapters: EnrichmentProviderAdapter[] = [];
  adapters.push(new IncidentMetadataAdapter());
  adapters.push(new IncidentTaggingAdapter());
  adapters.push(new IncidentDerivedFieldsAdapter());
  return adapters;
}
Loading