Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a594b96
Extract stitch-service package
AlexAxthelm Jun 22, 2026
bc93ec5
extract jobs manager into package
AlexAxthelm Jun 22, 2026
add454e
update repo infrastructure
AlexAxthelm Jun 22, 2026
395e2ba
Update entity linkage to use new packages
AlexAxthelm Jun 22, 2026
b08f643
Update EL frontend
AlexAxthelm Jun 22, 2026
225d9fe
style: forbidden patterns
AlexAxthelm Jun 23, 2026
288d856
CodeQL: sub ellipsis for one-line docstrings
AlexAxthelm Jun 23, 2026
ebea67c
Extract Auth to `stitch-service` package
AlexAxthelm Jun 23, 2026
f5cc3d3
better handling of terminal states and job restarts
AlexAxthelm Jun 23, 2026
baf555b
Convert `stitch-llm` to use new packages
AlexAxthelm Jun 23, 2026
fa4c791
Update LLM frontend to reflect changes in deployment
AlexAxthelm Jun 23, 2026
eff24dc
Better interface for jobs results in UI
AlexAxthelm Jun 23, 2026
810688d
Rework EL UI to use new job components
AlexAxthelm Jun 23, 2026
250e243
rework ETL ui with job components
AlexAxthelm Jun 23, 2026
c9d24c8
fix test failing on `--exact`
AlexAxthelm Jun 23, 2026
2803a54
safer default
AlexAxthelm Jun 23, 2026
ba0d8cb
document expected behavior in LLM
AlexAxthelm Jun 23, 2026
4f58e8d
remove limit on `/jobs` endpoint
AlexAxthelm Jun 23, 2026
0112054
extract duplicate `initiated_by` logic
AlexAxthelm Jun 23, 2026
70cc8be
Sanitize LLM errors rather than surfacing raw error to user
AlexAxthelm Jun 23, 2026
7f41c16
reuse error parsing in frontend
AlexAxthelm Jun 23, 2026
3edb8ef
unify call parameters for jobs
AlexAxthelm Jun 23, 2026
96fec46
Decouple auth and OIDC configs
AlexAxthelm Jun 23, 2026
21cae57
Ensure AsyncClient closes
AlexAxthelm Jun 24, 2026
60b692a
Merge branch 'main' into feat/stitch-service
AlexAxthelm Jun 24, 2026
33abede
Extract OTel from API into new package, and call in new pkgs
AlexAxthelm Jun 24, 2026
349a06e
Address code review
AlexAxthelm Jun 24, 2026
e830e07
Merge branch 'main' into feat/stitch-service
AlexAxthelm Jun 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,30 @@ pkg-test-ogsi:
# Run the stitch-ogsi tests by re-invoking make on the `uv-test-target-exact`
# target (not shown in this view), passing the package name and its test path.
# The recipe line must begin with a hard TAB or make reports "missing separator".
pkg-test-exact-ogsi:
	$(MAKE) uv-test-target-exact PKG=stitch-ogsi TEST_PATH=packages/stitch-ogsi

# Aggregate package targets: each one has no recipe of its own and simply
# depends on the per-package target of the same kind.
pkg-build: \
  pkg-build-auth \
  pkg-build-client \
  pkg-build-models \
  pkg-build-ogsi
pkg-test: \
  pkg-test-auth \
  pkg-test-client \
  pkg-test-models \
  pkg-test-ogsi
pkg-test-exact: \
  pkg-test-exact-auth \
  pkg-test-exact-client \
  pkg-test-exact-models \
  pkg-test-exact-ogsi
# stitch-service package targets.
#   pkg-build-service       builds the package with `$(UV) build`.
#   pkg-test-service        re-invokes make on `uv-test-target` for this package.
#   pkg-test-exact-service  same, via `uv-test-target-exact`.
# The two uv-test-* targets are not shown in this view; they receive the
# package name (PKG) and the directory holding its tests (TEST_PATH).
# Recipe lines must begin with a hard TAB or make reports "missing separator".
pkg-build-service:
	$(UV) build --package stitch-service
pkg-test-service:
	$(MAKE) uv-test-target PKG=stitch-service TEST_PATH=packages/stitch-service
pkg-test-exact-service:
	$(MAKE) uv-test-target-exact PKG=stitch-service TEST_PATH=packages/stitch-service

# stitch-jobs package targets.
#   pkg-build-jobs       builds the package with `$(UV) build`.
#   pkg-test-jobs        re-invokes make on `uv-test-target` for this package.
#   pkg-test-exact-jobs  same, via `uv-test-target-exact`.
# The two uv-test-* targets are not shown in this view; they receive the
# package name (PKG) and the directory holding its tests (TEST_PATH).
# Recipe lines must begin with a hard TAB or make reports "missing separator".
pkg-build-jobs:
	$(UV) build --package stitch-jobs
pkg-test-jobs:
	$(MAKE) uv-test-target PKG=stitch-jobs TEST_PATH=packages/stitch-jobs
pkg-test-exact-jobs:
	$(MAKE) uv-test-target-exact PKG=stitch-jobs TEST_PATH=packages/stitch-jobs

# stitch-observability package targets.
#   pkg-build-observability       builds the package with `$(UV) build`.
#   pkg-test-observability        re-invokes make on `uv-test-target` for this package.
#   pkg-test-exact-observability  same, via `uv-test-target-exact`.
# The two uv-test-* targets are not shown in this view; they receive the
# package name (PKG) and the directory holding its tests (TEST_PATH).
# Recipe lines must begin with a hard TAB or make reports "missing separator".
pkg-build-observability:
	$(UV) build --package stitch-observability
pkg-test-observability:
	$(MAKE) uv-test-target PKG=stitch-observability TEST_PATH=packages/stitch-observability
pkg-test-exact-observability:
	$(MAKE) uv-test-target-exact PKG=stitch-observability TEST_PATH=packages/stitch-observability

# Aggregate package targets: each one has no recipe of its own and simply
# depends on the per-package target of the same kind, one prerequisite per
# workspace package (auth, client, models, ogsi, service, jobs, observability).
pkg-build: \
  pkg-build-auth \
  pkg-build-client \
  pkg-build-models \
  pkg-build-ogsi \
  pkg-build-service \
  pkg-build-jobs \
  pkg-build-observability
pkg-test: \
  pkg-test-auth \
  pkg-test-client \
  pkg-test-models \
  pkg-test-ogsi \
  pkg-test-service \
  pkg-test-jobs \
  pkg-test-observability
pkg-test-exact: \
  pkg-test-exact-auth \
  pkg-test-exact-client \
  pkg-test-exact-models \
  pkg-test-exact-ogsi \
  pkg-test-exact-service \
  pkg-test-exact-jobs \
  pkg-test-exact-observability

# ---------------------------------------------------------------------
# Deployments
Expand Down Expand Up @@ -291,6 +312,9 @@ follow-stack-logs:
pkg-build-client pkg-test-client pkg-test-exact-client \
pkg-build-models pkg-test-models pkg-test-exact-models \
pkg-build-ogsi pkg-test-ogsi pkg-test-exact-ogsi \
pkg-build-service pkg-test-service pkg-test-exact-service \
pkg-build-jobs pkg-test-jobs pkg-test-exact-jobs \
pkg-build-observability pkg-test-observability pkg-test-exact-observability \
\
# API
api-build api-test api-test-exact api-dev stack-api-dev \
Expand Down
2 changes: 2 additions & 0 deletions deployments/api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"sqlalchemy>=2.0.44",
"stitch-auth",
"stitch-models",
"stitch-observability",
"stitch-ogsi",
]

Expand Down Expand Up @@ -47,4 +48,5 @@ addopts = ["-v", "--strict-markers", "--tb=short"]
[tool.uv.sources]
stitch-auth = { workspace = true }
stitch-models = { workspace = true }
stitch-observability = { workspace = true }
stitch-ogsi = { workspace = true }
159 changes: 33 additions & 126 deletions deployments/api/src/stitch/api/observability/tracing.py
Original file line number Diff line number Diff line change
@@ -1,139 +1,46 @@
"""OpenTelemetry tracing setup for the API.

Span *generation* is handled by auto-instrumentation (FastAPI + SQLAlchemy);
this module owns span *export*, which is configurable:

* ``console`` (default) — finished spans are emitted as structured log records
through the existing :class:`JsonFormatter` (see :mod:`logging_config`), so
local dev gets full trace data on stdout **without** running the collector /
Jaeger sidecars. This is the "log what OTel would send" path.
* ``otlp`` — spans are shipped via OTLP/gRPC to the collector (``→`` Jaeger).
* ``none`` — tracing is disabled entirely.

Sampling uses ``ParentBased(root=TraceIdRatioBased(ratio))`` so the API honors
an upstream caller's sampling decision (propagated via the W3C ``traceparent``
header) and only samples independently when it is the root of a trace. The
ratio defaults to 1.0 (capture everything) for local dev.
"""OpenTelemetry tracing for the API — a thin wrapper over the shared
``stitch.observability`` package (one source of truth across services).

Keeps this module's historical surface (``SERVICE_NAME``,
``configure_tracing(settings)``, ``instrument_fastapi``, ``instrument_sqlalchemy``,
``LoggingSpanExporter``) so call sites (``main.py``, ``db/config.py``) and tests
don't change. The API's query-timing / request-logging / sinks layer stays
API-specific (it hangs off the SQLAlchemy engine).
"""

import logging
from typing import TYPE_CHECKING

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
SimpleSpanProcessor,
SpanExporter,
SpanExportResult,
from stitch.observability import (
LoggingSpanExporter,
configure_tracing as _configure_tracing,
instrument_fastapi,
instrument_sqlalchemy,
)
from opentelemetry.sdk.trace.sampling import ParentBased, TraceIdRatioBased

if TYPE_CHECKING:
from collections.abc import Sequence

from fastapi import FastAPI
from opentelemetry.sdk.trace import ReadableSpan
from sqlalchemy.engine import Engine
from opentelemetry.sdk.trace import TracerProvider

from ..settings import Settings

SERVICE_NAME = "stitch-api"

_span_logger = logging.getLogger("stitch.api.observability.trace")


class LoggingSpanExporter(SpanExporter):
    """Span exporter that writes finished spans to the log, not a collector.

    Every span produces exactly one ``info`` record on the
    ``stitch.api.observability.trace`` logger. The span's fields are carried
    in the record's ``event`` extra; the :class:`JsonFormatter` is expected to
    flatten that dict to the top level so ``trace_id`` / ``duration_ms`` can be
    queried directly, next to the request / query events on the same stdout
    stream.
    """

    def export(self, spans: "Sequence[ReadableSpan]") -> SpanExportResult:
        """Log one record per span in ``spans`` and report success."""
        for finished in spans:
            span_ctx = finished.get_span_context()
            parent_ctx = finished.parent

            # A span missing either timestamp has no measurable duration.
            if finished.end_time is None or finished.start_time is None:
                elapsed_ms = None
            else:
                # Dividing the timestamp difference by 1e6 gives the value
                # reported as ``duration_ms`` (rounded to two decimals).
                elapsed_ms = round((finished.end_time - finished.start_time) / 1e6, 2)

            event = {
                "span_name": finished.name,
                "trace_id": format(span_ctx.trace_id, "032x"),
                "span_id": format(span_ctx.span_id, "016x"),
                # Root spans have no parent; emit ``None`` for them.
                "parent_span_id": None
                if parent_ctx is None
                else format(parent_ctx.span_id, "016x"),
                "kind": finished.kind.name,
                "duration_ms": elapsed_ms,
                "status": finished.status.status_code.name,
                "attributes": dict(finished.attributes or {}),
            }
            _span_logger.info("span", extra={"event": event})

        return SpanExportResult.SUCCESS

    def force_flush(self, timeout_millis: int = 30_000) -> bool:
        """Always report success: ``export`` logs each span immediately."""
        return True


def configure_tracing(settings: "Settings") -> TracerProvider | None:
"""Install the global tracer provider, or return ``None`` if disabled.

Call once at startup, before the first span is created. Idempotency is not
guaranteed — ``set_tracer_provider`` warns if called twice.
"""
if not settings.otel_enabled or settings.otel_traces_exporter == "none":
return None

resource = Resource.create(
{
"service.name": SERVICE_NAME,
"service.version": settings.app_version or "unknown",
"deployment.environment": settings.environment_name,
}
__all__ = [
"SERVICE_NAME",
"LoggingSpanExporter",
"configure_tracing",
"instrument_fastapi",
"instrument_sqlalchemy",
]


def configure_tracing(settings: "Settings") -> "TracerProvider | None":
    """Configure tracing for the API from its ``settings`` object.

    Thin adapter over the shared ``stitch.observability.configure_tracing``
    (imported in this module as ``_configure_tracing``): it supplies the API's
    ``SERVICE_NAME`` and forwards the OTel-related fields read from
    ``settings``.

    Returns:
        Whatever the shared helper returns — the installed tracer provider,
        or ``None`` if tracing is disabled.
    """
    tracing_options = {
        "service_name": SERVICE_NAME,
        "enabled": settings.otel_enabled,
        "exporter": settings.otel_traces_exporter,
        "otlp_endpoint": settings.otel_exporter_otlp_endpoint,
        "sample_ratio": settings.otel_sample_ratio,
        # Fall back to a placeholder when no application version is set.
        "version": settings.app_version or "unknown",
        "environment": settings.environment_name,
    }
    return _configure_tracing(**tracing_options)
sampler = ParentBased(root=TraceIdRatioBased(settings.otel_sample_ratio))
provider = TracerProvider(resource=resource, sampler=sampler)

if settings.otel_traces_exporter == "otlp":
# endpoint=None lets the exporter fall back to OTEL_EXPORTER_OTLP_ENDPOINT
# / the localhost default.
exporter = OTLPSpanExporter(endpoint=settings.otel_exporter_otlp_endpoint)
provider.add_span_processor(BatchSpanProcessor(exporter))
else: # "console" — log spans to stdout, no sidecar required.
provider.add_span_processor(SimpleSpanProcessor(LoggingSpanExporter()))

trace.set_tracer_provider(provider)
return provider


def instrument_fastapi(app: "FastAPI") -> None:
    """Turn on OpenTelemetry auto-instrumentation for the FastAPI ``app``.

    This provides server spans and ``traceparent`` extraction.

    Query strings in URLs are deliberately not scrubbed here: they are the
    diagnostic payload for the performance work this instrumentation serves.
    If a retained (cloud) backend makes aggregate PII a concern, redact them
    at the collector's egress with an ``attributes``/``redaction`` processor
    instead of blinding local development.
    """
    instrument_app = FastAPIInstrumentor.instrument_app
    instrument_app(app)


def instrument_sqlalchemy(engine: "Engine") -> None:
    """Enable per-query spans on a synchronous SQLAlchemy ``engine``.

    For an ``AsyncEngine``, pass its ``sync_engine`` attribute
    (``async_engine.sync_engine``) rather than the async engine itself.
    """
    instrumentor = SQLAlchemyInstrumentor()
    instrumentor.instrument(engine=engine)
3 changes: 2 additions & 1 deletion deployments/api/tests/observability/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from stitch.api.observability.tracing import LoggingSpanExporter, configure_tracing
from stitch.api.settings import Settings

_TRACE_LOGGER = "stitch.api.observability.trace"
# Span log records now come from the shared stitch-observability exporter.
_TRACE_LOGGER = "stitch.observability.trace"


@pytest.fixture
Expand Down
6 changes: 6 additions & 0 deletions deployments/entity-linkage/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import os

# Switch tracing off for the whole test suite. This must run at conftest
# import time, i.e. before the app module is imported and calls
# configure_tracing (the API's rootdir conftest does the same thing).
# A value already present in the environment is left untouched; otherwise the
# variable is set here, and an environment variable takes precedence over the
# .env file's value under pydantic-settings.
if "OTEL_TRACES_EXPORTER" not in os.environ:
    os.environ["OTEL_TRACES_EXPORTER"] = "none"
6 changes: 6 additions & 0 deletions deployments/entity-linkage/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ dependencies = [
"pydantic-settings>=2.12.0",
"stitch-auth",
"stitch-client",
"stitch-jobs",
"stitch-models",
"stitch-observability",
"stitch-ogsi",
"stitch-service",
]

[build-system]
Expand Down Expand Up @@ -41,5 +44,8 @@ addopts = ["-v", "--strict-markers", "--tb=short"]
[tool.uv.sources]
stitch-auth = { workspace = true }
stitch-client = { workspace = true }
stitch-jobs = { workspace = true }
stitch-models = { workspace = true }
stitch-observability = { workspace = true }
stitch-ogsi = { workspace = true }
stitch-service = { workspace = true }
Loading
Loading