# See https://fly.io/docs/app-guides/continuous-deployment-with-github-actions/
#
# Deploys the web builder to Fly.io on every push to main.

name: Fly Deploy
on:
  push:
    branches:
      - main
jobs:
  deploy:
    name: Deploy app
    runs-on: ubuntu-latest
    concurrency: deploy-group # optional: ensure only one action runs at a time
    steps:
      - uses: actions/checkout@v4
      - uses: superfly/flyctl-actions/setup-flyctl@master
      # Forward the commit as the GIT_SHA build arg (the Dockerfile declares
      # `ARG GIT_SHA=unknown`). Without it, an image deployed from CI would
      # report "unknown" on GET /version. The short SHA matches the format
      # the Makefile's deploy target passes.
      - run: |
          flyctl deploy --remote-only --build-arg GIT_SHA="$(git rev-parse --short HEAD)"
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
diff --git a/CHANGELOG.md b/CHANGELOG.md index 48e9367..af87f0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,45 @@ Versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Added +- **Per-IP rate limit on `POST /generate`** (10 requests / hour / + client IP, sliding window). Caps abuse cost at the + most-expensive route — each `/generate` triggers an Anthropic + narrative call costing ~$0.10–$0.30, so without a cap a single + abusive IP could burn the published Anthropic spend ceiling in + minutes. Limit lives in-process (`web.ratelimit.RateLimiter`) and + is keyed on `Fly-Client-IP` (Fly's edge proxy) → `X-Forwarded-For` + → socket peer; `request.client.host` alone would be useless on + Fly because it is one of Fly's load-balancer addresses. Bounded + to 10 000 tracked IPs (LRU-by-insert eviction) so a flood of + unique sources cannot OOM the process. Over-quota responses are + HTTP 429 with a `Retry-After` header, returned before the + multipart body is parsed (FastAPI dependency runs first when the + dep only takes `Request`). Picked over a global cap because per-IP + bounds the worst case from a single attacker; a global counter + would not have helped against a botnet hitting each IP once and + would have hurt legitimate concurrent use during a launch. +- **Fly.io deployment config** (`Dockerfile`, `fly.toml`, `.dockerignore`) + for the web builder. The image is `python:3.12-slim` with the project + installed in editable mode so `trailstory.renderers.html` keeps + finding the top-level `templates/` directory at runtime; the renderer + resolves it via `Path(__file__).parents[2] / "templates"`, which only + works when the package source lives next to `templates/` — a + non-editable install would relocate `trailstory/` into site-packages + and break the path. 
`fly.toml` ships a 512 MB shared-cpu-1x VM in + `fra` with a `/healthz` HTTP check, `force_https`, auto-stop on idle, + and no persistent volume — the 30-min retention sweep runs against + `/tmp` and restarts wipe in-flight workspaces, which is *stronger* + than the published privacy promise. +- **`/version` endpoint** that returns the running image's git SHA + (sourced from the `GIT_SHA` build arg, falls back to `"unknown"` for + local runs). Wired to the `make deploy` target so every Fly deploy + stamps the current commit into the image and a deploy-correlated bug + can be tied back to the source without log archaeology. +- **`make docker-build` / `make deploy` targets**. `deploy` refuses to + ship a dirty working tree and forwards `GIT_SHA` to + `flyctl deploy --build-arg`, so the SHA in `/version` always matches + the commit Fly built from. `docker-build` exists for local smoke + tests before the first deploy. - **Streaming narrative generation via Server-Sent Events** for the web builder. `POST /generate` now runs the deterministic prep phase (parse GPX + load_photos + persist `pending.json`), wipes the raw diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..aef9d0b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,50 @@ +# syntax=docker/dockerfile:1.7 +# +# Trailstory web builder — single-stage image for Fly.io. +# +# We install the package in editable mode on purpose: the HTML renderer +# resolves the memory template via ``Path(__file__).parents[2] / "templates"`` +# (see trailstory/renderers/html.py:27), which only works when the +# package source lives next to the top-level templates/ directory at +# runtime. A non-editable install would relocate trailstory/ into +# site-packages and break that path. See CLAUDE.md → "Architecture and +# file map" for the layout this depends on. 
+ +FROM python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 + +WORKDIR /app + +# Metadata and source are copied before the install because the editable +# install needs the package on disk; any source change re-runs pip install. +COPY pyproject.toml README.md LICENSE ./ +COPY trailstory/ ./trailstory/ +COPY web/ ./web/ +COPY templates/ ./templates/ + +# Editable install: dependencies + package, with trailstory/ pointing +# back at /app/trailstory so the renderer's parents[2] still resolves +# to /app/templates. +RUN pip install -e . + +# Drop privileges. The retention sweeper writes under $TMPDIR +# (default /tmp) which is world-writeable, so a non-root user is fine. +RUN useradd --create-home --uid 1000 app && chown -R app:app /app +USER app + +# Build identity — set by `make deploy` / `flyctl deploy --build-arg`. +# Surfaced via GET /version so a deploy can be tied back to a commit. +ARG GIT_SHA=unknown +ENV GIT_SHA=${GIT_SHA} + +# Fly injects PORT at runtime; default keeps `docker run` ergonomic. +ENV PORT=8080 +EXPOSE 8080 + +# Exec-form CMD wrapping `sh -c` so ${PORT} expands. One uvicorn worker — the LLM call is +# I/O-bound and v0 traffic does not justify multi-worker complexity. 
+CMD ["sh", "-c", "exec uvicorn web.__main__:app --host 0.0.0.0 --port ${PORT}"] diff --git a/Makefile b/Makefile index b838621..72c4a4e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: help setup dev install install-hooks test lint format typecheck ci clean generate test-render golden-update eval eval-live eval-update-golden web web-dev +.PHONY: help setup dev install install-hooks test lint format typecheck ci clean generate test-render golden-update eval eval-live eval-update-golden web web-dev docker-build deploy PYTHON ?= python3.12 VENV := .venv @@ -109,6 +109,26 @@ web: ## Run the FastAPI builder against the real Anthropic API (n web-dev: ## Run the FastAPI builder with a fake LLM (free, deterministic narrative) $(PY) -m web --fake-llm --reload +# ── Deploy ───────────────────────────────────────────────────────────────────── + +# Resolved at make-invocation time so the same value reaches the Dockerfile +# build arg and the /version endpoint. `--short` keeps the SHA readable. +GIT_SHA := $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) + +docker-build: ## Build the production image locally (smoke test before deploy) + docker build --build-arg GIT_SHA=$(GIT_SHA) -t trailstory:$(GIT_SHA) -t trailstory:latest . + +deploy: ## Deploy to Fly.io with the current git SHA stamped into /version + @command -v flyctl >/dev/null 2>&1 || { \ + echo "→ flyctl not found. Install: brew install flyctl"; exit 1; \ + } + @if ! git diff-index --quiet HEAD --; then \ + echo "→ working tree is dirty. 
def _request(headers: dict[str, str], peer: str | None = "127.0.0.1") -> Any:
    """Return a lightweight stub that quacks like :class:`fastapi.Request`.

    Only the two attributes ``client_ip`` looks at are populated:
    ``headers`` (the mapping passed in) and ``client`` (``None`` when
    ``peer`` is ``None``, otherwise a mock whose ``host`` is ``peer``).
    A MagicMock keeps the header-lookup tests free of a real Starlette
    Request.
    """
    stub = MagicMock()
    stub.headers = headers
    # ``MagicMock(host=...)`` configures the attribute at construction.
    stub.client = None if peer is None else MagicMock(host=peer)
    return stub
def test_limiter_evicts_oldest_when_max_keys_hit() -> None:
    """A flood of unique IPs must not grow the bucket map without bound.

    With ``max_keys=2``, inserting a third key evicts the first. The
    dict is private, so eviction is observed through behaviour:

    * the evicted key gets a fresh bucket on its next request even
      though it was previously over-quota, and
    * a key that was *not* evicted is still over-quota, which proves
      eviction removes one entry rather than clearing the whole map.
    """
    rl = RateLimiter(limit=1, window_seconds=60, max_keys=2)
    rl.check("a", now=1.0)  # bucket for "a" full
    rl.check("b", now=2.0)  # bucket for "b" full
    rl.check("c", now=3.0)  # forces eviction of "a"
    # If "a" had been preserved, this would be denied. Re-admitting "a"
    # evicts "b" (now the oldest insert), leaving {"c", "a"}.
    assert rl.check("a", now=4.0)[0] is True
    # "c" survived both evictions, so its quota is still spent.
    assert rl.check("c", now=5.0)[0] is False
def test_version_reports_git_sha_from_env(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``GET /version`` reflects the ``GIT_SHA`` environment variable."""
    monkeypatch.setenv("GIT_SHA", "abc1234")

    response = client.get("/version")

    assert response.status_code == 200
    payload = response.json()
    assert payload["git_sha"] == "abc1234"
    assert payload["version"] == "0.1.0"
def test_generate_rate_limit_keys_on_fly_client_ip(storage: Storage) -> None:
    """Each distinct ``Fly-Client-IP`` value is counted in its own bucket.

    With a limit of one request per window, two different addresses
    both succeed, and only the repeat from the first address is
    rejected.
    """
    app, _ = _app_with_storage(
        storage,
        rate_limiter=RateLimiter(limit=1, window_seconds=3600),
    )

    with TestClient(app) as c:

        def post_as(ip: str, description: str):  # type: ignore[no-untyped-def]
            # One /generate submission attributed to ``ip``.
            return c.post(
                "/generate",
                data={"description": description, "style": "editorial"},
                files=_generate_files(),
                headers={"Fly-Client-IP": ip},
            )

        first = post_as("203.0.113.1", "Client A.")
        second = post_as("203.0.113.2", "Client B.")
        third = post_as("203.0.113.1", "Client A again.")

    assert first.status_code == 200
    assert second.status_code == 200
    assert third.status_code == 429
6ff905b..92e0b2c 100644 --- a/web/app.py +++ b/web/app.py @@ -28,6 +28,7 @@ from trailstory.config import Settings, load_settings from trailstory.llm.client import AnthropicClient +from web.ratelimit import GENERATE_LIMIT_PER_HOUR, GENERATE_WINDOW_SECONDS, RateLimiter from web.routes import router from web.storage import Storage @@ -48,6 +49,7 @@ def create_app( settings: Settings | None = None, storage: Storage | None = None, client_factory: Callable[[], AnthropicClient] | None = None, + rate_limiter: RateLimiter | None = None, enable_sweeper: bool = True, ) -> FastAPI: """Build a configured FastAPI app. @@ -61,6 +63,10 @@ def create_app( client_factory: Callable returning a configured ``AnthropicClient``. Tests inject a factory that returns a ``MagicMock`` so the real SDK is never reached. + rate_limiter: Per-IP limiter for ``/generate``. Defaults to a + sliding-window ``RateLimiter`` sized at + :data:`web.ratelimit.GENERATE_LIMIT_PER_HOUR`. Tests pass + a tiny limit so the 429 path is reachable in a few calls. enable_sweeper: When ``True`` (default), the retention sweep is scheduled on app startup. 
Tests disable this so they can assert sweep behaviour by calling ``Storage.sweep_expired`` @@ -71,6 +77,14 @@ def create_app( resolved_factory = ( client_factory if client_factory is not None else _default_client_factory(resolved_settings) ) + resolved_limiter = ( + rate_limiter + if rate_limiter is not None + else RateLimiter( + limit=GENERATE_LIMIT_PER_HOUR, + window_seconds=GENERATE_WINDOW_SECONDS, + ) + ) @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncIterator[None]: @@ -103,6 +117,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: app.state.settings = resolved_settings app.state.storage = resolved_storage app.state.client_factory = resolved_factory + app.state.generate_limiter = resolved_limiter app.state.templates = Jinja2Templates(directory=str(TEMPLATE_DIR)) if STATIC_DIR.is_dir(): diff --git a/web/ratelimit.py b/web/ratelimit.py new file mode 100644 index 0000000..7bfd506 --- /dev/null +++ b/web/ratelimit.py @@ -0,0 +1,166 @@ +"""In-process per-IP rate limit for the expensive ``/generate`` route. + +A single ``/generate`` request triggers an Anthropic narrative call +costing roughly $0.10-$0.30. Without a cap, one abusive client could +burn through the published Anthropic spend ceiling in minutes — Fly's +own pay-as-you-go tier exposes no hard cap on most accounts, so the +defense lives in the app. + +Implementation: + +* Sliding-window counter per client IP. Each ``check`` evicts + expired timestamps from the head of the per-IP deque before + comparing length against the configured limit. +* Bounded total size — when ``max_keys`` is reached we drop the + least-recently-inserted IP. Python's dict insertion-order semantics + give us LRU-by-insert for free, which is good enough to bound RAM + under a flood of unique source IPs. +* Thread-safe via a single ``threading.Lock`` around the bucket + map. 
# Per-IP cap. Picked so a single client can iterate a few times on
# their own memory (3-5 prompt tweaks plus the carousel call) without
# friction, while bounding the worst case to roughly $2/hour of
# Anthropic spend per IP.
GENERATE_LIMIT_PER_HOUR: Final[int] = 10
GENERATE_WINDOW_SECONDS: Final[int] = 60 * 60

# Hard ceiling on the IP map so a flood of unique source IPs cannot
# OOM the process. Each tracked IP owns one deque of at most ``limit``
# floats; at 10k entries that is roughly on the order of 10 MB in
# CPython (an empty deque alone is several hundred bytes) — still far
# below the 512 MB Fly machine.
DEFAULT_MAX_KEYS: Final[int] = 10_000


class RateLimiter:
    """Sliding-window per-key request counter.

    One instance per app, hung off ``app.state.generate_limiter``.
    Tests construct their own with a tiny ``limit`` so the 429 path
    is reachable in a few requests without sleeping.

    Args:
        limit: Maximum accepted requests per key inside one window.
        window_seconds: Length of the sliding window, in seconds.
        max_keys: Maximum number of keys tracked at once. When a new
            key arrives and the map is full, the oldest-inserted key
            is dropped (FIFO).

    Raises:
        ValueError: If ``limit``, ``window_seconds`` or ``max_keys`` is
            not strictly positive.
    """

    __slots__ = ("_buckets", "_limit", "_lock", "_max_keys", "_window")

    def __init__(
        self,
        *,
        limit: int,
        window_seconds: int,
        max_keys: int = DEFAULT_MAX_KEYS,
    ) -> None:
        if limit <= 0:
            raise ValueError("limit must be positive")
        if window_seconds <= 0:
            raise ValueError("window_seconds must be positive")
        if max_keys <= 0:
            # With max_keys <= 0 the eviction branch in ``check`` would
            # call ``next(iter({}))`` on an empty map and raise
            # StopIteration on the very first request.
            raise ValueError("max_keys must be positive")
        self._limit = limit
        self._window = window_seconds
        self._max_keys = max_keys
        # key -> accepted-request timestamps, oldest first.
        self._buckets: dict[str, deque[float]] = {}
        self._lock = threading.Lock()

    def check(self, key: str, *, now: float | None = None) -> tuple[bool, int]:
        """Record a request attempt for ``key`` and return the verdict.

        Args:
            key: Bucket identifier (the client IP for ``/generate``).
            now: Timestamp to use instead of ``time.monotonic()``;
                tests inject it to exercise window expiry.

        Returns:
            ``(allowed, retry_after_seconds)``. When ``allowed`` is
            ``True`` the attempt has been recorded and
            ``retry_after_seconds`` is ``0``. When ``allowed`` is
            ``False`` nothing is recorded and ``retry_after_seconds``
            is the integer-second wait until the oldest in-window
            entry ages out — suitable for a ``Retry-After`` header.
        """
        ts = now if now is not None else time.monotonic()
        cutoff = ts - self._window
        with self._lock:
            bucket = self._buckets.get(key)
            if bucket is None:
                if len(self._buckets) >= self._max_keys:
                    # Map is full and ``key`` is new: drop the entry
                    # that was inserted first. Dicts iterate in
                    # insertion order and existing keys are never
                    # re-inserted, so this is FIFO by first-seen time.
                    self._buckets.pop(next(iter(self._buckets)))
                bucket = deque()
                self._buckets[key] = bucket
            # Discard timestamps that have slid out of the window.
            while bucket and bucket[0] <= cutoff:
                bucket.popleft()
            if len(bucket) >= self._limit:
                # +1 so a client that obeys Retry-After lands just
                # past the boundary instead of bouncing again on the
                # rounding floor.
                retry_after = max(1, int(bucket[0] + self._window - ts) + 1)
                return False, retry_after
            bucket.append(ts)
            return True, 0


def client_ip(request: Request) -> str:
    """Best-effort extraction of the originating client IP.

    Trust order:

    1. ``Fly-Client-IP`` — set by Fly's edge proxy. This is the truth
       on Fly.
    2. ``X-Forwarded-For`` — first hop. Used by other proxies. A
       hostile client behind a non-Fly deployment could spoof this,
       but the worst case is "evades the limit", not "breaks the
       process".
    3. ``request.client.host`` — the socket peer. The right answer
       for a direct connection (local dev).

    Header values are stripped, and a blank value (for example an
    ``X-Forwarded-For`` that starts with a comma) falls through to the
    next source instead of becoming an empty bucket key.

    Returns:
        The chosen address string, or ``"unknown"`` when no header is
        usable and ``request.client`` is ``None``.
    """
    fly_ip = (request.headers.get("fly-client-ip") or "").strip()
    if fly_ip:
        return fly_ip
    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        first_hop = forwarded.split(",", 1)[0].strip()
        if first_hop:
            return first_hop
    if request.client is not None:
        return request.client.host
    return "unknown"


def enforce_generate_limit(request: Request) -> None:
    """FastAPI dependency for the ``/generate`` route.

    Looks up the limiter on ``request.app.state.generate_limiter`` and
    records one attempt for the caller's IP (see :func:`client_ip`).
    The dependency only takes ``Request``, so it is intended to run
    before the multipart form fields are materialised; a rate-limited
    client gets a 429 and the expensive Anthropic call is avoided.

    Raises:
        HTTPException: 429 with a ``Retry-After`` header when the
            caller is over quota.
    """
    limiter: RateLimiter = request.app.state.generate_limiter
    allowed, retry_after = limiter.check(client_ip(request))
    if not allowed:
        raise HTTPException(
            status_code=429,
            detail=(
                "Too many memory generations from your address. "
                f"Try again in {retry_after} seconds."
            ),
            headers={"Retry-After": str(retry_after)},
        )
@router.get("/version")
async def version() -> dict[str, str]:
    """Build identity for the running image.

    Returns a mapping with two keys:

    * ``version`` — the release string, hard-coded here to ``"0.1.0"``.
    * ``git_sha`` — the ``GIT_SHA`` environment variable, or
      ``"unknown"`` when it is unset *or empty*. An empty value can
      happen when a build passes ``--build-arg GIT_SHA=`` with nothing
      after the ``=``; treating it as missing keeps the payload
      well-formed.
    """
    return {
        "version": "0.1.0",
        # ``or`` rather than a ``.get`` default so "" also falls back.
        "git_sha": os.environ.get("GIT_SHA") or "unknown",
    }