diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b224541..b21ce0b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: # runner. pynput stays in the list, but the conftest installs a # stub when its real import fails (headless Linux has no X11). pip install pytest fastmcp httpx requests pynput pydantic ruff \ - fastapi numpy + fastapi numpy pyotp 'qrcode[pil]' - name: Lint (ruff) — F-4 gate, config in ruff.toml run: ruff check . - name: Skill import smoke (standalone script, not a pytest module) diff --git a/.gitignore b/.gitignore index cfafb11..f69cbd3 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,9 @@ build/ *.egg-info/ *.egg *.whl + +# Test / coverage artifacts +.coverage +.coverage.* +htmlcov/ +.pytest_cache/ diff --git a/codec_bridges.py b/codec_bridges.py index a63842a..b582666 100644 --- a/codec_bridges.py +++ b/codec_bridges.py @@ -23,9 +23,19 @@ MEMORY_DB = os.path.expanduser("~/.codec/memory.db") -# Skills that need a terminal / GUI and must not run from a headless bridge. -_SKIP_SKILLS = {"open_terminal", "run_command", "vibe_code", "deep_chat", - "memory_search", "ask_mike_to_build"} +# C2 (Fix #2): inbound bridge messages may ONLY dispatch skills on this explicit +# safe list — read / info / pure-compute skills with no system side effects, no +# private-data disclosure, and no device/GUI control. Default-deny: anything not +# listed is NOT dispatched from a bridge (try_skill returns (None, None) and the +# caller degrades to an LLM answer — never a hard fail). This is the security +# boundary that keeps high-power skills (terminal, python_exec, file_write, +# pilot, process_manager, pm2_control, ax_control) unreachable from a (remote) +# bridge, replacing the old allow-by-default `_SKIP_SKILLS` denylist which did +# NOT exclude them. Skills are added here deliberately, one at a time. +BRIDGE_SAFE_SKILLS = frozenset({ + "weather", "time", "calculator", "translate", "bitcoin_price", + "web_search", "json_formatter", "password_generator", "qr_generator", +}) # Per-channel assistant persona (the only thing that differed between the two # bridges' call_llm). `{now}` is the formatted current date/time. @@ -69,14 +79,23 @@ def load_dispatch() -> bool: def try_skill(text): - """Match a CODEC skill for `text`. Returns (skill_name, result) or - (None, None) — skipping terminal/GUI skills that a bridge can't run.""" + """Match a CODEC skill for `text` and run it IF it is on BRIDGE_SAFE_SKILLS. + + Returns (skill_name, result) for a dispatched safe skill, or (None, None) + when nothing safe matched — in which case the caller falls back to an LLM + answer (graceful degradation, never a hard fail). C2 fail-closed: any skill + not on the allowlist (every high-power skill included) is never dispatched + from a bridge. + """ if not load_dispatch(): return (None, None) try: skill = _check_skill(text) if skill: - if skill["name"] in _SKIP_SKILLS: + if skill["name"] not in BRIDGE_SAFE_SKILLS: + log.info( + "Skill '%s' not on BRIDGE_SAFE_SKILLS — not dispatched from " + "bridge (falling back to LLM)", skill["name"]) return (None, None) result = _run_skill(skill, text) if result: diff --git a/codec_dashboard.py b/codec_dashboard.py index 4a61630..978d55b 100644 --- a/codec_dashboard.py +++ b/codec_dashboard.py @@ -2215,8 +2215,11 @@ async def web_search_endpoint(request: Request): "terminal", # File operations (read, write, append, list — path-restricted) "file_ops", - # Python execution (sandboxed, blocked dangerous imports) - "python_exec", + # NOTE: python_exec is intentionally NOT on this allowlist (audit C3). + # It stays a local skill but is no longer auto-firable from a chat message + # (pre-LLM hijack / post-LLM [SKILL:...] tag both gate on this set), so an + # injection-style chat message can't drive arbitrary code execution. + # SKILL_MCP_EXPOSE=False already keeps it off MCP. # Google services "google_calendar", "google_gmail", "google_docs", "google_drive", "google_sheets", "google_keep", @@ -2265,7 +2268,6 @@ async def web_search_endpoint(request: Request): [SKILL:weather:weather in Paris] [SKILL:terminal:ls -la ~/Documents] [SKILL:file_ops:read file ~/notes.txt] -[SKILL:python_exec:run python print(2**100)] [SKILL:pm2_control:pm2 list] [SKILL:google_calendar:what's on my calendar today] The skill's real output replaces the tag automatically — emit the tag and stop, never fabricate the result. diff --git a/codec_imessage.py b/codec_imessage.py index 1816f1c..15b4d5c 100644 --- a/codec_imessage.py +++ b/codec_imessage.py @@ -60,7 +60,7 @@ def get_imessage_config(cfg): im = cfg.get("imessage", {}) return { "enabled": im.get("enabled", True), - "allowed_senders": im.get("allowed_senders", []), # empty = allow all + "allowed_senders": im.get("allowed_senders", []), # empty = DENY all (fail-closed, C2) "blocked_senders": im.get("blocked_senders", []), "poll_interval": im.get("poll_interval", 3), # seconds "max_response_length": im.get("max_response_length", 4000), @@ -241,8 +241,19 @@ def _convert_apple_date(apple_date): # ── Sender filtering ──────────────────────────────────────────────────────── +# C2 (Fix #2): one-time "no allowlist configured" warning flag. Module-level so +# the fail-closed deny logs ONCE per process instead of once per inbound message. +_warned_no_allowlist = False + + def is_sender_allowed(sender, im_cfg): - """Check if sender is allowed based on allowlist/blocklist.""" + """Check if sender is allowed based on allowlist/blocklist. + + FAIL-CLOSED (C2): an EMPTY (or missing) ``allowed_senders`` denies ALL + inbound. A bridge with no configured allowlist must not respond to arbitrary + senders just because someone learned the handle. The operator enables replies + by adding their own handle to ``config.imessage.allowed_senders``. + """ if not sender: return False @@ -252,7 +263,16 @@ def is_sender_allowed(sender, im_cfg): return False allowed = im_cfg.get("allowed_senders", []) - if allowed and sender not in allowed: + if not allowed: + global _warned_no_allowlist + if not _warned_no_allowlist: + log.warning( + "bridge_no_allowlist: imessage allowed_senders is empty — " + "denying ALL inbound (fail-closed, C2). Add your handle to " + "config.imessage.allowed_senders to enable replies.") + _warned_no_allowlist = True + return False + if sender not in allowed: log.info(f"Sender not in allowlist: {sender}") return False diff --git a/codec_oauth_provider.py b/codec_oauth_provider.py index d9f2547..8a3a1ee 100644 --- a/codec_oauth_provider.py +++ b/codec_oauth_provider.py @@ -22,7 +22,6 @@ import json import os -import stat import time import secrets import threading @@ -34,6 +33,8 @@ from fastmcp.server.auth.providers.in_memory import InMemoryOAuthProvider +from codec_jsonstore import atomic_write_json + try: from codec_audit import log_event as _oauth_log_event except ImportError: # pragma: no cover — audit unavailable shouldn't break OAuth @@ -142,7 +143,8 @@ def _save(self): # legacy plaintext path so OAuth keeps working — operational # continuity > strict secret isolation. with self._lock: - blob = json.dumps(self._serialize()) + state = self._serialize() + blob = json.dumps(state) kc_ok = False try: from codec_keychain import set_oauth_state @@ -159,12 +161,15 @@ def _save(self): pass return - # Fallback: legacy plaintext file (0600). Logged as a warning - # at the keychain layer; OAuth continues to function. - tmp = self._state_path.with_suffix(".tmp") - tmp.write_text(blob) - os.chmod(tmp, stat.S_IRUSR | stat.S_IWUSR) # 0600 - os.replace(tmp, self._state_path) + # Fallback: legacy plaintext file. Crash-durable write via the + # canonical jsonstore helper — unique tmp + flush + os.fsync + + # atomic os.replace + chmod 0600. C6 (Fix #1b): the previous + # `tmp.write_text(blob); os.replace(...)` skipped fsync, so a + # crash between write() and the page-cache flush could land a + # truncated/empty oauth_state.json and lose every token + # (claude.ai forced re-auth on next restart). atomic_write_json + # closes that durability window. + atomic_write_json(self._state_path, state) # ---------- overrides: persist after every mutation ---------- diff --git a/codec_sandbox.py b/codec_sandbox.py index 4524311..ac6d289 100644 --- a/codec_sandbox.py +++ b/codec_sandbox.py @@ -27,6 +27,7 @@ ;; Allow reading most files (skills need imports) (allow file-read*) +{read_deny_rules} ;; Allow writing ONLY to skill output dir and temp (allow file-write* (subpath "{skill_output}") @@ -65,6 +66,30 @@ (deny network-inbound) """ +# C3 (Fix #3): targeted read-denies layered AFTER the broad (allow file-read*). +# SBPL is last-match-wins, so these win over the broad allow for these specific +# paths while leaving stdlib / site-packages / interpreter reads working (the +# "don't break legitimate imports" constraint). Closes the python_exec / +# sandboxed-skill secret-exfil vector: ~/.ssh, ~/.aws, ~/.gnupg, ~/.config/gh, +# the macOS Keychain dirs, and the ~/.codec OAuth-token + fallback-secret files. +# NOTE: config.json is deliberately NOT denied — a sandboxed skill may +# `import codec_config`, which reads it at import time (its secrets are +# Keychain-backed since PR-2B, so config.json is no longer a secret store). +_READ_DENY_TEMPLATE = """\ +;; C3: deny reads of credential / secret paths (last-match-wins over the +;; broad allow above). Leaves stdlib imports intact. +(deny file-read* + (subpath "{home}/.ssh") + (subpath "{home}/.aws") + (subpath "{home}/.gnupg") + (subpath "{home}/.config/gh") + (subpath "{home}/Library/Keychains") + (subpath "/Library/Keychains") + (literal "{codec_dir}/oauth_state.json") + (literal "{codec_dir}/secret.key") + (literal "{codec_dir}/secrets.enc.json")) +""" + def _ensure_dirs(): os.makedirs(SKILL_OUTPUT_DIR, exist_ok=True) @@ -75,9 +100,14 @@ def _write_sandbox_profile(allow_network: bool = False) -> str: """Write a sandbox.sb profile and return its path.""" _ensure_dirs() network_rules = _NETWORK_ALLOW if allow_network else _NETWORK_DENY + read_deny_rules = _READ_DENY_TEMPLATE.format( + home=os.path.expanduser("~"), + codec_dir=_CODEC_DIR, + ) profile = _SANDBOX_PROFILE_TEMPLATE.format( skill_output=SKILL_OUTPUT_DIR, network_rules=network_rules, + read_deny_rules=read_deny_rules, ) with open(SANDBOX_PROFILE_PATH, "w") as f: f.write(profile) diff --git a/codec_telegram.py b/codec_telegram.py index bb81588..9e844a2 100644 --- a/codec_telegram.py +++ b/codec_telegram.py @@ -67,7 +67,7 @@ def get_telegram_config(cfg): bot_token = tg.get("bot_token", "") return { "bot_token": bot_token, - "allowed_chat_ids": tg.get("allowed_chat_ids", []), # empty = allow all + "allowed_chat_ids": tg.get("allowed_chat_ids", []), # empty = DENY all (fail-closed, C2) "require_trigger": tg.get("require_trigger", False), # DMs don't need trigger by default "max_response_length": tg.get("max_response_length", 4000), } @@ -492,6 +492,33 @@ def save_to_memory(chat_id, user_text, assistant_text): return codec_bridges.save_to_memory("telegram", chat_id, user_text, assistant_text) +# ── Chat filtering ────────────────────────────────────────────────────────── +# C2 (Fix #2): one-time "no allowlist configured" warning flag (mirrors +# codec_imessage). Module-level so the fail-closed deny logs ONCE per process. +_warned_no_allowlist = False + + +def is_chat_allowed(chat_id, tg_cfg): + """Whether `chat_id` may drive CODEC over the Telegram bridge. + + FAIL-CLOSED (C2): an EMPTY (or missing) ``allowed_chat_ids`` denies ALL + inbound — anyone who learns the bot token must NOT be able to drive CODEC. + The operator enables replies by adding their own chat id to + ``config.telegram.allowed_chat_ids``. + """ + allowed = tg_cfg.get("allowed_chat_ids", []) + if not allowed: + global _warned_no_allowlist + if not _warned_no_allowlist: + log.warning( + "bridge_no_allowlist: telegram allowed_chat_ids is empty — " + "denying ALL inbound (fail-closed, C2). Add your chat id to " + "config.telegram.allowed_chat_ids to enable replies.") + _warned_no_allowlist = True + return False + return chat_id in allowed + + # ── Process message ───────────────────────────────────────────────────────── def process_message(bot, update, tg_cfg, llm_cfg): msg = update.get("message", {}) @@ -504,10 +531,9 @@ def process_message(bot, update, tg_cfg, llm_cfg): if not chat_id: return - # Chat ID filter - allowed = tg_cfg.get("allowed_chat_ids", []) - if allowed and chat_id not in allowed: - log.info(f"Chat {chat_id} not in allowlist — ignored") + # Chat ID filter — FAIL-CLOSED (C2): empty allowed_chat_ids denies all. + if not is_chat_allowed(chat_id, tg_cfg): + log.info(f"Chat {chat_id} not allowed — ignored") return # ── Extract text ───────────────────────────────────────────────────── diff --git a/docs/SECURITY-REMEDIATION-DESIGN.md b/docs/SECURITY-REMEDIATION-DESIGN.md new file mode 100644 index 0000000..b99eb56 --- /dev/null +++ b/docs/SECURITY-REMEDIATION-DESIGN.md @@ -0,0 +1,273 @@ +# SECURITY-REMEDIATION-DESIGN + +> Remediation plan for the top-10 findings from the 2026-05-28 full code audit. +> This is the CLAUDE.md §11 design-first gate: **no working code has been edited.** +> Implementation begins only after the operator approves the order below +> (and explicitly signs off on the items flagged DECISION-REQUIRED). + +**Author:** audit follow-up · **Date:** 2026-05-28 · **Status:** AWAITING APPROVAL +**Source audit:** 9 specialist reviewers · 1,910 tests baseline (1,818 pass / 9 fail / 83 skip / 1 collection error) + +--- + +## 0. Verification baseline (ground-truthed before writing this doc) + +Every line reference below was re-read against current `HEAD` on 2026-05-28, not trusted from the audit summary. Corrections found during verification: + +| Claim in audit | Verified reality | Effect on plan | +|---|---|---| +| OAuth TTL 365d is a finding | `codec_oauth_provider.py:48-57` — **deliberately set to 365d TODAY** with documented threat-model rationale | **Dropped.** Don't-touch zone + maintainer decision. Not touched. | +| C6 `_save()` reads state pre-lock | `_serialize()` is called **inside** `self._lock` (line 144-145) | Narrowed: real gap is **no fsync** on fallback path (164-167) | +| C5 SQLite has no concurrency guard | True (`check_same_thread=False`, no lock, line 44) — but WAL + `busy_timeout=5000` are set (45-46) | Real, but lower blast radius than stated | +| C1 TOTP confirm trusts client secret | Confirmed verbatim (`routes/auth.py:194-218`) — `secret` taken from request body, verified against itself, written to config | Unchanged — genuine takeover | +| C3 python_exec in chat allowlist | Confirmed (`codec_dashboard.py:2219`) + sandbox `(allow file-read*)` (`codec_sandbox.py:28`) | Unchanged | +| C2 bridge fail-open | Confirmed (`codec_telegram.py:70` default `[]`, `:509` `if allowed and …`) | Unchanged | + +--- + +## 1. Scope + +**In scope:** the 9 Critical + selected High findings, sequenced as the audit's top-10 fix order. + +**Explicitly OUT of scope (don't-touch zones — require separate, explicit operator sign-off, NOT bundled here):** +- `ACCESS_TOKEN_TTL` / `REFRESH_TOKEN_TTL` (`codec_oauth_provider.py:56-57`) — maintainer-set 365d, documented today. +- `codec_audit.py` audit envelope schema (schema:1). +- `_HTTP_BLOCKED` membership in `codec_config.py` (we ADD a CHAT-path restriction in Fix #3; we do not alter `_HTTP_BLOCKED` itself). +- PM2 process names (Fix #6 changes the *interpreter path*, never a process `name:`). +- `codec_identity.py` operating principles. + +**Standing constraints honored throughout:** no synthetic code; no data deletion; verify wiring before and after each edit; one concern per commit; tests green after each file change; never auto-deploy skill proposals. + +--- + +## 2. Phased remediation plan + +### PHASE A — Critical security blockers (the release gate) + +These three are exploitable today and gate the next release. + +--- + +#### Fix #1 — Authenticated-TOTP enrollment + OAuth durability + fastmcp install + +**Closes:** C1 (TOTP takeover), C6 (OAuth fsync), C7 (fastmcp missing/CVE — *test-env + install only; TTL untouched*). + +**1a. C1 — `routes/auth.py:194-218` (`/api/auth/totp/confirm`)** + +*Problem:* the endpoint accepts `secret` from the request body, calls `pyotp.TOTP(secret).verify(code)` (which the attacker satisfies because they generated both), then writes that attacker-chosen secret to `config.json:totp_secret`. If the endpoint is reachable without an established auth session (it is — `/api/auth/*` must be pre-auth by design), an unauthenticated attacker enrolls their own 2FA secret → account takeover. + +*Change (two independent layers):* +1. **Require an established session to enroll.** `confirm` must reject unless the caller presents a valid `_auth_sessions` token (i.e. they already passed Touch ID / PIN). Enrolling 2FA is a logged-in action, not a pre-auth one. +2. **Server-owns-the-secret.** `/api/auth/totp/setup` already generates the secret server-side (line 184). Stash it in a short-TTL, session-keyed pending map (`_pending_totp_secrets[token] = {secret, created}`); `confirm` verifies the code against the **pending server-stored** secret, never a body-supplied one. Remove `secret` from the confirm request contract. + +*Migration/compat:* PWA setup flow currently posts `{code, secret}`; it will post `{code}` + auth token. One-line frontend change in `codec_dashboard.html` TOTP panel (tracked as a sub-task). Existing enrolled users unaffected (their `totp_secret` already in config; `/verify` path at 221-239 unchanged). + +*Test:* new `tests/test_auth_totp.py` — (a) unauthenticated confirm → 401/403; (b) authenticated confirm with body-supplied foreign secret → ignored, server secret used; (c) happy-path enroll → verify round-trip. + +*Rollback:* revert `routes/auth.py` + the 1-line HTML change. No persisted state migration, so rollback is clean. + +*Risk/flags:* touches the auth path — **DECISION-REQUIRED** on layer choice (see §3, Decision A). + +**1b. C6 — `codec_oauth_provider.py:164-167` (`_save` fallback)** + +*Problem:* the Keychain-unavailable fallback writes plaintext via `tmp.write_text(blob); os.replace(...)` with **no fsync** → crash-window data loss/corruption of `oauth_state.json`. + +*Change:* replace the hand-rolled tmp+replace with `codec_jsonstore.atomic_write_json(self._state_path, json.loads(blob))` (already does tmp **+ fsync +** replace + 0600). Keep the `with self._lock:` and the Keychain-first branch exactly as-is. + +*Migration/compat:* none — same file, same format, same 0600 perms. `oauth_state.json` is a don't-touch zone *for clearing/TTL*, but improving the write durability of the fallback path is non-breaking (no schema change, no TTL change). Will surface to operator before commit per don't-touch protocol. + +*Test:* `tests/test_oauth_provider.py` (currently collection-erroring — unblocked by 1c) — add a fallback-path durability test (monkeypatch Keychain unavailable, assert atomic write + 0600). + +*Rollback:* one-function revert. + +**1c. C7 — fastmcp install in test/CI env (NOT a TTL or runtime-behavior change)** + +*Problem:* `fastmcp` not importable in the python3.11 test env → `tests/test_oauth_provider.py` collection error + 6 downstream test files can't run + 9 of 9 suite failures trace here. + +*Change:* (1) **Verify** the exact advisory + safe version via `pip-audit` / registry before pinning — do NOT assert a CVE id/version on faith. (2) Add the verified safe `fastmcp` pin to `requirements.txt` + the CI workflow's install step. (3) Re-run the suite; expect the 9 failures + collection error to clear. + +*Migration/compat:* dependency addition to an already-declared import (`codec_oauth_provider.py:35` already imports it). No runtime behavior change. + +*Test:* `pytest --collect-only` clean (no collection error); full suite failures → 0 attributable to fastmcp. + +*Rollback:* unpin / remove from requirements. + +--- + +#### Fix #2 — Fail-closed message bridges + bridge skill allowlist + +**Closes:** C2. + +*Problem:* `codec_telegram.py:70` defaults `allowed_chat_ids` to `[]`, and `:509` is `if allowed and chat_id not in allowed:` → empty list = **allow all**. Anyone who learns the bot token can drive CODEC. `codec_bridges.py:27` `_SKIP_SKILLS` doesn't exclude `terminal`/`python_exec`/`file_write`/`pilot`, so an inbound message can reach high-power skills. Same shape in `codec_imessage.py`. + +*Change:* +1. **Fail-closed default.** When `allowed_chat_ids`/allowed-handles is empty, **deny all** inbound, log a one-time `bridge_no_allowlist` warning telling the operator how to allow their own chat id. (`codec_telegram.py` get-config + the `:509` guard; mirror in `codec_imessage.py`.) +2. **`BRIDGE_SAFE_SKILLS` allowlist.** Inbound bridge messages may only dispatch skills on an explicit safe list (read/info skills); `terminal`, `python_exec`, `file_write`, `pilot`, `process_manager`, `pm2_control`, `ax_control` are never reachable from a bridge. Enforced in `codec_bridges.try_skill`. + +*Migration/compat:* **behavior change** — a user currently relying on empty-list-allows-all will stop receiving bridge responses until they add their chat id. This is the correct secure default; document in release notes + the warning log line. **DECISION-REQUIRED** (§3, Decision B) on whether to ship fail-closed immediately or behind a one-release deprecation warning. + +*Test:* `tests/test_bridges.py` — empty allowlist denies; populated allowlist permits only listed ids; `terminal`/`python_exec` rejected from bridge path even when chat id is allowed. + +*Rollback:* revert the guard default + remove `BRIDGE_SAFE_SKILLS` gate. Inbound stays PWA-only regardless (no new inbound channel added — consistent with CLAUDE.md). + +--- + +#### Fix #3 — Remove `python_exec` from chat allowlist + tighten sandbox file-read + +**Closes:** C3. + +*Problem:* `codec_dashboard.py:2219` includes `python_exec` in `CHAT_SKILL_ALLOWLIST` → a prompt-injection payload in chat can fire it. The sandbox profile `codec_sandbox.py:28` `(allow file-read*)` lets sandboxed code read `~/.ssh`, `~/.codec/secrets*`, `oauth_state.json`; the skill's return value flows back into the chat/LLM transcript — a read-then-return exfil path even though network egress is denied. + +*Change:* +1. **Remove `python_exec` from `CHAT_SKILL_ALLOWLIST`** (line 2219). It remains available as a skill but is no longer auto-firable from the pre-LLM chat hijack / post-LLM tag path. (`SKILL_MCP_EXPOSE=False` already keeps it off MCP.) +2. **Tighten the sandbox read scope.** Replace blanket `(allow file-read*)` with an allow-list of the paths Python genuinely needs (interpreter prefix, stdlib, site-packages, `/private/tmp`, skill_output) and an explicit `(deny file-read* (subpath "/.ssh"))` / `~/.codec/secrets*` / `oauth_state.json` / Keychain dirs. Default-deny is safer than default-allow here. + +*Migration/compat:* removing `python_exec` from the chat allowlist is the intended security posture; legitimate local Python execution still works via explicit invocation. Sandbox tightening must be validated against the existing `python_exec` smoke tests so we don't break legitimate imports. **DECISION-REQUIRED** (§3, Decision C) — full removal vs. keep-behind-strict-consent. + +*Test:* `tests/test_python_exec.py` — injection-style chat message no longer triggers `python_exec`; sandboxed read of `~/.ssh/id_rsa` is denied; a benign `import json; print(...)` still runs. + +*Rollback:* re-add the set member + revert the profile. + +--- + +### PHASE B — Robustness & correctness + +--- + +#### Fix #4 — Extract `codec_concurrency.run_with_timeout`; kill the hanging ThreadPoolExecutor pattern + +**Closes:** C4. + +*Problem:* `codec_mcp.py:222-224` and `codec_observer.py:276-278` use `with ThreadPoolExecutor() as ex: fut.result(timeout=…)`. On timeout, `__exit__` calls `shutdown(wait=True)` and **blocks on the runaway task** — the timeout is defeated; MCP/observer can hang. + +*Change:* extract the proven pattern from `codec_hooks._run_hook_with_timeout` into a new, additive `codec_concurrency.run_with_timeout(fn, timeout, ...)` (daemon thread + `Event`, no blocking `__exit__`). Migrate the two call sites. New module = zero risk; the two migrations are the only working-code touch. + +*Test:* `tests/test_concurrency.py` — a function exceeding the timeout returns control promptly and does NOT block on shutdown; result/exception propagation correct. + +*Rollback:* revert the two call sites; leave the (unused) helper or delete it. + +--- + +#### Fix #5 — SQLite + JSON state locking pass + +**Closes:** C5, plus High/Med H14 + M6. + +*Problem:* shared `sqlite3` connection across threads with no lock (`codec_memory.py:44`); `routes/agents.py` `grant_permission` and `codec_ask_user.py` `_write_question_notification` do read-modify-write on JSON state without `file_lock`. + +*Change:* +1. Wrap `CodecMemory` connection use in a `threading.RLock` (cursor ops serialized; WAL + busy_timeout stay as the cross-process layer). +2. Route the two JSON read-modify-write sites through `codec_jsonstore.file_lock(...)` + `atomic_write_json(...)`. + +*Migration/compat:* none — internal locking only. Verify no deadlock vs. existing `_auth_lock`/`self._lock` usages. + +*Test:* concurrency stress test on `CodecMemory.save` from N threads (no corruption); concurrent `grant_permission` writes don't clobber. + +*Rollback:* per-site revert. + +--- + +#### Fix #6 — Dependency upgrade pass + PM2-to-venv pin + +**Closes:** C7 (transitive) + dependency-auditor Highs. + +*Problem:* runtime Python 3.13 drifted below `requirements.txt` minimums; several packages have advisories; PM2 may launch system python instead of the venv. + +*Change:* (1) `pip-audit` → upgrade the flagged set (verify each advisory first; **no version asserted on faith**). (2) Pin PM2 `interpreter:` in `ecosystem.config.js` to the venv python — **process `name:` fields untouched** (don't-touch zone). (3) Re-run full suite after upgrades. + +*Migration/compat:* dependency bumps can ripple — do this on a branch, full suite must stay green. **DECISION-REQUIRED** (§3, Decision D) — aggressive (latest) vs. minimal (only-advisory-affected) upgrade. + +*Rollback:* `requirements.txt` revert + `pip install -r`. + +--- + +### PHASE C — Structural & coverage + +--- + +#### Fix #7 — SSRF + skill-overwrite hardening + +**Closes:** H1, H2, H6. + +*Change:* add an SSRF guard (block private/link-local/metadata IPs, enforce http(s), cap redirects/size) shared by `web_fetch` + `clipboard_url_fetch`; ensure `skill_approve`/skill-write can't overwrite a hash-pinned built-in. Additive guard module + call-site wiring. + +*Test:* SSRF guard rejects `169.254.169.254`, `127.0.0.1`, `file://`, `10.x`; allows public hosts. Skill-overwrite of a manifest-pinned name refused. + +*Rollback:* per-site revert. + +--- + +#### Fix #8 — `routes/chat.py` extraction (first slice of the god-module) + +**Closes:** start of C9 (`codec_dashboard.py` 3,859 LOC; `chat_completion` CC 48). + +*Change:* extract the chat handler into `routes/chat.py`, dropping `chat_completion` complexity toward ~15. Pure move + seam, no behavior change. This is a >1-module structural change → its own follow-on design note before implementation. + +*Test:* existing chat/stream tests must pass unchanged (behavior-preserving refactor). + +*Rollback:* revert the extraction commit. + +--- + +#### Fix #9 — Promote `codec_jsonstore` to mandatory state registry + +**Closes:** C8 (~25 ad-hoc `~/.codec/*.json` writers). + +*Change:* inventory every `~/.codec/*.json` writer; migrate ad-hoc writers to `atomic_write_json` + `file_lock`; add `docs/STATE-FILES.md` registry. Large surface → its own design note + phased migration (don't-touch state files migrated last, with operator sign-off each). + +*Test:* per-migrated-file round-trip + concurrent-write test. + +*Rollback:* per-file revert. + +--- + +#### Fix #10 — Test coverage + flake fixes + +**Closes:** test-coverage-reviewer gaps. + +*Change:* TOTP HTTP tests (from Fix #1), OAuth scope-escalation tests, `permission_gate` mutation tests, replace `time.sleep` with `threading.Event` in flaky tests, add a CI grep-guard that fails if a new inline `chat/completions` POST is added outside `codec_llm` (protects the A-12 invariant). + +*Rollback:* tests are additive; safe to drop. + +--- + +## 3. Open decisions requiring explicit operator sign-off + +I will NOT start these until you pick. Each is framed A/B/C with my recommendation first. + +**Decision A — TOTP confirm fix (Fix #1a):** +- **A1 (Recommended):** both layers — require auth session AND server-owns-secret. Closes the takeover completely; ~1 frontend line. +- A2: require-auth-session only (smaller change, but still trusts body secret for an already-authed user — acceptable but weaker). +- A3: server-owns-secret only (defends the secret, but leaves enroll callable pre-auth — weaker). + +**Decision B — Bridge fail-closed rollout (Fix #2):** +- **B1 (Recommended):** ship fail-closed now + loud warning log + release note. Correct secure default. +- B2: one-release deprecation — warn on empty allowlist but still allow, flip to deny next release. + +**Decision C — python_exec chat exposure (Fix #3):** +- **C1 (Recommended):** remove from `CHAT_SKILL_ALLOWLIST` entirely + tighten sandbox. +- C2: keep in allowlist but route every fire through Step-3 strict-consent + tighten sandbox. + +**Decision D — Dependency upgrade aggressiveness (Fix #6):** +- **D1 (Recommended):** minimal — only advisory-affected packages, verified one-by-one. +- D2: aggressive — bring everything to latest compatible, full-suite gated. + +**Decision E — Execution granularity:** +- **E1 (Recommended):** I implement Phase A (Fixes 1-3) now as separate commits, each with tests green, surfacing the don't-touch-adjacent `oauth_state.json` write-durability change before its commit. Pause for review after Phase A. +- E2: I implement one specific fix you name, then stop. +- E3: this doc is enough for now — you'll schedule implementation later. + +--- + +## 4. Zero-risk first action (no working-code edit) + +Independent of the decisions above, the one fully-reversible, behavior-neutral action that unblocks the most downstream work is **Fix #1c**: verify the fastmcp advisory + safe version and install it in the test env so `pytest --collect-only` is clean and the 9 failing tests can actually run. That gives a true green baseline to measure every subsequent fix against. I'll do this first unless you say otherwise. + +--- + +## 5. What this plan deliberately does NOT do + +- Does not change OAuth TTLs (maintainer-set today). +- Does not alter the audit envelope schema, `_HTTP_BLOCKED` membership, PM2 process names, or `codec_identity.py`. +- Does not add any inbound channel (bridges stay outbound/response-only). +- Does not auto-deploy anything from `~/.codec/skill_proposals/`. +- Does not touch `memory.db` schema (Fix #5 is a runtime lock, not a migration). diff --git a/requirements.txt b/requirements.txt index 5d9a743..41baf9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,12 @@ simple-term-menu>=1.6.6 httpx>=0.28.1 fastmcp>=3.3.1 +# TOTP 2FA auth (routes/auth.py: /api/auth/totp/* + skills/qr_generator.py). +# Previously undeclared runtime deps — the TOTP setup/confirm/verify endpoints +# import these lazily; without them the 2FA flow 500s in production. +pyotp>=2.9.0 +qrcode[pil]>=8.0 + # Optional: TTS (Kokoro 82M — Apple Silicon) # mlx-audio misaki num2words phonemizer-fork spacy diff --git a/routes/auth.py b/routes/auth.py index 6b13a5d..12019a0 100644 --- a/routes/auth.py +++ b/routes/auth.py @@ -175,6 +175,8 @@ async def auth_pin(request: Request): @router.post("/api/auth/totp/setup") async def totp_setup(request: Request): """Generate TOTP secret + QR code for authenticator app setup.""" + if not _verify_biometric_session(request): + return JSONResponse({"error": "Authentication required"}, status_code=401) import pyotp import qrcode import io @@ -182,6 +184,15 @@ async def totp_setup(request: Request): if not AUTH_ENABLED: return JSONResponse({"error": "Auth not enabled"}, status_code=400) secret = pyotp.random_base32() + # C1 layer 2: the server OWNS the enrollment secret. Stash it on the + # caller's session so /confirm verifies against THIS value — never a + # client-supplied body `secret`. In-memory only: _save_sessions() whitelists + # created/ip/method, so the pending secret is never written to disk. + token = request.cookies.get(AUTH_COOKIE_NAME) + if token: + with _auth_lock: + if token in _auth_sessions: + _auth_sessions[token]["pending_totp_secret"] = secret totp = pyotp.TOTP(secret) uri = totp.provisioning_uri(name="CODEC", issuer_name="CODEC") img = qrcode.make(uri) @@ -194,12 +205,27 @@ async def totp_setup(request: Request): @router.post("/api/auth/totp/confirm") async def totp_confirm(request: Request): """Verify TOTP code and save secret to config if valid (first-time setup).""" + if not _verify_biometric_session(request): + return JSONResponse({"error": "Authentication required"}, status_code=401) import pyotp body = await request.json() code = str(body.get("code", "")) - secret = body.get("secret", "") - if not code or not secret: - return JSONResponse({"error": "Missing code or secret"}, status_code=400) + # C1 layer 2: ignore any client-supplied `secret`. Verify against the + # server-stashed pending secret created by /setup, keyed to this session. + token = request.cookies.get(AUTH_COOKIE_NAME) + secret = "" + if token: + with _auth_lock: + sess = _auth_sessions.get(token) + if sess: + secret = sess.get("pending_totp_secret", "") + if not code: + return JSONResponse({"error": "Missing code"}, status_code=400) + if not secret: + return JSONResponse( + {"error": "No pending TOTP enrollment — run setup first"}, + status_code=400, + ) totp = pyotp.TOTP(secret) if totp.verify(code, valid_window=1): try: @@ -213,6 +239,12 @@ async def totp_confirm(request: Request): json.dump(cfg_data, f, indent=2) except Exception as e: return JSONResponse({"error": f"Failed to save config: {e}"}, status_code=500) + # Enrollment complete — clear the one-time pending secret. + if token: + with _auth_lock: + sess = _auth_sessions.get(token) + if sess: + sess.pop("pending_totp_secret", None) _audit_write(f"[{datetime.now().isoformat()}] TOTP_SETUP: 2FA enabled\n") return {"verified": True, "enabled": True, "message": "2FA enabled successfully"} return {"verified": False, "error": "Invalid code. Try again."} diff --git a/tests/test_auth_totp.py b/tests/test_auth_totp.py new file mode 100644 index 0000000..282adcd --- /dev/null +++ b/tests/test_auth_totp.py @@ -0,0 +1,163 @@ +"""Tests for TOTP enrollment auth gate — audit finding C1 (CRITICAL). + +Closes the unauthenticated-TOTP-takeover: + - /api/auth/totp/setup and /api/auth/totp/confirm must require an + authenticated session (Touch ID / PIN), like their siblings + /totp/disable and /totp/enable already do. + - /confirm must verify the code against the SERVER-stored pending secret + created by /setup, not an attacker-supplied `secret` in the request body. + +Reference: docs/SECURITY-REMEDIATION-DESIGN.md Fix #1a. +""" +from __future__ import annotations + +import json +import os +import secrets +import sys +from datetime import datetime +from pathlib import Path + +import pytest + +REPO = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO)) + + +@pytest.fixture +def auth_app(monkeypatch, tmp_path): + """Bare FastAPI app mounting only the auth router, with auth forced ON + so `_verify_biometric_session` actually gates (it short-circuits to True + when auth is disabled / unavailable). CONFIG_PATH is redirected to a tmp + file so no test can ever touch the real ~/.codec/config.json.""" + import routes._shared as shared + import routes.auth as auth_routes + + cfg = str(tmp_path / "config.json") + # Isolation: never write the real config. + monkeypatch.setattr(shared, "CONFIG_PATH", cfg) + monkeypatch.setattr(auth_routes, "CONFIG_PATH", cfg) + # Force auth ON so the session gate is live. + monkeypatch.setattr(shared, "AUTH_ENABLED", True) + monkeypatch.setattr(auth_routes, "AUTH_ENABLED", True) + monkeypatch.setattr(shared, "_auth_available", lambda: True) + + from fastapi import FastAPI + from fastapi.testclient import TestClient + + app = FastAPI() + app.include_router(auth_routes.router) + return TestClient(app) + + +def test_totp_confirm_rejects_unauthenticated(auth_app): + """C1 layer 1: an unauthenticated caller must NOT be able to enroll a TOTP + secret. Before the fix, /confirm trusted a body-supplied secret+code and + wrote it to config.json — full account takeover. It must return 401. + + The gate must reject BEFORE any TOTP processing, so this sends a dummy + code+secret (no pyotp needed) — a 401 proves the short-circuit fires + ahead of secret verification.""" + r = auth_app.post( + "/api/auth/totp/confirm", + json={"code": "123456", "secret": "DUMMYSECRET234567ABCD"}, + ) + assert r.status_code == 401, ( + f"Unauthenticated TOTP enrollment must be rejected (C1); " + f"got {r.status_code}: {r.text[:200]}" + ) + + +def test_totp_setup_rejects_unauthenticated(auth_app): + """C1 layer 1: /setup generates a server-side secret + QR. An + unauthenticated caller must not be able to start enrollment either — + otherwise an attacker can drive the whole setup→confirm flow. 401.""" + r = auth_app.post("/api/auth/totp/setup", json={}) + assert r.status_code == 401, ( + f"Unauthenticated TOTP setup must be rejected (C1); " + f"got {r.status_code}: {r.text[:200]}" + ) + + +# ── Layer 2: server owns the secret ───────────────────────────────────────── + + +@pytest.fixture +def authed(auth_app): + """Yield a client with a FORGED valid biometric session cookie so the auth + gate passes — letting us exercise the post-gate server-owns-secret logic. + The session is in-memory only and torn down after the test.""" + import routes._shared as shared + + token = "test-session-" + secrets.token_hex(8) + with shared._auth_lock: + shared._auth_sessions[token] = { + "created": datetime.now(), + "ip": "127.0.0.1", + "method": "test", + } + auth_app.cookies.set(shared.AUTH_COOKIE_NAME, token) + try: + yield auth_app + finally: + with shared._auth_lock: + shared._auth_sessions.pop(token, None) + + +def test_totp_confirm_uses_server_secret_not_body(authed): + """C1 layer 2 (THE security invariant): /confirm must verify against the + SERVER-stashed pending secret created by /setup, NOT an attacker-supplied + body `secret`. Even an authenticated user (or a CSRF'd browser) must not + be able to enroll a secret of their own choosing.""" + import pyotp + import routes._shared as shared + + client = authed + # 1. Begin enrollment — server generates + must stash its own secret. + r_setup = client.post("/api/auth/totp/setup", json={}) + assert r_setup.status_code == 200, r_setup.text + server_secret = r_setup.json()["secret"] + + # 2. Attacker submits a DIFFERENT secret + a code valid FOR THAT secret. + attacker_secret = pyotp.random_base32() + assert attacker_secret != server_secret + attacker_code = pyotp.TOTP(attacker_secret).now() + r = client.post( + "/api/auth/totp/confirm", + json={"code": attacker_code, "secret": attacker_secret}, + ) + body = r.json() + assert body.get("verified") is not True, ( + f"Body-supplied secret must be IGNORED (C1 layer 2); got {body}" + ) + # 3. The attacker secret must NEVER reach config.json. + cfg = {} + if os.path.exists(shared.CONFIG_PATH): + with open(shared.CONFIG_PATH) as f: + cfg = json.load(f) + assert cfg.get("totp_secret") != attacker_secret, ( + "Attacker-chosen secret must never be persisted to config (C1 layer 2)" + ) + + +def test_totp_confirm_accepts_server_secret_code(authed): + """C1 layer 2 happy path: a code computed from the SERVER secret (with NO + body secret at all) confirms and enrolls exactly that server secret.""" + import pyotp + import routes._shared as shared + + client = authed + r_setup = client.post("/api/auth/totp/setup", json={}) + assert r_setup.status_code == 200, r_setup.text + server_secret = r_setup.json()["secret"] + + code = pyotp.TOTP(server_secret).now() + # Body deliberately omits `secret` — the server must use its own pending one. + r = client.post("/api/auth/totp/confirm", json={"code": code}) + assert r.status_code == 200, r.text + assert r.json().get("verified") is True, r.text + with open(shared.CONFIG_PATH) as f: + cfg = json.load(f) + assert cfg.get("totp_secret") == server_secret, ( + "Confirmed enrollment must persist the SERVER secret (C1 layer 2)" + ) diff --git a/tests/test_bridges.py b/tests/test_bridges.py index 76a6358..4662206 100644 --- a/tests/test_bridges.py +++ b/tests/test_bridges.py @@ -7,15 +7,20 @@ """ from __future__ import annotations +import logging import sqlite3 import sys from pathlib import Path +import pytest + REPO = Path(__file__).resolve().parent.parent sys.path.insert(0, str(REPO)) import codec_bridges # noqa: E402 +import codec_imessage # noqa: E402 import codec_llm # noqa: E402 +import codec_telegram # noqa: E402 def _cfg(api_key="", kwargs=None): @@ -116,3 +121,140 @@ def test_bridges_use_codec_bridges(): assert "codec_bridges" in src, f"{mod} doesn't use codec_bridges" # the duplicated skill-dispatch internals are gone assert "from codec_dispatch import check_skill, run_skill" not in src, f"{mod} still has its own dispatch loader" + + +# ── Fix #2 (C2): fail-closed inbound allowlists ─────────────────────────────── +# Before: an EMPTY allowed_chat_ids / allowed_senders meant "allow all" — anyone +# who learned the bot token (telegram) or the handle (imessage) could drive +# CODEC. The secure default is fail-closed: empty allowlist denies everyone until +# the operator explicitly adds their own id/handle. + + +# iMessage — is_sender_allowed(sender, im_cfg) + + +def test_imessage_empty_allowlist_denies(): + """C2: empty allowed_senders must DENY all inbound (was allow-all).""" + assert codec_imessage.is_sender_allowed( + "+15551234567", {"allowed_senders": []}) is False + + +def test_imessage_missing_allowlist_denies(): + """C2: allowlist key entirely absent → deny (fail-closed default).""" + assert codec_imessage.is_sender_allowed("+15551234567", {}) is False + + +def test_imessage_listed_sender_allowed(): + assert codec_imessage.is_sender_allowed( + "+15551234567", {"allowed_senders": ["+15551234567"]}) is True + + +def test_imessage_unlisted_sender_denied(): + assert codec_imessage.is_sender_allowed( + "+19998887777", {"allowed_senders": ["+15551234567"]}) is False + + +def test_imessage_blocked_still_denied(): + """Blocklist still wins even if the sender is also in the allowlist.""" + assert codec_imessage.is_sender_allowed("+15551234567", { + "allowed_senders": ["+15551234567"], + "blocked_senders": ["+15551234567"], + }) is False + + +def test_imessage_empty_allowlist_warns_once(caplog): + """C2: denying on empty allowlist logs ONE bridge_no_allowlist warning + (not one per message — that would spam the log).""" + codec_imessage._warned_no_allowlist = False # reset the one-time flag + with caplog.at_level(logging.WARNING): + codec_imessage.is_sender_allowed("+1", {"allowed_senders": []}) + codec_imessage.is_sender_allowed("+2", {"allowed_senders": []}) + hits = [r for r in caplog.records if "bridge_no_allowlist" in r.getMessage()] + assert len(hits) == 1, f"expected exactly one warning, got {len(hits)}" + + +# Telegram — is_chat_allowed(chat_id, tg_cfg) + + +def test_telegram_empty_allowlist_denies(): + """C2: empty allowed_chat_ids must DENY all inbound (was allow-all).""" + assert codec_telegram.is_chat_allowed(12345, {"allowed_chat_ids": []}) is False + + +def test_telegram_missing_allowlist_denies(): + assert codec_telegram.is_chat_allowed(12345, {}) is False + + +def test_telegram_listed_chat_allowed(): + assert codec_telegram.is_chat_allowed(12345, {"allowed_chat_ids": [12345]}) is True + + +def test_telegram_unlisted_chat_denied(): + assert codec_telegram.is_chat_allowed(999, {"allowed_chat_ids": [12345]}) is False + + +def test_telegram_empty_allowlist_warns_once(caplog): + codec_telegram._warned_no_allowlist = False # reset the one-time flag + with caplog.at_level(logging.WARNING): + codec_telegram.is_chat_allowed(1, {"allowed_chat_ids": []}) + codec_telegram.is_chat_allowed(2, {"allowed_chat_ids": []}) + hits = [r for r in caplog.records if "bridge_no_allowlist" in r.getMessage()] + assert len(hits) == 1, f"expected exactly one warning, got {len(hits)}" + + +def test_telegram_process_message_wired_to_gate(): + """The inline allow-all guard in process_message is replaced by the + fail-closed helper. Source-level invariant (mirrors the existing migration + guard above).""" + src = (REPO / "codec_telegram.py").read_text() + assert "is_chat_allowed(" in src, "process_message must call is_chat_allowed" + # the old allow-all pattern must be gone + assert "if allowed and chat_id not in allowed:" not in src + + +# ── Fix #2 (C2): BRIDGE_SAFE_SKILLS allowlist in try_skill ──────────────────── +# High-power skills must NEVER be reachable from a (remote) bridge, even if the +# dispatcher matched one. The allowlist is default-deny: a non-listed skill is +# simply not dispatched (the bridge degrades to an LLM answer — never a hard +# fail), and the 7 dangerous skills are excluded by construction. + +_DANGEROUS_BRIDGE_SKILLS = [ + "terminal", "python_exec", "file_write", "pilot", + "process_manager", "pm2_control", "ax_control", +] + + +@pytest.mark.parametrize("dangerous", _DANGEROUS_BRIDGE_SKILLS) +def test_try_skill_blocks_dangerous(monkeypatch, dangerous): + """C2: a dangerous skill is never run from a bridge even when matched.""" + monkeypatch.setattr(codec_bridges, "_dispatch_loaded", True) + monkeypatch.setattr(codec_bridges, "_check_skill", lambda t: {"name": dangerous}) + monkeypatch.setattr( + codec_bridges, "_run_skill", + lambda s, t: pytest.fail(f"{dangerous} must not run from a bridge")) + assert codec_bridges.try_skill("do the dangerous thing") == (None, None) + + +def test_try_skill_allows_safe_listed(monkeypatch): + """A safe read/info skill on the allowlist still runs.""" + monkeypatch.setattr(codec_bridges, "_dispatch_loaded", True) + monkeypatch.setattr(codec_bridges, "_check_skill", lambda t: {"name": "weather"}) + monkeypatch.setattr(codec_bridges, "_run_skill", lambda s, t: "sunny") + assert codec_bridges.try_skill("weather?") == ("weather", "sunny") + + +def test_try_skill_unlisted_safe_skill_not_dispatched(monkeypatch): + """A skill that isn't on BRIDGE_SAFE_SKILLS degrades to (None, None) so the + bridge falls back to an LLM answer — not a hard break.""" + monkeypatch.setattr(codec_bridges, "_dispatch_loaded", True) + monkeypatch.setattr(codec_bridges, "_check_skill", lambda t: {"name": "google_gmail"}) + monkeypatch.setattr( + codec_bridges, "_run_skill", + lambda s, t: pytest.fail("unlisted skill must not run from a bridge")) + assert codec_bridges.try_skill("send an email") == (None, None) + + +def test_bridge_safe_skills_excludes_dangerous(): + """The allowlist must contain NONE of the 7 dangerous skills.""" + for d in _DANGEROUS_BRIDGE_SKILLS: + assert d not in codec_bridges.BRIDGE_SAFE_SKILLS diff --git a/tests/test_oauth_provider.py b/tests/test_oauth_provider.py index 224d538..ff95143 100644 --- a/tests/test_oauth_provider.py +++ b/tests/test_oauth_provider.py @@ -98,6 +98,62 @@ def test_expired_tokens_pruned_on_load(tmp_path): assert "dead" not in p.access_tokens +def test_fallback_write_is_fsync_durable(tmp_path, monkeypatch): + """C6: when Keychain (and its encrypted fallback) cannot persist, _save + writes the plaintext oauth_state.json via a CRASH-DURABLE path — unique + tmp + fsync + atomic replace + 0600. Before the fix the fallback did + `tmp.write_text(blob); os.replace(...)` with NO fsync, so a crash between + write() and the page-cache flush could land a truncated/empty file and + lose every OAuth token (claude.ai forced re-auth on next restart). + + We assert the durability behaviour directly: os.fsync MUST be invoked on + the fallback write, and the resulting file must round-trip + be 0600. + Reference: docs/SECURITY-REMEDIATION-DESIGN.md Fix #1b. + """ + import json + import os + import stat + + import codec_keychain + + state = tmp_path / "oauth_state.json" + + # Force the plaintext fallback path: Keychain (and its encrypted fallback) + # report they could not persist this write. + monkeypatch.setattr(codec_keychain, "set_oauth_state", lambda blob: False) + + # Spy on fsync to prove the write is flushed to stable storage before the + # atomic replace. Delegate to the real fsync so behaviour is preserved. + fsync_calls = {"n": 0} + real_fsync = os.fsync + + def _counting_fsync(fd): + fsync_calls["n"] += 1 + return real_fsync(fd) + + monkeypatch.setattr(os, "fsync", _counting_fsync) + + p = PersistentOAuthProvider( + base_url="https://test.example.com", + client_registration_options=ClientRegistrationOptions(enabled=True), + state_path=state, + ) + asyncio.run(p.register_client(_make_client("durable"))) + + assert fsync_calls["n"] >= 1, ( + "Fallback oauth_state.json write must fsync before replace (C6); " + "no fsync was observed during _save" + ) + # Round-trips + correct perms. + assert state.exists(), "fallback must persist the plaintext state file" + data = json.loads(state.read_text()) + assert "durable" in data.get("clients", {}), ( + "persisted state must contain the registered client" + ) + mode = stat.S_IMODE(os.stat(state).st_mode) + assert mode == 0o600, f"fallback file must be 0600; got {oct(mode)}" + + if __name__ == "__main__": import tempfile with tempfile.TemporaryDirectory() as d: diff --git a/tests/test_python_exec.py b/tests/test_python_exec.py index 4013fb8..0904126 100644 --- a/tests/test_python_exec.py +++ b/tests/test_python_exec.py @@ -211,3 +211,134 @@ def test_no_api_execute_route_registered_in_app(): f"/api/execute must NOT be registered; routes: " f"{[p for p in paths if 'execute' in p or 'api' in p][:10]}" ) + + +# ── Fix #3 (C3): python_exec off chat-allowlist + tightened sandbox reads ───── +# Closes audit finding C3. Two layers: +# 1. python_exec is removed from CHAT_SKILL_ALLOWLIST so an injection-style +# chat message can no longer auto-fire it via the pre-LLM hijack / post-LLM +# tag path. It stays a skill (still reachable on the local voice/chat tag +# path only through an explicit user [SKILL:...] is now impossible too — +# the allowlist gates BOTH hijack and tag). SKILL_MCP_EXPOSE=False already +# keeps it off MCP. +# 2. The sandbox read scope keeps the broad `(allow file-read*)` (so legitimate +# stdlib imports never break — the "don't break working code" constraint) +# but layers explicit `(deny file-read* ...)` rules AFTER it (SBPL is +# last-match-wins) over the named credential paths: ~/.ssh, ~/.aws, +# ~/.gnupg, the ~/.codec secret/oauth/config files, and the Keychain dirs. + + +def test_python_exec_not_in_chat_allowlist(): + """C3.1: python_exec must NOT be auto-firable from chat. Removing it from + CHAT_SKILL_ALLOWLIST closes both the pre-LLM hijack and the post-LLM tag + path (both gate on this set).""" + import codec_dashboard + assert "python_exec" not in codec_dashboard.CHAT_SKILL_ALLOWLIST, ( + "python_exec must be removed from CHAT_SKILL_ALLOWLIST (C3) — it stays " + "a skill but is no longer auto-firable from a chat message" + ) + + +def test_dashboard_prompt_drops_python_exec_example(): + """C3.1: the chat system-prompt addon must not advertise a + [SKILL:python_exec:...] example — the tag would be parsed but the skill is + no longer on the allowlist, so showing it as an example is misleading.""" + src = (REPO / "codec_dashboard.py").read_text() + assert "[SKILL:python_exec:" not in src, ( + "the python_exec skill-tag example must be removed from the chat prompt " + "addon now that python_exec is off CHAT_SKILL_ALLOWLIST" + ) + + +def test_sandbox_profile_denies_secret_paths(): + """C3.2: the generated sandbox profile must explicitly deny reads of the + named credential paths so a sandboxed python_exec (or any sandboxed skill) + cannot exfiltrate private keys / OAuth tokens / Keychain material.""" + from codec_sandbox import _write_sandbox_profile + path = _write_sandbox_profile(allow_network=False) + with open(path) as f: + content = f.read() + assert "(deny file-read*" in content, "no file-read deny rule present" + for needle in ("/.ssh", "oauth_state.json", "Keychains"): + assert needle in content, f"sandbox profile must deny reads of {needle}" + + +def test_sandbox_profile_retains_broad_read(): + """C3.2 constraint: the broad `(allow file-read*)` MUST remain so legitimate + stdlib / site-packages imports keep working — the deny rules are layered + AFTER it (last-match-wins), not a replacement. Guards the 'don't break + working code' rule against a future switch to a read-allowlist.""" + from codec_sandbox import _write_sandbox_profile + path = _write_sandbox_profile(allow_network=False) + with open(path) as f: + content = f.read() + assert "(allow file-read*)" in content, ( + "broad file-read allow must stay — removing it risks breaking imports" + ) + + +def test_sandbox_deny_is_after_allow(): + """C3.2 mechanism: SBPL is last-match-wins, so the targeted deny rules must + appear AFTER the broad `(allow file-read*)` for the deny to take effect.""" + from codec_sandbox import _write_sandbox_profile + path = _write_sandbox_profile(allow_network=False) + with open(path) as f: + content = f.read() + allow_idx = content.index("(allow file-read*)") + deny_idx = content.index("(deny file-read*") + assert deny_idx > allow_idx, ( + "targeted file-read denies must come AFTER the broad allow " + "(SBPL last-match-wins) or they have no effect" + ) + + +@pytest.mark.skipif( + platform.system() != "Darwin", + reason="sandbox-exec is macOS-only", +) +def test_sandbox_denies_ssh_read_live(): + """C3.2 behavioral: a python_exec snippet that opens a file under ~/.ssh is + DENIED by the sandbox, even though `open()` passes the AST gate. Proves the + deny rule actually enforces — not just that a string is in the profile. + + Routed through the REAL python_exec.run() path so it uses the same + interpreter selection (_python_bin → homebrew python, which process-exec + allows). `open()` is deliberately allowed by the AST gate + (codec_config.py:737), so the ONLY thing that can stop the read is the + sandbox deny. Reads 1 byte and prints a sentinel — never the secret bytes.""" + ssh_dir = Path.home() / ".ssh" + if not ssh_dir.exists(): + pytest.skip("no ~/.ssh on this machine to probe") + target = next((p for p in sorted(ssh_dir.iterdir()) if p.is_file()), None) + if target is None: + pytest.skip("~/.ssh has no regular file to probe") + code = ( + f"f = open({str(target)!r}, 'rb')\n" + "_b = f.read(1)\n" + "f.close()\n" + "print('READ_OK' if _b else 'READ_EMPTY')\n" + ) + result = python_exec.run(f"run python ```\n{code}```") + assert "READ_OK" not in result, ( + f"sandbox must DENY reading under ~/.ssh; got: {result!r}" + ) + low = result.lower() + assert result.startswith("Error") or "not permitted" in low or "denied" in low, ( + f"expected a sandbox permission denial; got: {result!r}" + ) + + +@pytest.mark.skipif( + platform.system() != "Darwin", + reason="sandbox-exec is macOS-only", +) +def test_python_exec_benign_import_still_runs_after_tightening(): + """C3.2 constraint (behavioral): a benign `import json` skill still runs + end-to-end through the tightened sandbox — the deny rules must not break + legitimate stdlib reads.""" + result = python_exec.run( + "run python ```\nimport json\nprint(json.dumps({'a': 1}))\n```" + ) + assert '"a": 1' in result or '"a":1' in result, ( + f"benign import json must still run after sandbox tightening; got: {result!r}" + )