AVADSA25 · AVADSA25 · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -16,43 +16,29 @@ jobs:
       - name: Install deps
         run: |
           python -m pip install --upgrade pip
-          pip install pytest fastmcp httpx requests pynput pydantic ruff
+          # Runtime deps the full suite needs to import codec_dashboard /
+          # routes/* / codec_voice without ModuleNotFoundError on the Ubuntu
+          # runner. pynput stays in the list, but the conftest installs a
+          # stub when its real import fails (headless Linux has no X11).
+          pip install pytest fastmcp httpx requests pynput pydantic ruff \
+                      fastapi numpy
       - name: Lint (ruff) — F-4 gate, config in ruff.toml
         run: ruff check .
-      - name: Skill import smoke test
+      - name: Skill import smoke (standalone script, not a pytest module)
         run: python tests/test_skill_imports.py
-      - name: Skill contract tests
-        run: python -m pytest tests/test_skill_contracts.py -v
       - name: Trusted-skill manifest is current (D-1 gate)
         run: python tools/generate_skill_manifest.py --check
-      - name: Skill registry load-time AST gate tests (D-1)
-        run: python -m pytest tests/test_skill_registry.py -v
-      - name: Keychain helper + secret-migration tests (D-8, D-15)
-        run: python -m pytest tests/test_keychain.py -v
-      - name: OAuth provider unit test
-        run: python -m pytest tests/test_oauth_provider.py -v
-      - name: Retry helper unit test
-        run: python -m pytest tests/test_retry.py -v
-      - name: Readiness doc-guard tests (F-4 coverage expansion)
-        run: >
-          python -m pytest -v
-          tests/test_repo_health.py
-          tests/test_privacy_doc.py
-          tests/test_readme_investor.py
-          tests/test_one_pager.py
-          tests/test_apple_packaging.py
-          tests/test_dependabot.py
-          tests/test_versioning.py
-          tests/test_pyproject.py
-          tests/test_strict_ast_gate.py
-      - name: Apple packaging tests (W5 — bundle, launchd, python, models, sign, notarize, uninstall, release)
-        run: >
-          python -m pytest -v
-          tests/test_app_bundle.py
-          tests/test_launchd.py
-          tests/test_python_bundle.py
-          tests/test_model_fetch.py
-          tests/test_uninstaller.py
-          tests/test_signing.py
-          tests/test_release.py
-          tests/test_first_run.py
+      - name: Run full pytest suite
+        # Closes audit F-3: CI used to run 23 of 134 test files, leaving
+        # the Wave-1+2 hardening tests (D-7/D-9/D-12/D-13/D-18/D-19/D-21/D-22)
+        # unprotected on PRs. `pytest tests/` runs every test file at once.
+        # tests/test_ci_coverage_invariant.py guards this configuration
+        # against future drift.
+        run: |
+          python -m pytest tests/ \
+            --tb=short \
+            --strict-markers \
+            --strict-config \
+            -ra
+        env:
+          CI: "true"
diff --git a/README.md b/README.md
@@ -482,6 +482,28 @@ Then in Claude Desktop: *"Use CODEC to check my calendar for tomorrow."*
 
 Skills opt in to MCP exposure with `SKILL_MCP_EXPOSE = True`. Input validation enforces 5KB task / 10KB context limits with type checking on every call.
 
+### Configuring which skills CODEC exposes over MCP
+
+CODEC defaults to **opt-in** — only skills you explicitly allow reach the MCP surface. Three keys in `~/.codec/config.json` control the policy:
+
+| Option | Default | Effect |
+|---|---|---|
+| `mcp_default_allow` | `false` | When `true`, every skill with `SKILL_MCP_EXPOSE = True` is exposed (opt-out via `mcp_blocked_tools`). When `false` (recommended), nothing is exposed unless listed in `mcp_allowed_tools`. |
+| `mcp_allowed_tools` | `[]` | Explicit allowlist of skill names exposed over MCP when `mcp_default_allow` is `false`. Example: `["calculator", "weather", "memory_search"]`. |
+| `mcp_blocked_tools` | `["terminal", "process_manager", "pm2_control"]` | Hard blocklist applied on every MCP transport regardless of the above. The HTTP transport adds a stricter built-in blocklist (`python_exec`, `ax_control`) that cannot be overridden. |
+
+Example config snippet:
+
+```jsonc
+{
+  "mcp_default_allow": false,
+  "mcp_allowed_tools": ["calculator", "weather", "memory_search", "google_calendar"],
+  "mcp_blocked_tools": ["terminal", "process_manager", "pm2_control"]
+}
+```
+
+Restart `codec-mcp-http` (HTTP transport) or the host MCP client (stdio transport) after changes.
+
 ### What this unlocks (that Claude alone can't do)
 
 Claude Desktop/Code/Cursor gain — through this one MCP bridge — everything CODEC already owns on *your* machine:

diff --git a/codec_voice.py b/codec_voice.py
@@ -24,6 +24,8 @@
 import httpx
 import numpy as np
 
+log = logging.getLogger("codec_voice")
+
 from codec_audit import log_event as _voice_log_event
 from codec_hooks import (
     HookVeto,
@@ -219,7 +221,7 @@ def _clear_voice_session_marker() -> None:
     GEMINI_API_KEY = get_gemini_api_key()
     VISION_PROVIDER = _cfg.get("vision_provider", "gemini" if GEMINI_API_KEY else "local")
 except Exception:
-    pass
+    log.debug("voice: vision provider/Keychain bootstrap failed", exc_info=True)
 
 # Screen-related trigger phrases
 _SCREEN_TRIGGERS = re.compile(
@@ -323,7 +325,7 @@ def _build_system_prompt() -> str:
                 f"  {f['key']} = {f['value']}" for f in facts
             ) + "\n[/FACTS]"
     except Exception:
-        pass
+        log.debug("voice: facts injection into system prompt skipped", exc_info=True)
 
     return f"""You are {_aname} — CODEC Voice, a JARVIS-class local AI running on a Mac Studio M1 Ultra.{_boot}
 {f'The user is {_uname}. ' if _uname else ''}Fully local. No cloud. No external logs.
@@ -435,7 +437,7 @@ def _prune_resumable(cls, now=None):
                 cls._resumable_sessions.pop(sid, None)
                 cls._resume_timestamps.pop(sid, None)
         except Exception:
-            pass
+            log.debug("voice: stale-resume-session cleanup pass swallowed", exc_info=True)
 
     def _save_for_resume(self):
         """Stash conversation state so a reconnecting client can resume."""
@@ -686,7 +688,7 @@ async def generate_response(self, user_text: str):
                     "content": _build_system_prompt() + f"\n\n[MEMORY — RELEVANT CONTEXT]\n{targeted}\n[END MEMORY]"
                 }
         except Exception:
-            pass
+            log.debug("voice: targeted-memory injection skipped", exc_info=True)
         # Phase 2 Step 5 — Observer summary injection (gated per §X).
         # Voice always uses local Qwen by default (transport="local"); if
         # the user has cloud-routed voice configured (vision_provider=
@@ -882,7 +884,7 @@ async def _handle_voice_ask_user_answer(self, qid: str,
             try:
                 await self._speak("Something went wrong recording your answer.")
             except Exception:
-                pass
+                log.debug("voice: fallback TTS for ask_user error path failed", exc_info=True)
 
     async def dispatch_skill(self, skill: dict, user_text: str) -> Optional[str]:
         try:
@@ -1384,7 +1386,7 @@ async def run(self):
                                     "resume_id": self.session_id if is_resumed else None},
                              correlation_id=cid)
         except Exception:
-            pass
+            log.debug("voice: voice_session_start audit emit failed", exc_info=True)
         # Phase 1 Step 2: fire on_operation_start hooks (per-plugin, not the
         # voice_session_start audit event above — that's Step 1 vocabulary
         # and intentionally unchanged). Hook layer never raises.
@@ -1393,7 +1395,7 @@ async def run(self):
                                  transport="voice",
                                  correlation_id=cid)
         except Exception:
-            pass
+            log.debug("voice: on_operation_start hook emit failed", exc_info=True)
 
         # Send session ID so client can reconnect to this session
         await self.ws.send_json({"type": "session", "session_id": self.session_id})
@@ -1458,7 +1460,7 @@ async def run(self):
                                  error=run_error,
                                  correlation_id=cid)
             except Exception:
-                pass
+                log.debug("voice: voice_session_end audit emit failed", exc_info=True)
             # Phase 1 Step 2: fire on_operation_end hooks. Same caveat as
             # the start emit above — voice_session_end audit event is Step 1
             # vocabulary and unchanged; on_operation_end is the hook-layer
@@ -1470,11 +1472,11 @@ async def run(self):
                                    duration_ms=duration_ms,
                                    outcome=run_outcome)
             except Exception:
-                pass
+                log.debug("voice: on_operation_end hook emit failed", exc_info=True)
             try:
                 _voice_correlation_id_var.reset(cid_token)
             except Exception:
-                pass
+                log.debug("voice: correlation_id contextvar reset failed", exc_info=True)
             # Phase 1 Step 3 §5.3 — clear the active-session marker so
             # codec_ask_user falls back to PWA-only for any subsequent
             # questions. Best-effort; failures don't break shutdown.

diff --git a/ruff.toml b/ruff.toml
@@ -27,8 +27,10 @@ ignore = [
 [lint.per-file-ignores]
 # Tests legitimately keep unused locals (fixtures, captured-but-unasserted) and redefine helpers.
 "tests/*" = ["F811", "F841"]
-# Smoke test + feature-audit script import symbols purely to probe availability (F401 is the point).
-"tests/test_smoke.py" = ["F811", "F841", "F401"]
+# Smoke + feature-audit scripts import symbols purely to probe availability (F401 is the point).
+# Note: smoke.py moved tests/ → scripts/ when CI flipped to full-suite pytest
+# (it was always a hand-rolled sys.exit(1) script, never a pytest module).
+"scripts/smoke.py" = ["F811", "F841", "F401"]
 "scripts/feature_audit.py" = ["F401", "F841"]
 # A handful of benign unused locals in working skills — left as-is rather than risk-edit live code.
 "skills/memory_entities.py" = ["F841"]

diff --git a/tests/test_smoke.py → scripts/smoke.py b/tests/test_smoke.py → scripts/smoke.py
@@ -1,10 +1,13 @@
 #!/usr/bin/env python3
 """CODEC Smoke Test — catches import errors, NameErrors, and path mismatches.
 
-Run after ANY change to codec.py, codec_overlays.py, codec_watcher.py, or codec_dashboard.py:
-    python3.13 tests/test_smoke.py
+Manual sanity-check script, not a pytest module. Run by hand after ANY change
+to codec.py, codec_overlays.py, codec_watcher.py, or codec_dashboard.py:
+    python3.13 scripts/smoke.py
 
-Every test here exists because a real bug shipped without it.
+Lives in scripts/ rather than tests/ because it uses sys.exit(1) on failure
+and prints to stdout — incompatible with pytest collection. Full pytest
+coverage handles regression protection.
 """
 import sys
 import os

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -5,9 +5,19 @@
 gets imported, not the main checkout. Without this, a worktree's Step 3
 codec_audit (with new ASKUSER_EVENT_* constants) gets shadowed by main's
 older copy and tests that rely on the new constants fail.
+
+pynput stub: on Linux CI runners without an X display, `from pynput import
+keyboard` raises ImportError at module-load time (pynput tries to acquire
+an X11 connection). codec.py imports pynput unconditionally; many tests
+do `import codec` at module scope. We install a minimal stub in sys.modules
+BEFORE any test collection so those imports succeed. macOS dev machines
+have a real pynput and skip the stub entirely. This is the same pattern
+described in the PR-4F design doc (`docs/PR4F-STATE-LOCK-DESIGN.md`),
+lifted into conftest so it applies session-wide instead of per-test.
 """
 import sys
 import os
+import types
 from pathlib import Path
 
 # Worktree's repo root = parent of `tests/`. When running from main repo
@@ -18,3 +28,47 @@
 sys.path.insert(0, os.path.expanduser("~/codec-repo"))
 sys.path.insert(0, os.path.expanduser("~/.codec/skills"))
 sys.path.insert(0, str(_WORKTREE_REPO))
+
+
+def _install_pynput_stub_if_needed() -> None:
+    """Register placeholder ``pynput`` / ``pynput.keyboard`` modules when the
+    real package cannot be imported.
+
+    No-op when ``pynput`` is already present in ``sys.modules`` or when
+    ``import pynput`` succeeds. Otherwise two bare module objects are placed
+    in ``sys.modules`` exposing ``Listener``, ``KeyCode``, ``Key``,
+    ``Controller`` and ``HotKey`` as inert placeholders. The stub only
+    unblocks module import; tests that exercise keyboard behavior are
+    expected to bring their own mocks.
+    """
+    if "pynput" in sys.modules:
+        return
+    try:
+        import pynput  # noqa: F401
+    except Exception:
+        # Any import-time failure means the real package is unusable in this
+        # environment, so fall through and install the stub instead.
+        pass
+    else:
+        return
+
+    pynput_mod = types.ModuleType("pynput")
+    keyboard_mod = types.ModuleType("pynput.keyboard")
+
+    class _Stub:
+        """Inert stand-in: constructible with any arguments, callable, and
+        every non-dunder attribute lookup yields another ``_Stub``."""
+
+        def __init__(self, *a, **kw):
+            pass
+
+        def __call__(self, *a, **kw):
+            return self
+
+        def __getattr__(self, name):
+            # Dunder probes (`__wrapped__`, `__iter__`, ...) must fail the
+            # normal way. Answering them with a stub makes hasattr() always
+            # true and sends introspection helpers such as inspect.unwrap()
+            # down an endless chain of fresh stubs.
+            if name.startswith("__") and name.endswith("__"):
+                raise AttributeError(name)
+            return _Stub()
+
+        def start(self):
+            pass
+
+        def stop(self):
+            pass
+
+        def join(self, *a, **kw):
+            pass
+
+    # `Key` is accessed as `Key.f5`, `Key.f13`, `Key.cmd`, etc. — the set
+    # of attribute names is open-ended (callers also use `f16`, `f17`).
+    # Use a _Stub() instance so __getattr__ returns a fresh non-None _Stub
+    # for ANY such attribute, matching pynput's real keyboard.Key namespace
+    # closely enough for codec_config._resolve_key() to return non-None.
+    keyboard_mod.Listener = _Stub
+    keyboard_mod.KeyCode = _Stub
+    keyboard_mod.Key = _Stub()
+    keyboard_mod.Controller = _Stub
+    keyboard_mod.HotKey = _Stub
+    pynput_mod.keyboard = keyboard_mod
+    sys.modules["pynput"] = pynput_mod
+    sys.modules["pynput.keyboard"] = keyboard_mod
+
+
+_install_pynput_stub_if_needed()
diff --git a/tests/test_ci_coverage_invariant.py b/tests/test_ci_coverage_invariant.py
@@ -0,0 +1,67 @@
+"""Guard against future CI-coverage drift.
+
+The 2026-05 audit (F-3) found that .github/workflows/ci.yml ran 23 of the
+134 test files in tests/ — leaving the Wave-1+2 hardening tests
+(D-7 / D-9 / D-12 / D-13 / D-18 / D-19 / D-21 / D-22) without regression
+protection on PRs. This file's tests fail loudly if anyone re-introduces
+that pattern by enumerating individual test files in ci.yml instead of
+running the full tests/ tree.
+
+Removing this file or weakening its assertions REGRESSES F-3 closure.
+"""
+from pathlib import Path
+import re
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+CI_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "ci.yml"
+
+
+def test_ci_workflow_exists() -> None:
+    """The workflow file at ``CI_WORKFLOW`` must be present on disk."""
+    workflow_present = CI_WORKFLOW.exists()
+    assert workflow_present, f"ci.yml not found at {CI_WORKFLOW}"
+
+
+def test_ci_runs_full_pytest_suite() -> None:
+    """ci.yml must invoke `pytest tests/` (or equivalent full-tree form),
+    not an enumerated subset of files.
+
+    Only executable text is considered: whole-line comments, trailing
+    ``#`` comments and step ``name:`` labels are ignored, so prose that
+    merely mentions `pytest tests/` cannot satisfy the check.
+    """
+    content = CI_WORKFLOW.read_text(encoding="utf-8")
+
+    # Walk whole lines rather than regex matches: a match that begins at
+    # the word "pytest" can never start with "#", so comments have to be
+    # recognised before the search, not after it.
+    invocations = []
+    for raw_line in content.splitlines():
+        stripped = raw_line.strip()
+        if stripped.startswith(("#", "- name:", "name:")):
+            continue
+        # Drop a trailing ` # ...` comment so it cannot contribute a match.
+        code = stripped.split(" #", 1)[0]
+        found = re.search(r"\bpytest\b.*", code)
+        if found:
+            invocations.append(found.group(0).rstrip())
+    assert invocations, "No non-comment pytest invocation found in ci.yml"
+
+    full_suite_patterns = [
+        # `pytest tests` or `pytest tests/` (also covers `python -m pytest
+        # tests/`). The negative lookahead rejects `tests/test_foo.py`,
+        # `tests/unit`, `tests-old`, ... — anything that continues the path.
+        r"\bpytest\s+tests/?(?![\w./-])",
+    ]
+    matches_full_suite = any(
+        re.search(pattern, inv)
+        for inv in invocations
+        for pattern in full_suite_patterns
+    )
+    assert matches_full_suite, (
+        "ci.yml does not run the full tests/ suite. "
+        f"Found pytest invocations: {invocations}. "
+        f"Expected one to match one of {full_suite_patterns}."
+    )
+
+
+def test_ci_does_not_enumerate_individual_test_files() -> None:
+    """ci.yml should NOT enumerate >= 5 individual test files.
+
+    A small number of explicit `tests/test_*.py` references are allowed
+    (e.g. a hand-rolled smoke script invoked via `python` not `pytest`).
+    A large number indicates the F-3 pattern reappearing — drift caught.
+
+    Whole-line comments are not counted: explanatory prose that names a
+    test file does not run anything, so it must not consume the budget.
+    """
+    content = CI_WORKFLOW.read_text(encoding="utf-8")
+    active_text = "\n".join(
+        line for line in content.splitlines()
+        if not line.lstrip().startswith("#")
+    )
+    # Match `test_<name>.py` whether wrapped in `tests/`, quoted, or bare;
+    # a set de-duplicates files that appear in multiple steps.
+    unique_files = set(re.findall(r"test_[a-zA-Z0-9_]+\.py", active_text))
+    assert len(unique_files) < 5, (
+        f"ci.yml references {len(unique_files)} distinct test files "
+        f"explicitly: {sorted(unique_files)}. This regresses F-3 closure. "
+        f"Switch to `pytest tests/` to run the full suite."
+    )