From dcd16dfb5386f4d6417303acc9132e4d0e6026f1 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 12:33:13 -0400 Subject: [PATCH 01/26] feat(security): confine measure/sim subprocess exec (sandbox increments 1-2) Three exec sites (apply_measure, test_measure, sim _launch) now route through mcp_server/sandbox.py, gated by OSMCP_SANDBOX (default off = passthrough): - clean-env allowlist strips host env (secrets) from measure code - UID drop to unprivileged `sandbox` user + rlimits via _sandbox_exec shim - unconditional is_path_allowed() check on apply_measure(measure_dir) - OSMCP_SIM_TIMEOUT_SECONDS config added (enforcement pending) Dockerfile bakes the `sandbox` user. Landlock FS rules + seccomp net-deny land next in _sandbox_exec. Security PoC suite kept local (git-excluded); standalone security.yml workflow added (manual, no-op when suite absent). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/security.yml | 38 ++ docker/Dockerfile | 5 + docs/plans/measure-exec-sandbox.md | 341 ++++++++++++++++++ mcp_server/_sandbox_exec.py | 92 +++++ mcp_server/config.py | 33 ++ mcp_server/sandbox.py | 133 +++++++ .../skills/measure_authoring/operations.py | 3 + mcp_server/skills/measures/operations.py | 22 +- mcp_server/skills/simulation/operations.py | 10 +- 9 files changed, 671 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/security.yml create mode 100644 docs/plans/measure-exec-sandbox.md create mode 100644 mcp_server/_sandbox_exec.py create mode 100644 mcp_server/sandbox.py diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..8f87f81 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,38 @@ +name: security + +# Dedicated workflow for the security / sandbox-confinement suite, kept separate +# from the main `ci` shards so security tests live on their own. +# +# Manual-trigger only. The working exploit PoC (tests/test_sandbox.py) is +# deliberately kept OUT of the public repo (see .git/info/exclude) until the +# sandbox fix ships, so on a clean GitHub checkout this suite is a safe no-op — +# it runs only where the security tests are actually present (local / self-hosted +# checkout, or once the blocked-assertion suite is published). + +on: + workflow_dispatch: + +jobs: + security-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build image + run: docker build -f docker/Dockerfile -t openstudio-mcp:dev . + + - name: Run security suite (if present) + env: + RUN_OPENSTUDIO_INTEGRATION: "1" + MCP_SERVER_CMD: "openstudio-mcp" + run: | + mkdir -p runs + FILES=$(ls tests/test_sandbox.py tests/test_security_*.py 2>/dev/null || true) + if [ -z "$FILES" ]; then + echo "No security tests present in this checkout (PoC kept local) — nothing to run." + exit 0 + fi + echo "Running security tests: $FILES" + docker run --rm -v "$PWD:/repo" -v "$PWD/runs:/runs" \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \ + openstudio-mcp:dev bash -lc "cd /repo && pytest -vv $FILES" diff --git a/docker/Dockerfile b/docker/Dockerfile index 588ec84..528fae0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -7,6 +7,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates curl \ && rm -rf /var/lib/apt/lists/* +# Unprivileged account that confined measure/simulation subprocesses drop to +# (see mcp_server/_sandbox_exec.py). The server stays root; only the exec'd +# measure/EnergyPlus child runs as this uid when OSMCP_SANDBOX is enabled. +RUN useradd -r -u 1001 -s /usr/sbin/nologin sandbox + # ComStock measures (openstudio-standards-based templates for space types, # constructions, HVAC, schedules). Only the measures/ directory is kept (~50 MB). ARG COMSTOCK_TAG=2025-3 diff --git a/docs/plans/measure-exec-sandbox.md b/docs/plans/measure-exec-sandbox.md new file mode 100644 index 0000000..a467936 --- /dev/null +++ b/docs/plans/measure-exec-sandbox.md @@ -0,0 +1,341 @@ +# Measure-exec sandbox: keep the tools, confine the subprocess + +**Status:** researched, proposed · **Supersedes** the deny-tools approach in +`safe-mode-disabled-tools.md` as the primary answer to external-review point 4. +(Denylist stays as an operator opt-out knob, but it removes core functionality — +not a real solution.) + +## Decision: follow the Codex CLI model + +Adopt OpenAI Codex CLI's architecture as the template (it solves our exact +problem: confine a native subprocess inside a container we don't control), with +one tightening: + +- **Landlock for the filesystem** — but **read-deny-by-default** (stricter than + Codex's read-allow-all). We can enumerate the few dirs `openstudio run` needs, + so we afford the stronger posture without breaking the happy path. +- **seccomp BPF to block network** — deny `socket(AF_INET/AF_INET6)`, allow + `AF_UNIX`. This is how Codex blocks net on kernels < 6.7. **Key consequence: + net-deny works at ANY Landlock ABI (incl. our WSL2 abi3), unprivileged, no + launch flags** — installing a seccomp filter needs only `no_new_privs` (which + we already set for Landlock), and `seccomp`/`prctl` are in Docker's default + allowlist. +- **no_new_privs + setuid drop + rlimits** underneath (the POSIX floor). +- **bwrap** only as an opportunistic upgrade (adds mount/PID/net namespaces) + where a startup userns probe passes — never the baseline. +- **Degrade loudly** — probe at startup, stamp the active tier into every exec's + audit line + tool output; `OSMCP_SANDBOX=off` must be explicit and is logged. + +Why Codex over Anthropic `sandbox-runtime`: srt's primary primitive is +bwrap/userns — exactly what stock Linux Docker seccomp blocks. Codex's +Landlock+seccomp needs no privileges and no launch flags, so it runs in the +operator's unmodified container. srt's richer egress allowlist (external proxy) +is overkill — we want net-deny, which a seccomp filter gives for free. + +**This collapses the tiers**: FS confinement (Landlock) + net-deny (seccomp) + +UID/rlimits (POSIX) all run together in stock Docker ≥23.0 with zero launch +flags. bwrap is a bonus, not a requirement. + +## Insight + +All arbitrary-code execution already funnels through 3 subprocess sites: + +| Site | Location | Cmd | Today | +|---|---|---|---| +| `apply_measure` | `skills/measures/operations.py:248` | `openstudio run --measures_only -w` | timeout 300s, full env, root | +| `test_measure` | `skills/measure_authoring/operations.py:1015` | `pytest` / `ruby` | timeout 60s, full env, root | +| `run_osw`/`run_simulation` | `skills/simulation/operations.py:265` (`_launch`) | `openstudio run -w` | **no timeout**, full env, root | + +The subprocess needs: ro on OpenStudio/EnergyPlus install, `/var/oscli` gems, +`/opt/*-measures`, `/inputs`, `/repo`; rw ONLY on its run dir. Nothing else. +That maps 1:1 onto a filesystem sandbox policy. Wrap these 3 sites → full +functionality, contained blast radius. + +## Why not the alternatives + +- **WASM (pydantic mcp-run-python style):** can't run native EnergyPlus/Ruby. Out. +- **microVMs (E2B/Modal/Firecracker/gVisor):** need host control / KVM; we're a + process inside someone's container. Host-level gVisor stays a docs recommendation. +- **Static analysis of measure code:** bypassable, not security. +- **NREL OpenStudio-server prior art:** none — zero sandboxing, single-tenant trust + model. We'd be ahead of upstream. +- **Closest prior art:** OpenAI Codex CLI (Landlock + seccomp, bwrap where possible, + explicit degrade) and Anthropic sandbox-runtime (bwrap + seccomp + net proxy). + +## Design: tiered sandbox, probed at startup, best tier wins + +One wrapper at the 3 chokepoints, e.g. `mcp_server/sandbox_exec.py` (or a +`sandboxed_run()` drop-in for `subprocess.run`/`Popen`): + +**Tier 0 — POSIX floor (always on):** +- Drop root → dedicated `sandbox` UID via wrapper exec (`setpriv --reuid --regid + --clear-groups --no-new-privs`) — NOT `preexec_fn` (fork-safety in threaded server) +- Run dirs `0700` sandbox-owned; install/mounts root-owned ro +- rlimits: `RLIMIT_CPU`, `RLIMIT_FSIZE` (caps SQL/ESO blowups), `RLIMIT_NPROC`, + `RLIMIT_AS` (generous — EnergyPlus legitimately uses GBs), `RLIMIT_NOFILE` +- Clean env: pass explicit allowlist (`PATH`, `RUBYLIB`, `ENERGYPLUS_EXE_PATH`, + bundler vars, `HOME=`, `TMPDIR=/tmp`) — today + `os.environ.copy()` leaks everything +- `start_new_session=True` + kill process group on timeout/cancel + +**Tier 1 — Landlock LSM (default kernel sandbox):** +- Works inside default-seccomp Docker ≥23.0, unprivileged, zero launch flags — + the only kernel sandbox with that property (`landlock_*` syscalls allowlisted + since moby 2022; verified in current default profile) +- Policy: ro+exec `/usr`, `/lib*`, `/usr/local/openstudio-*`, `/var/oscli`, + `/opt/comstock-measures`, `/opt/common-measures`, `/inputs`, `/repo`; + rw only ``; requires `prctl(PR_SET_NO_NEW_PRIVS)` first +- Landlock ABI ≥4 (kernel 6.7) can also deny TCP natively; WSL2 dev kernel = + ABI 3 (FS only, verified). We do NOT rely on this for network — see seccomp. +- Implement: ~100-line ctypes module we own (PyPI `landlock` pkg is 27★/dev + release; `landrun` CLI is Go binary) — syscall surface is 3 calls + +**Tier 1b — seccomp BPF net-deny (rides with Landlock):** +- BPF filter returning `EAFNOSUPPORT` for `socket(AF_INET/AF_INET6)`, allowing + `AF_UNIX` (so the OpenStudio bundler / local IPC still works). Installed in the + same wrapper right after `no_new_privs`. +- Works at ANY kernel/ABI in stock Docker — this is our network control, not + Landlock-abi4 or bwrap. (Codex's exact mechanism.) +- Implement: raw BPF in the wrapper shim, or `pyseccomp`/libseccomp if we accept + the dep. Lean raw-BPF to stay dep-free and grepable. + +**Tier 2 — bubblewrap (opportunistic upgrade):** +- `bwrap --unshare-all --clearenv --die-with-parent` + ro-binds + run-dir bind: + adds mount-ns illusions + **network-ns (no net even on ABI 3 kernels)** +- Blocked by stock Docker seccomp on Linux hosts (`clone(CLONE_NEWUSER)` denied + without CAP_SYS_ADMIN; moby#42441 still open). Docker Desktop/WSL2 ships + seccomp-unconfined → works there today (probed and confirmed in our dev container) +- Enable only when startup userns probe passes; document operator flags + (custom seccomp profile > `--cap-add SYS_ADMIN`; Ubuntu 24.04 hosts also need + AppArmor userns relief) for Linux deployments that want tier 2 + +**Probe at startup, audit one `sandbox_tier` event, and report the active tier +in tool responses** (`"sandbox": "bwrap" | "landlock-abi3" | "posix" | "none"`) +— degradation must be visible, never silent (Codex pattern). + +## Orthogonal quick wins (do regardless) + +1. `is_path_allowed()` check on `apply_measure(measure_dir=...)` — today accepts + any path holding a `measure.rb` (other users' runs!) +2. Timeout (or operator-config max walltime) on `run_osw`/`run_simulation` — + only manual cancel today +3. Clean env allowlist (tier 0 item, but ship first — trivial, closes secret leakage) + +## Optional human gate (HITL, not containment) + +MCP elicitation: FastMCP `ctx.elicit()` ≥2.10; Claude Code supports it ≥v2.1.76, +Claude Desktop does NOT yet. Pattern: show measure code diff → accept/decline +before exec. Fallback for non-supporting clients: return +`{"needs_approval": true, "code_sha256": ...}` → second call with +`approved_sha256=`. Complements sandbox; doesn't replace it (the LLM writes the +code the human skims). Defer to v2 unless an operator asks. + +## Relation to safe-mode denylist + +Keep `OSMCP_DISABLED_TOOLS` / `OSMCP_SAFE_MODE` as a small operator knob +(some deployments genuinely want measure-authoring off), but the sandbox is the +primary control: tools stay on by default because their execution is confined. + +## Implementation order + +1. Quick wins (env allowlist, `measure_dir` path check, sim timeout) — small + **— DONE (increment 1):** `OSMCP_SANDBOX` knob + `mcp_server/sandbox.py` + clean-env allowlist wired into all 3 exec sites (`apply_measure`, + `test_measure`, sim `_launch`); unconditional `is_path_allowed` check on + `apply_measure(measure_dir=)`; `OSMCP_SIM_TIMEOUT_SECONDS` config added. + No Docker rebuild (pure Python). Default `off` = passthrough, so the existing + suite is untouched. Verified: clean-env hides the canary under `posix` and a + normal run still succeeds; traversal rejected; `test_measures` green. + **Still pending in this bucket:** wire the sim-timeout *enforcement* into the + dispatcher (config constant exists; `_launch`/dispatch loop must kill runs + past the cap via the existing SIGTERM→SIGKILL path). +2. POSIX floor: setuid `sandbox` user + rlimits via `setpriv` wrapper — small/med + (needs Docker rebuild: `useradd sandbox`). Closes runs-as-root + rlimits. +3. Landlock ctypes module + seccomp net-deny + probe + tier reporting — medium. + Closes filesystem escape + network exfil. +4. bwrap backend + userns probe + operator docs — medium +5. Elicitation gate — defer +6. Flip default `OSMCP_SANDBOX` off → `auto` once the full suite passes confined. + +## Testing strategy + +Goal: tests that (a) **prove the holes exist today** without touching anything +real, and (b) **prove each is closed** after the fix. Both halves run in CI. + +**Status (confirm-now half landed):** `tests/test_sandbox.py` — 3 integration +tests, CI shard 2, all passing in ~11s. Confirms today (OSMCP_SANDBOX=off): +measures run as root (uid 0), the server's env secret leaks in, reads/writes +escape the run dir, network exfil to a localhost canary succeeds, and +`apply_measure` accepts a `measure_dir` outside all allowed roots. Probes are +generated at runtime via `create_measure` (Ruby + Python) so the real +create→apply path is exercised. The `OSMCP_SANDBOX=auto` "blocked" counterparts +are added with the fix. + +### Core principle — dual-run falsifiability + +Every security test runs the SAME attack measure twice: +- `OSMCP_SANDBOX=off` → attack must **SUCCEED**. Proves the hole is real today + AND proves the attack mechanism actually fires. A sandbox test that can't show + the attack succeeding unsandboxed is vacuous (green for the wrong reason). +- `OSMCP_SANDBOX=auto` (best active tier) → attack must be **BLOCKED**, with the + violation visible in tool output. + +A fix is "proven" only when the off-run breaches and the on-run blocks. The +off-run is the regression guard against vacuous-green. + +### Safety — canaries + decoys, never real targets + +No test touches a real secret, real sensitive file, or real network: +- **Decoy "victim" run** — fixture creates `runs/_decoy_victim/{run_record.json, + secret.txt}` (known content), a SIBLING of the granted run_dir → reading/writing + it from inside the sandbox is a genuine escape, but it sits on the disposable + runs volume and the fixture deletes it. (cross-user read/write.) +- **Canary env secret** — launch the test server with + `OSMCP_CANARY_SECRET=do-not-leak-`. A fake value, never a real credential. + (the `os.environ.copy()` leak.) +- **Localhost canary listener** — throwaway TCP server on `127.0.0.1:`; + measure tries to POST the canary to it. Nothing leaves the machine. (net exfil.) +- **Bounded resource probes (critical)** — the off/control run must NEVER run an + unbounded bomb. Fork test forks a fixed modest N (e.g. 64) and checks they + succeed → infers "no NPROC cap" without a real fork bomb. Disk test writes a + fixed small file in the control to confirm "no FSIZE cap" without filling disk. + CPU/memory controls are SKIPPED (can't bound an unlimited spin/alloc safely); + on-runs allocate/spin just past the configured cap and assert the cap fires. + Outer pytest timeout + an outer rlimit on the test process backstop everything. + +All attack measures are committed fixtures under `tests/assets/sandbox_probes/`, +run only inside the disposable container. + +### How a probe measure reports outcome + +Each attack measure attempts the action, rescues the error, and +`runner.registerInfo("PROBE result=")` — `apply_measure` returns +runner messages, so the test asserts on them. The AUTHORITATIVE check is external +ground truth (decoy file untouched / listener received nothing / recorded euid), +not the measure's self-report. Two independent assertions per test. + +### Per-issue test matrix + +| # | Issue | Attack measure does | off (today) | on (fixed) | min tier | +|---|---|---|---|---|---| +| 1 | Write escape | `File.write` to decoy sibling dir | write succeeds, decoy modified | EACCES, decoy unchanged | landlock | +| 2 | Read other user's run | `File.read` decoy `secret.txt` | returns known content | EACCES, content absent from output | landlock | +| 3 | Env secret leak | dump ENV to run_dir | canary present | canary absent | posix (clean env) | +| 4 | Runs as root | record `Process.uid` | uid==0 | uid==1001 (sandbox) | posix | +| 5 | Network exfil | TCP connect+POST canary to 127.0.0.1 | listener receives canary | connection refused, nothing received | seccomp | +| 6 | Fork storm | fork N=64 (bounded) | all succeed | blocked past NPROC cap | posix (rlimit) | +| 7 | Disk bomb | write past FSIZE cap in run_dir | (control: small file, no cap) | SIGXFSZ/EFBIG at cap | posix (rlimit) | +| 8 | CPU spin | busy loop | (control skipped) | SIGXCPU at CPU cap | posix (rlimit) | +| 9 | Memory bomb | alloc past AS cap | (control skipped) | ENOMEM at cap | posix (rlimit) | +| 10 | No sim timeout | sleep 60s, cap=5s | (control: outer timeout) | killed ~5s, status failed/timeout | walltime mgr | +| 11 | measure_dir traversal | `apply_measure(measure_dir=)` | copies/executes | `{ok:false, "not allowed"}` | path check (no exec) | + +### Tier-awareness (honest skips, not vacuous passes) + +Active tier depends on the runner's kernel/Docker: +- Local WSL2 = landlock-abi3 + seccomp (+ bwrap if userns probe passes on Docker + Desktop's unconfined daemon). +- CI ubuntu-24.04 = Docker ≥23 / kernel 6.x → landlock-abi4 + seccomp; bwrap + blocked by default seccomp. + +Each test declares its min tier (matrix col). If active tier < needed → **SKIP +with a loud reason** ("net confinement needs seccomp; active=posix"), never +silent-pass. Plus a **guard test**: in the integration env assert active tier ∈ +{landlock-abi3, landlock-abi4, bwrap}; FAIL if posix/none — that would mean the +sandbox silently didn't engage and the whole suite is vacuous. + +### Tier-probe unit tests + +Force each tier path, assert the correct argv is built (setpriv / landlock-shim / +bwrap) and that the probe downgrades correctly when a syscall is stubbed to fail. +Unit tier (no `openstudio` import). + +### Repo compliance + +- Integration tier (real `openstudio` CLI, mock nothing), + `RUN_OPENSTUDIO_INTEGRATION=1`, AAA structure, `# Regression:`/`# Validates:` + comments, exact assertions incl. error-message text, `parametrize` the issue + set, add to the lightest CI shard. +- New file `tests/test_sandbox.py` + fixtures `tests/assets/sandbox_probes/`. + +## Resolved decisions (defaults) + +Net effect: works in stock Docker, no launch flags, blast radius = the run dir, +with operator knobs for looser (trusted/BCL) or tighter (cgroup memory) needs. + +1. **Min Docker version → don't gate; probe-and-degrade. Document 23.0 as the FS + line.** seccomp + UID + rlimits work on ~any Docker (those syscalls + default-allowed for years); only Landlock FS confinement needs ≥23.0. A hard + minimum would block legit operators for no safety gain — on old Docker you + lose FS-deny but keep net-deny + UID + rlimits, and the tier report says so. + +2. **One shared `sandbox` UID for v1, not per-run ephemeral.** Landlock isolates + the filesystem *per process* — two runs sharing UID 1001 each get a ruleset + granting only their own run_dir, so cross-run file access is blocked by + Landlock, not DAC; the UID barely matters for the main threat. Per-run UIDs + add a UID pool + chown + cleanup for little gain at `MAX_CONCURRENCY=1` + (current default). Close the same-UID residual (ptrace/signal between + concurrent runs) by adding `ptrace` to the seccomp deny list. Shared + `RLIMIT_NPROC` accounting is a minor co-tenant DoS, irrelevant at concurrency + 1. **Upgrade trigger:** revisit per-run UIDs only if a multi-user deployment + raises per-user concurrency > 1. + +3. **seccomp net-deny via libseccomp (`pyseccomp`), not hand-rolled BPF.** + Security-critical filter, and we ship **amd64 + arm64** — raw BPF must + hand-handle `AUDIT_ARCH` + per-arch syscall numbers for both, and a + subtly-wrong filter fails open. libseccomp is what Docker itself uses, + abstracts the arch mess, and `libseccomp2` is already in the base image. The + repo's dep-minimization rule targets heavyweight *unnecessary* deps (the + OpenLLMetry case), not reimplementing a security primitive across two arches. + (Raw BPF acceptable ONLY with explicit amd64+arm64 AUDIT_ARCH handling + a + fail-closed test per arch — not worth it.) + +4. **rlimits = generous anti-runaway backstops, operator-tunable env vars; not + tight quotas.** A tight cap silently breaks real sims (EnergyPlus uses GBs, + annual SQL is hundreds of MB–GB) — worse than no cap for the modeler. + - `RLIMIT_AS`: **avoid as primary** — counts virtual address space (mmap'd + libs), breaks allocators. Unset or ~24 GB backstop; push real memory control + to container cgroup (`docker run --memory`), document that. + - `RLIMIT_FSIZE`: ~10 GB per-file runaway guard. Real disk control = retention + GC + per-run-dir quota (cgroup/quota territory, rlimit can't express). + - `RLIMIT_NPROC` ~512, `RLIMIT_NOFILE` ~4096, `RLIMIT_CPU` ~2× the existing + 300 s measure timeout. + +5. **Sim walltime cap: add `OSMCP_SIM_TIMEOUT_SECONDS`, default 7200 (2 h), + 0 = unlimited.** Real gap today (sims hang forever; manual cancel only). 2 h + is a hang-catcher, not a tight bound — big annual/parametric runs take + 10–30+ min, so a low default would silently kill legit work. Reuse the + dispatcher's existing SIGTERM→SIGKILL path (`cancel_run`). Fire loudly: + status=`timeout`, clear error. + +6. **Own ctypes Landlock module, don't vendor `landrun`.** 3 syscalls, ~100 + lines, fully grepable/auditable — matches house style ("everything direct, + visible in stack traces"). `landrun` is a young (Mar 2025) external Go binary + = supply-chain dep + vendoring across 2 arches + another thing to track, for + security-critical code we should own. PyPI `landlock` pkg is dev-release/27★ — + don't depend on that either. (Per-arch syscall-number care as in #3, but + Landlock numbers are stable.) + +7. **Sandbox `test_measure` too, same wrapper.** pytest/ruby on LLM-authored + measure code is the same arbitrary-exec threat as `apply_measure` — "test this + measure" *is* the trust-the-code moment. Its legit needs (own test fixtures, + test model) live in granted dirs. Caveat: a test that legitimately hits the + network (BCL download) breaks under net-deny — see #8. + +8. **Default net-deny; expose `OSMCP_SANDBOX_NET=deny|allow`; confirm + empirically.** `openstudio run -w` runs EnergyPlus locally and `--bundle` + resolves from the offline gem path → should need no AF_INET, so net-deny + shouldn't break normal runs — **but verify, don't assume** (smoke test: a + normal sim succeeds under net-deny). The real exception is BCL-downloading + measures (need AF_INET); blocking them is *correct* for untrusted mode but + trusted deployments want them — hence the toggle. AF_UNIX always allowed + (local IPC/bundler). + +## Still open (need your input) + +- Exact rlimit/timeout default *values* above — sane for your largest real + models? (chosen as backstops, not measured against a worst-case OSM) +- Container-level memory cgroup: document-only, or ship a recommended + `--memory` in compose / run docs? +- Per-run-dir disk quota — out of scope for v1, or needed now? (rlimit can't do + it; needs cgroup/quota) diff --git a/mcp_server/_sandbox_exec.py b/mcp_server/_sandbox_exec.py new file mode 100644 index 0000000..fa5ae1e --- /dev/null +++ b/mcp_server/_sandbox_exec.py @@ -0,0 +1,92 @@ +"""Privilege-dropping exec shim for confined measure / simulation subprocesses. + +Invoked by mcp_server.sandbox.wrap_cmd as:: + + python3 -m mcp_server._sandbox_exec --uid N --gid N \ + [--rlimit-fsize BYTES] [--rlimit-nproc N] [--rlimit-nofile N] \ + [--rlimit-cpu SECONDS] [--rlimit-as BYTES] -- CMD [ARG...] + +Runs as root, applies rlimits, drops to the unprivileged uid/gid, sets +``no_new_privs``, then ``execvp``s CMD. A standalone exec — NOT a Popen +``preexec_fn`` — so there is no fork-safety hazard in the threaded server, and +the dropped image keeps the same pid (so the dispatcher's existing +terminate/kill-by-pid path still works). Landlock FS rules + a seccomp net-deny +filter will be added here in the next increment, behind the same wrapper. +""" +from __future__ import annotations + +import argparse +import ctypes +import os +import resource +import sys + +_RLIMITS = { + "cpu": resource.RLIMIT_CPU, # CPU-seconds + "fsize": resource.RLIMIT_FSIZE, # max single-file bytes + "nproc": resource.RLIMIT_NPROC, # processes/threads for the uid + "nofile": resource.RLIMIT_NOFILE, # open file descriptors + "as": resource.RLIMIT_AS, # virtual address space bytes +} + +_PR_SET_NO_NEW_PRIVS = 38 + + +def _set_no_new_privs() -> None: + """Promise the kernel we will never gain privileges via setuid binaries. + + Prerequisite for the unprivileged Landlock/seccomp backends landing next; set + here so the contract holds from the POSIX tier onward. + """ + libc = ctypes.CDLL("libc.so.6", use_errno=True) + if libc.prctl(_PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0: + raise OSError(ctypes.get_errno(), "prctl(PR_SET_NO_NEW_PRIVS) failed") + + +def main(argv: list[str]) -> int: + if "--" not in argv: + print("sandbox-exec: missing '-- CMD'", file=sys.stderr) + return 2 + sep = argv.index("--") + cmd = argv[sep + 1:] + if not cmd: + print("sandbox-exec: empty command", file=sys.stderr) + return 2 + + ap = argparse.ArgumentParser(add_help=False) + ap.add_argument("--uid", type=int, required=True) + ap.add_argument("--gid", type=int, required=True) + for name in _RLIMITS: + ap.add_argument(f"--rlimit-{name}", type=int, default=0) + ns = ap.parse_args(argv[:sep]) + + # rlimits first, while still privileged (lowering hard limits is always ok). + for name, res_id in _RLIMITS.items(): + val = getattr(ns, f"rlimit_{name}") + if val and val > 0: + try: + resource.setrlimit(res_id, (val, val)) + except (ValueError, OSError) as e: + print(f"sandbox-exec: rlimit {name}={val} failed: {e}", file=sys.stderr) + + _set_no_new_privs() + + # Drop privileges: supplementary groups, then gid, then uid (order matters — + # setgroups/setgid require privilege we lose after setuid). + os.setgroups([ns.gid]) + os.setgid(ns.gid) + os.setuid(ns.uid) + if os.getuid() != ns.uid or os.geteuid() != ns.uid: + print("sandbox-exec: uid drop did not stick", file=sys.stderr) + return 2 + + try: + os.execvp(cmd[0], cmd) # noqa: S606 - intentional shell-less exec of the confined command + except OSError as e: + print(f"sandbox-exec: exec {cmd[0]} failed: {e}", file=sys.stderr) + return 127 + return 0 # unreachable + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/mcp_server/config.py b/mcp_server/config.py index 4c50bc5..22a20aa 100644 --- a/mcp_server/config.py +++ b/mcp_server/config.py @@ -58,6 +58,39 @@ def _safe_float(env_val: str, default: float) -> float: ENABLE_CODE_MODE = os.environ.get("OSMCP_CODE_MODE", "").lower() in ("1", "true") +# Subprocess confinement for measure/simulation execution (see mcp_server/sandbox.py). +# off — full passthrough (current behaviour / explicit escape hatch) +# posix — clean-env allowlist (+ UID drop & rlimits in later increments) +# auto — best confinement available (currently == posix) +# Default off during rollout; flips to auto once the full suite passes confined. +SANDBOX_MODE = os.environ.get("OSMCP_SANDBOX", "off").strip().lower() +# Network policy for confined subprocesses: deny (default) blocks outbound TCP +# once the seccomp backend lands; allow leaves it open (trusted/BCL deployments). +SANDBOX_NET = os.environ.get("OSMCP_SANDBOX_NET", "deny").strip().lower() + +# Wall-clock cap for a single simulation (run_osw/run_simulation). 0 = no cap. +SIM_TIMEOUT_SECONDS = _safe_float( + os.environ.get("OPENSTUDIO_MCP_SIM_TIMEOUT_SECONDS", + os.environ.get("OSMCP_SIM_TIMEOUT_SECONDS", "7200")), + 7200.0, +) + +# Unprivileged account confined subprocesses drop to (baked into the image as +# `sandbox`). See mcp_server/_sandbox_exec.py. +SANDBOX_UID = _safe_int(os.environ.get("OSMCP_SANDBOX_UID", "1001"), 1001) +SANDBOX_GID = _safe_int(os.environ.get("OSMCP_SANDBOX_GID", "1001"), 1001) + +# rlimit backstops for confined subprocesses (0 = off). Generous by design — they +# catch runaway bombs, not tune normal use. CPU/AS default off: a CPU-second cap +# would kill long annual sims, and RLIMIT_AS (virtual address space) breaks +# EnergyPlus/allocators — real memory control belongs to the container cgroup. +_GB = 1024 ** 3 +SANDBOX_RLIMIT_FSIZE = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_FSIZE", str(10 * _GB)), 10 * _GB) +SANDBOX_RLIMIT_NPROC = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_NPROC", "1024"), 1024) +SANDBOX_RLIMIT_NOFILE = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_NOFILE", "0"), 0) +SANDBOX_RLIMIT_CPU = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_CPU", "0"), 0) +SANDBOX_RLIMIT_AS = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_AS", "0"), 0) + # Shared roots are read-only for everyone; the only per-user writable area is # RUN_ROOT/ (see user_run_root). Writes elsewhere are denied. _SHARED_READ_ROOTS = [ diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py new file mode 100644 index 0000000..119bee6 --- /dev/null +++ b/mcp_server/sandbox.py @@ -0,0 +1,133 @@ +"""Confinement for the measure / simulation subprocesses (Codex-style, tiered). + +Everything an applied measure or a simulation runs goes through three subprocess +sites — `apply_measure`, `test_measure`, and the sim dispatcher's `_launch`. +They used to inherit the server's full environment via ``os.environ.copy()``. +This module is the single chokepoint that confines them. + +`OSMCP_SANDBOX` (config.SANDBOX_MODE) selects the mode: + off — full passthrough (current behaviour / explicit escape hatch) + posix — clean-env allowlist (this increment); UID drop + rlimits + Landlock + FS policy + seccomp net-deny arrive in later increments, same knob + auto — best confinement available (currently == posix) + +`off` is a true passthrough so an operator can deliberately disable confinement +(the Codex ``danger-full-access`` model); the security PoC suite pins it to prove +the holes still exist when off. + +Increment status: clean-env floor (build_env) + UID drop & rlimits (wrap_cmd / +prepare_workdir, via _sandbox_exec). Landlock FS rules + seccomp net-deny land in +_sandbox_exec next. `active_tier()` reports what is in effect so degradation is +visible (never silent). +""" +from __future__ import annotations + +import contextlib +import os +import sys +from pathlib import Path + +from mcp_server.config import ( + SANDBOX_GID, + SANDBOX_MODE, + SANDBOX_RLIMIT_AS, + SANDBOX_RLIMIT_CPU, + SANDBOX_RLIMIT_FSIZE, + SANDBOX_RLIMIT_NOFILE, + SANDBOX_RLIMIT_NPROC, + SANDBOX_UID, +) + +# Environment the OpenStudio CLI / bundler / EnergyPlus / measure code legitimately +# needs. Everything else — notably any host secret — is dropped in confined modes. +_ENV_ALLOW_EXACT = frozenset({ + "PATH", "HOME", "USER", "LANG", "LC_ALL", "TZ", "TERM", "RUBYOPT", + "RUBYLIB", "RUBY_VERSION", "ENERGYPLUS_EXE_PATH", + "OS_BUNDLER_VERSION", "RC_RELEASE", + "VENV_PATH", "PYTHONUNBUFFERED", + "COMSTOCK_MEASURES_DIR", "COMMON_MEASURES_DIR", "SKILLS_DIR", + "OSCLI_GEMFILE", "OSCLI_GEM_PATH", + "OSMCP_RUN_ROOT", "OPENSTUDIO_MCP_RUN_ROOT", +}) +_ENV_ALLOW_PREFIXES = ("BUNDLE_", "GEM_", "LC_") + + +def enabled() -> bool: + """True when any confinement is active (i.e. OSMCP_SANDBOX is not 'off').""" + return SANDBOX_MODE not in ("", "off", "0", "false", "no") + + +def active_tier() -> str: + """The confinement tier actually in effect — surfaced in tool output.""" + if not enabled(): + return "off" + return "posix" # later increments: "landlock" / "bwrap" + + +def build_env(work_dir: Path | str) -> dict[str, str]: + """Environment for a confined subprocess. + + Disabled → full passthrough (``os.environ.copy()``), preserving today's + behaviour. Enabled → only allowlisted variables survive and HOME/TMPDIR are + pinned into the work dir, so no host secret reaches measure code and caches + stay inside the run directory (which the FS backend will later confine). + """ + if not enabled(): + return os.environ.copy() + work = Path(work_dir) + env = { + k: v for k, v in os.environ.items() + if k in _ENV_ALLOW_EXACT or k.startswith(_ENV_ALLOW_PREFIXES) + } + env["HOME"] = str(work) + tmp = work / "tmp" + try: + tmp.mkdir(parents=True, exist_ok=True) + env["TMPDIR"] = str(tmp) + except OSError: + pass + return env + + +def wrap_cmd(cmd: list[str]) -> list[str]: + """Wrap a subprocess argv to run under the privilege-dropping exec shim. + + Disabled → returns the command unchanged (today's behaviour). Enabled → runs + it via `python3 -m mcp_server._sandbox_exec`, which drops to the unprivileged + sandbox uid, applies rlimits, sets no_new_privs, then execs the command (so + the pid is preserved and the dispatcher's kill-by-pid path still works). + """ + if not enabled(): + return list(cmd) + wrapped = [ + sys.executable, "-m", "mcp_server._sandbox_exec", + "--uid", str(SANDBOX_UID), "--gid", str(SANDBOX_GID), + ] + for flag, value in ( + ("--rlimit-fsize", SANDBOX_RLIMIT_FSIZE), + ("--rlimit-nproc", SANDBOX_RLIMIT_NPROC), + ("--rlimit-nofile", SANDBOX_RLIMIT_NOFILE), + ("--rlimit-cpu", SANDBOX_RLIMIT_CPU), + ("--rlimit-as", SANDBOX_RLIMIT_AS), + ): + if value and value > 0: + wrapped += [flag, str(value)] + return [*wrapped, "--", *cmd] + + +def prepare_workdir(work_dir: Path | str) -> None: + """Hand the work dir to the sandbox account so the dropped process can write. + + No-op when confinement is off. The server is root, so it chowns the run dir + (recursively) to the sandbox uid before exec; the root server still reads the + outputs afterwards. Bind mounts that ignore ownership (e.g. Docker Desktop) + make this a harmless no-op there — confinement still holds for in-image paths. + """ + if not enabled(): + return + work = Path(work_dir) + with contextlib.suppress(OSError): + os.chown(work, SANDBOX_UID, SANDBOX_GID) + for path in work.rglob("*"): + with contextlib.suppress(OSError): + os.chown(path, SANDBOX_UID, SANDBOX_GID) diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index 5ef1ed9..8b415bc 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -13,6 +13,7 @@ import openstudio +from mcp_server import sandbox from mcp_server.config import INPUT_ROOT, user_run_root @@ -1016,6 +1017,7 @@ def test_measure_op( proc = subprocess.run( ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"], cwd=str(mdir), + env=sandbox.build_env(mdir), capture_output=True, text=True, timeout=60, check=False, ) else: @@ -1026,6 +1028,7 @@ def test_measure_op( proc = subprocess.run( ["ruby", "-I", ".", str(test_files[0])], cwd=str(mdir), + env=sandbox.build_env(mdir), capture_output=True, text=True, timeout=60, check=False, ) diff --git a/mcp_server/skills/measures/operations.py b/mcp_server/skills/measures/operations.py index 8cca748..da641dc 100644 --- a/mcp_server/skills/measures/operations.py +++ b/mcp_server/skills/measures/operations.py @@ -16,7 +16,13 @@ import openstudio -from mcp_server.config import OSCLI_GEM_PATH, OSCLI_GEMFILE, user_run_root +from mcp_server import sandbox +from mcp_server.config import ( + OSCLI_GEM_PATH, + OSCLI_GEMFILE, + is_path_allowed, + user_run_root, +) from mcp_server.model_manager import get_model, load_model from mcp_server.util import resolve_run_dir @@ -141,6 +147,11 @@ def apply_measure( measure_path = Path(measure_dir) if not measure_path.is_dir(): return {"ok": False, "error": f"Measure directory not found: {measure_dir}"} + # Access control: only run measures from allowed roots (own run area or a + # shared read root — repo, inputs, bundled measures). Refuses arbitrary + # filesystem paths / another user's area before any copy or execution. + if not is_path_allowed(measure_path): + return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"} # Check measure script exists (Ruby or Python) has_rb = (measure_path / "measure.rb").is_file() @@ -247,13 +258,18 @@ def apply_measure( "--bundle_without", "native_ext"] cmd += ["run", run_flag, "-w", str(osw_path)] log_path = run_dir / "openstudio.log" + # Build env first (it creates run_dir/tmp for TMPDIR as root), THEN hand + # the whole run dir — tmp included — to the sandbox uid, so the dropped + # process can write its temp/output. Order matters. + run_env = sandbox.build_env(run_dir) + sandbox.prepare_workdir(run_dir) with open(log_path, "w", encoding="utf-8") as log_f: proc = subprocess.run( - cmd, + sandbox.wrap_cmd(cmd), cwd=str(run_dir), stdout=log_f, stderr=subprocess.STDOUT, - env=os.environ.copy(), + env=run_env, timeout=300, # 5 minute timeout check=False, ) diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index 4ec5114..2145eed 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -3,7 +3,6 @@ import contextlib import json -import os import shutil import subprocess import tempfile @@ -17,6 +16,7 @@ import psutil +from mcp_server import sandbox from mcp_server.audit import audit from mcp_server.config import ( LOG_TAIL_DEFAULT, @@ -265,12 +265,16 @@ def _build_run_cmd(osw_path: Path) -> list[str]: def _launch(rec: RunRecord) -> None: """Start the EnergyPlus subprocess for a queued run. Caller holds _sim_lock.""" log = rec.run_dir / "openstudio.log" + # build_env creates run_dir/tmp (TMPDIR) as root; chown after so the dropped + # process owns it (order matters — see apply_measure). + run_env = sandbox.build_env(rec.run_dir) + sandbox.prepare_workdir(rec.run_dir) proc = subprocess.Popen( # noqa: S603 - cmd built from trusted config + staged OSW path - _build_run_cmd(rec.osw_path), + sandbox.wrap_cmd(_build_run_cmd(rec.osw_path)), cwd=str(rec.run_dir), stdout=log.open("w", encoding="utf-8"), stderr=subprocess.STDOUT, - env=os.environ.copy(), + env=run_env, ) rec.pid = proc.pid rec.status = "running" From ceafc91fb6217eba47f7229d32bdde37bdfbbff3 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 12:53:01 -0400 Subject: [PATCH 02/26] feat(security): Landlock FS + seccomp net-deny (sandbox increment 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full tier (OSMCP_SANDBOX=auto) closes the last two holes, on top of the POSIX floor, all via owned ctypes shims (no dependency, pure Python): - _landlock.py: read-deny-by-default FS policy — ro system roots, rw only the run dir (+ /dev for null/urandom). Blocks read-escape (even world-readable files) and write-escape, mount-independent. - _seccomp.py: raw single-arch cBPF denying socket(AF_INET/AF_INET6) with EAFNOSUPPORT (AF_UNIX/local IPC unaffected). Blocks outbound IP exfil. Applied in _sandbox_exec after no_new_privs, degrade-loudly; active_tier reports "landlock". Chose raw BPF over pyseccomp (broken wheel import). Verified under auto: read/write escape + net exfil blocked, apply + real EnergyPlus sim still succeed; comstock/common + weather suites green. Default stays off; flipping to auto (after a full-suite run under auto) is next. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/plans/measure-exec-sandbox.md | 45 ++++++--- mcp_server/_landlock.py | 103 +++++++++++++++++++++ mcp_server/_sandbox_exec.py | 17 ++++ mcp_server/_seccomp.py | 92 ++++++++++++++++++ mcp_server/sandbox.py | 40 +++++++- mcp_server/skills/measures/operations.py | 2 +- mcp_server/skills/simulation/operations.py | 2 +- 7 files changed, 283 insertions(+), 18 deletions(-) create mode 100644 mcp_server/_landlock.py create mode 100644 mcp_server/_seccomp.py diff --git a/docs/plans/measure-exec-sandbox.md b/docs/plans/measure-exec-sandbox.md index a467936..409accf 100644 --- a/docs/plans/measure-exec-sandbox.md +++ b/docs/plans/measure-exec-sandbox.md @@ -149,27 +149,44 @@ primary control: tools stay on by default because their execution is confined. **Still pending in this bucket:** wire the sim-timeout *enforcement* into the dispatcher (config constant exists; `_launch`/dispatch loop must kill runs past the cap via the existing SIGTERM→SIGKILL path). -2. POSIX floor: setuid `sandbox` user + rlimits via `setpriv` wrapper — small/med - (needs Docker rebuild: `useradd sandbox`). Closes runs-as-root + rlimits. -3. Landlock ctypes module + seccomp net-deny + probe + tier reporting — medium. - Closes filesystem escape + network exfil. -4. bwrap backend + userns probe + operator docs — medium +2. POSIX floor: UID drop + rlimits — **DONE (increment 2):** `sandbox` user baked + into the image (`useradd -u 1001`); `mcp_server/_sandbox_exec.py` shim drops + root→1001, applies rlimits (FSIZE 10G / NPROC 1024 on; CPU/AS off), sets + no_new_privs, execs (pid preserved); `wrap_cmd`/`prepare_workdir` in + `sandbox.py`. Verified: measure runs as uid 1001, NPROC rlimit applied, DAC + blocks root-owned write; real EnergyPlus sim runs fine under `posix`. +3. Landlock FS + seccomp net-deny — **DONE (increment 3):** own ctypes modules + `_landlock.py` (read-deny-by-default; ro system roots, rw only the run dir + + /dev) and `_seccomp.py` (raw single-arch cBPF: deny `socket(AF_INET/INET6)`, + EAFNOSUPPORT). Applied in the shim after no_new_privs, degrade-loudly. Pure + Python — no rebuild; chose raw BPF over pyseccomp (its wheel imports broken + here). `OSMCP_SANDBOX=auto` = full tier; `active_tier()` reports `landlock`. + Verified: read-escape (world-readable /opt file), write-escape, and network + exfil all blocked while apply + real sim still succeed; comstock/common + measures + weather (39 tests) green under `auto`. +4. bwrap backend + userns probe + operator docs — medium (optional upgrade) 5. Elicitation gate — defer -6. Flip default `OSMCP_SANDBOX` off → `auto` once the full suite passes confined. +6. Flip default `OSMCP_SANDBOX` off → `auto` — **next.** Gate: run the FULL + integration suite (all shards) under `auto` first to surface any remaining + Landlock allowlist gaps; add missing ro paths (no rebuild); then flip the + default (or set `OSMCP_SANDBOX=auto` in the deployment/Dockerfile ENV). + Still pending from bucket 1: wire sim-timeout *enforcement* into the dispatcher. ## Testing strategy Goal: tests that (a) **prove the holes exist today** without touching anything real, and (b) **prove each is closed** after the fix. Both halves run in CI. -**Status (confirm-now half landed):** `tests/test_sandbox.py` — 3 integration -tests, CI shard 2, all passing in ~11s. Confirms today (OSMCP_SANDBOX=off): -measures run as root (uid 0), the server's env secret leaks in, reads/writes -escape the run dir, network exfil to a localhost canary succeeds, and -`apply_measure` accepts a `measure_dir` outside all allowed roots. Probes are -generated at runtime via `create_measure` (Ruby + Python) so the real -create→apply path is exercised. The `OSMCP_SANDBOX=auto` "blocked" counterparts -are added with the fix. +**Status:** `tests/test_sandbox.py` — 9 integration tests, all passing (~34s). +Kept LOCAL / git-excluded (working exploit PoC), run via the standalone +`.github/workflows/security.yml`, NOT the main `ci` shards. Probes are generated +at runtime via `create_measure` (Ruby + Python) so the real create→apply path is +exercised. Coverage: +- off: all five holes confirmed (root, env leak, read/write escape, net exfil) +- posix: uid 1001, NPROC rlimit, DAC blocks root-owned write, env stripped +- auto (Landlock + seccomp): read-escape + write-escape + net exfil blocked, + run still succeeds +- unconditional: `measure_dir` traversal rejected ### Core principle — dual-run falsifiability diff --git a/mcp_server/_landlock.py b/mcp_server/_landlock.py new file mode 100644 index 0000000..c143109 --- /dev/null +++ b/mcp_server/_landlock.py @@ -0,0 +1,103 @@ +"""Minimal Landlock (LSM) filesystem confinement via raw syscalls (ctypes). + +Three syscalls, no external dependency (house style: grepable, owned). Applies a +read-deny-by-default policy to the current process and all its children: only the +enumerated read-only paths are readable/executable, and only the run dir is +writable. Requires no_new_privs (set by the caller) for unprivileged use; works +inside default-seccomp Docker >= 23.0 (the landlock_* syscalls are allowlisted +there). Returns the applied ABI, or 0 when Landlock is unavailable — the caller +degrades loudly rather than failing the run. +""" +from __future__ import annotations + +import ctypes +import os + +# Generic syscall table numbers (identical on x86_64 and aarch64). +_NR_CREATE_RULESET = 444 +_NR_ADD_RULE = 445 +_NR_RESTRICT_SELF = 446 + +_CREATE_RULESET_VERSION = 1 # query the ABI instead of creating a ruleset +_RULE_PATH_BENEATH = 1 + +# access_fs rights (bit positions). 0..12 = ABI 1, REFER = ABI 2, TRUNCATE = ABI 3. +_A_EXECUTE = 1 << 0 +_A_WRITE_FILE = 1 << 1 +_A_READ_FILE = 1 << 2 +_A_READ_DIR = 1 << 3 +_A_REMOVE_DIR = 1 << 4 +_A_REMOVE_FILE = 1 << 5 +_A_MAKE_CHAR = 1 << 6 +_A_MAKE_DIR = 1 << 7 +_A_MAKE_REG = 1 << 8 +_A_MAKE_SOCK = 1 << 9 +_A_MAKE_FIFO = 1 << 10 +_A_MAKE_BLOCK = 1 << 11 +_A_MAKE_SYM = 1 << 12 +_A_REFER = 1 << 13 # ABI >= 2 +_A_TRUNCATE = 1 << 14 # ABI >= 3 + +_RO = _A_EXECUTE | _A_READ_FILE | _A_READ_DIR +_RW_BASE = ( + _A_EXECUTE | _A_WRITE_FILE | _A_READ_FILE | _A_READ_DIR + | _A_REMOVE_DIR | _A_REMOVE_FILE | _A_MAKE_CHAR | _A_MAKE_DIR | _A_MAKE_REG + | _A_MAKE_SOCK | _A_MAKE_FIFO | _A_MAKE_BLOCK | _A_MAKE_SYM +) + + +class _RulesetAttr(ctypes.Structure): + # Only handled_access_fs — 8 bytes, valid on every ABID (kernel reads the + # field set implied by `size`); avoids the ABI>=4 net field tripping abi3. + _fields_ = [("handled_access_fs", ctypes.c_uint64)] + + +class _PathBeneathAttr(ctypes.Structure): + _pack_ = 1 + _fields_ = [("allowed_access", ctypes.c_uint64), ("parent_fd", ctypes.c_int32)] + + +def restrict(ro_paths: list[str], rw_paths: list[str]) -> int: + """Confine the filesystem to ro_paths (read+exec) and rw_paths (read+write). + + Returns the applied Landlock ABI (>=1), or 0 if Landlock is unavailable or + the ruleset could not be enforced. Missing paths are skipped silently. + """ + libc = ctypes.CDLL("libc.so.6", use_errno=True) + + abi = libc.syscall(_NR_CREATE_RULESET, None, 0, _CREATE_RULESET_VERSION) + if abi <= 0: + return 0 + + handled = _RW_BASE + if abi >= 2: + handled |= _A_REFER + if abi >= 3: + handled |= _A_TRUNCATE + + attr = _RulesetAttr(handled_access_fs=handled) + rs_fd = libc.syscall(_NR_CREATE_RULESET, ctypes.byref(attr), ctypes.sizeof(attr), 0) + if rs_fd < 0: + return 0 + + def _add(path: str, access: int) -> None: + try: + fd = os.open(path, os.O_PATH | os.O_CLOEXEC) + except OSError: + return # path absent — skip + try: + pba = _PathBeneathAttr(allowed_access=access & handled, parent_fd=fd) + libc.syscall(_NR_ADD_RULE, rs_fd, _RULE_PATH_BENEATH, ctypes.byref(pba), 0) + finally: + os.close(fd) + + for p in ro_paths: + _add(p, _RO) + for p in rw_paths: + _add(p, handled) # full set for the writable run dir + + # no_new_privs is set by the caller; restrict_self enforces the ruleset on + # this process and everything it execs/forks. + rc = libc.syscall(_NR_RESTRICT_SELF, rs_fd, 0) + os.close(rs_fd) + return abi if rc == 0 else 0 diff --git a/mcp_server/_sandbox_exec.py b/mcp_server/_sandbox_exec.py index fa5ae1e..ee23b24 100644 --- a/mcp_server/_sandbox_exec.py +++ b/mcp_server/_sandbox_exec.py @@ -58,6 +58,9 @@ def main(argv: list[str]) -> int: ap.add_argument("--gid", type=int, required=True) for name in _RLIMITS: ap.add_argument(f"--rlimit-{name}", type=int, default=0) + ap.add_argument("--landlock-ro", action="append", default=[]) + ap.add_argument("--landlock-rw", action="append", default=[]) + ap.add_argument("--seccomp-net", action="store_true") ns = ap.parse_args(argv[:sep]) # rlimits first, while still privileged (lowering hard limits is always ok). @@ -71,6 +74,20 @@ def main(argv: list[str]) -> int: _set_no_new_privs() + # Filesystem + network confinement (after no_new_privs, before the uid drop; + # both persist across setuid + exec and are inherited by children). Degrade + # loudly — a backend that cannot engage prints a notice and the run proceeds + # with whatever confinement did apply (the POSIX floor always holds). + if ns.landlock_ro or ns.landlock_rw: + from mcp_server import _landlock + abi = _landlock.restrict(ns.landlock_ro, ns.landlock_rw) + if not abi: + print("sandbox-exec: WARNING Landlock unavailable — FS not confined", file=sys.stderr) + if ns.seccomp_net: + from mcp_server import _seccomp + if not _seccomp.install_net_deny(): + print("sandbox-exec: WARNING seccomp net-deny unavailable — network not confined", file=sys.stderr) + # Drop privileges: supplementary groups, then gid, then uid (order matters — # setgroups/setgid require privilege we lose after setuid). os.setgroups([ns.gid]) diff --git a/mcp_server/_seccomp.py b/mcp_server/_seccomp.py new file mode 100644 index 0000000..cdf809b --- /dev/null +++ b/mcp_server/_seccomp.py @@ -0,0 +1,92 @@ +"""Minimal seccomp-BPF network deny via raw syscalls (ctypes), no dependency. + +Installs a classic-BPF seccomp filter that fails ``socket(AF_INET|AF_INET6, ...)`` +with EAFNOSUPPORT and leaves AF_UNIX (and every other syscall) untouched — +blocking outbound IP networking from measure code while local IPC / the bundler +still work. The program is built for the running arch (the shim executes on the +workload's arch), so there is no cross-arch branching to get wrong; an unexpected +arch makes install a no-op. Requires no_new_privs (set by the caller). + +Returns True if the filter was installed, False if unavailable — the caller +degrades loudly rather than failing the run. +""" +from __future__ import annotations + +import ctypes +import errno +import platform +import struct + +# BPF opcodes +_BPF_LD_W_ABS = 0x00 | 0x00 | 0x20 # BPF_LD | BPF_W | BPF_ABS +_BPF_JEQ_K = 0x05 | 0x10 | 0x00 # BPF_JMP | BPF_JEQ | BPF_K +_BPF_RET_K = 0x06 | 0x00 # BPF_RET | BPF_K + +# seccomp return actions +_SECCOMP_RET_ALLOW = 0x7FFF0000 +_SECCOMP_RET_ERRNO = 0x00050000 + +# seccomp_data field offsets (nr@0, arch@4, ip@8, args[0]@16 ...; little-endian) +_OFF_NR = 0 +_OFF_ARCH = 4 +_OFF_ARG0 = 16 + +# AUDIT_ARCH + __NR_socket per arch +_AUDIT_ARCH = {"x86_64": 0xC000003E, "aarch64": 0xC00000B7} +_NR_SOCKET = {"x86_64": 41, "aarch64": 198} + +_AF_INET = 2 +_AF_INET6 = 10 + +_PR_SET_SECCOMP = 22 +_SECCOMP_MODE_FILTER = 2 + + +def _stmt(code: int, k: int) -> bytes: + return struct.pack("=HBBI", code, 0, 0, k) + + +def _jump(code: int, k: int, jt: int, jf: int) -> bytes: + return struct.pack("=HBBI", code, jt, jf, k) + + +class _SockFprog(ctypes.Structure): + _fields_ = [("len", ctypes.c_ushort), ("filter", ctypes.c_void_p)] + + +def install_net_deny() -> bool: + mach = platform.machine() + if mach not in _AUDIT_ARCH: + return False + audit_arch = _AUDIT_ARCH[mach] + nr_socket = _NR_SOCKET[mach] + deny = _SECCOMP_RET_ERRNO | (errno.EAFNOSUPPORT & 0xFFFF) + + # Instruction layout (jt/jf are offsets from the NEXT instruction): + # 0 load arch + # 1 arch == ours ? -> 2 : ALLOW(8) + # 2 load nr + # 3 nr == socket ? -> 4 : ALLOW(8) + # 4 load args[0] (domain) + # 5 domain == AF_INET ? DENY(7) : 6 + # 6 domain == AF_INET6 ? DENY(7) : ALLOW(8) + # 7 RET deny + # 8 RET allow + program = b"".join([ + _stmt(_BPF_LD_W_ABS, _OFF_ARCH), + _jump(_BPF_JEQ_K, audit_arch, 0, 6), + _stmt(_BPF_LD_W_ABS, _OFF_NR), + _jump(_BPF_JEQ_K, nr_socket, 0, 4), + _stmt(_BPF_LD_W_ABS, _OFF_ARG0), + _jump(_BPF_JEQ_K, _AF_INET, 1, 0), + _jump(_BPF_JEQ_K, _AF_INET6, 0, 1), + _stmt(_BPF_RET_K, deny), + _stmt(_BPF_RET_K, _SECCOMP_RET_ALLOW), + ]) + n = len(program) // 8 + buf = ctypes.create_string_buffer(program, len(program)) + fprog = _SockFprog(len=n, filter=ctypes.cast(buf, ctypes.c_void_p)) + + libc = ctypes.CDLL("libc.so.6", use_errno=True) + rc = libc.prctl(_PR_SET_SECCOMP, _SECCOMP_MODE_FILTER, ctypes.byref(fprog), 0, 0) + return rc == 0 diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 119bee6..17ba42e 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -28,6 +28,9 @@ from pathlib import Path from mcp_server.config import ( + COMMON_MEASURES_DIR, + COMSTOCK_MEASURES_DIR, + INPUT_ROOT, SANDBOX_GID, SANDBOX_MODE, SANDBOX_RLIMIT_AS, @@ -36,8 +39,17 @@ SANDBOX_RLIMIT_NOFILE, SANDBOX_RLIMIT_NPROC, SANDBOX_UID, + SKILLS_DIR, ) +# Read-only roots the confined subprocess legitimately needs (Landlock grants +# read+exec here, nothing else; the run dir is the only writable path). Read-deny +# by default: anything not listed — another user's run, /tmp, arbitrary host +# files — is unreadable. Missing paths are skipped by the Landlock layer. +_FULL_MODES = ("auto", "full", "landlock") +_RO_SYSTEM = ("/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc", + "/opt/venv", "/usr/local", "/var/oscli", "/proc", "/sys") + # Environment the OpenStudio CLI / bundler / EnergyPlus / measure code legitimately # needs. Everything else — notably any host secret — is dropped in confined modes. _ENV_ALLOW_EXACT = frozenset({ @@ -57,11 +69,25 @@ def enabled() -> bool: return SANDBOX_MODE not in ("", "off", "0", "false", "no") +def _full() -> bool: + """True when the full tier (Landlock FS + seccomp net-deny) is requested.""" + return SANDBOX_MODE in _FULL_MODES + + def active_tier() -> str: """The confinement tier actually in effect — surfaced in tool output.""" if not enabled(): return "off" - return "posix" # later increments: "landlock" / "bwrap" + return "landlock" if _full() else "posix" + + +def _ro_paths() -> list[str]: + """Read-only roots the confined subprocess may read/execute.""" + return [ + *_RO_SYSTEM, + str(COMSTOCK_MEASURES_DIR), str(COMMON_MEASURES_DIR), + str(INPUT_ROOT), str(SKILLS_DIR), "/repo", + ] def build_env(work_dir: Path | str) -> dict[str, str]: @@ -89,13 +115,15 @@ def build_env(work_dir: Path | str) -> dict[str, str]: return env -def wrap_cmd(cmd: list[str]) -> list[str]: +def wrap_cmd(cmd: list[str], work_dir: Path | str) -> list[str]: """Wrap a subprocess argv to run under the privilege-dropping exec shim. Disabled → returns the command unchanged (today's behaviour). Enabled → runs it via `python3 -m mcp_server._sandbox_exec`, which drops to the unprivileged sandbox uid, applies rlimits, sets no_new_privs, then execs the command (so the pid is preserved and the dispatcher's kill-by-pid path still works). + Full tier additionally confines the filesystem (Landlock: read-only system + roots, writable only `work_dir`) and denies outbound IP networking (seccomp). """ if not enabled(): return list(cmd) @@ -112,6 +140,14 @@ def wrap_cmd(cmd: list[str]) -> list[str]: ): if value and value > 0: wrapped += [flag, str(value)] + if _full(): + for path in _ro_paths(): + wrapped += ["--landlock-ro", path] + # rw: the run dir, plus /dev for /dev/null & /dev/urandom (node creation + # there is still barred by DAC for the unprivileged uid). + for path in (str(work_dir), "/dev"): + wrapped += ["--landlock-rw", path] + wrapped += ["--seccomp-net"] return [*wrapped, "--", *cmd] diff --git a/mcp_server/skills/measures/operations.py b/mcp_server/skills/measures/operations.py index da641dc..e4f1e63 100644 --- a/mcp_server/skills/measures/operations.py +++ b/mcp_server/skills/measures/operations.py @@ -265,7 +265,7 @@ def apply_measure( sandbox.prepare_workdir(run_dir) with open(log_path, "w", encoding="utf-8") as log_f: proc = subprocess.run( - sandbox.wrap_cmd(cmd), + sandbox.wrap_cmd(cmd, run_dir), cwd=str(run_dir), stdout=log_f, stderr=subprocess.STDOUT, diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index 2145eed..df8de0e 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -270,7 +270,7 @@ def _launch(rec: RunRecord) -> None: run_env = sandbox.build_env(rec.run_dir) sandbox.prepare_workdir(rec.run_dir) proc = subprocess.Popen( # noqa: S603 - cmd built from trusted config + staged OSW path - sandbox.wrap_cmd(_build_run_cmd(rec.osw_path)), + sandbox.wrap_cmd(_build_run_cmd(rec.osw_path), rec.run_dir), cwd=str(rec.run_dir), stdout=log.open("w", encoding="utf-8"), stderr=subprocess.STDOUT, From 1b6a62b2dcec6476cf351196e71929467d9b224c Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 13:23:56 -0400 Subject: [PATCH 03/26] fix(security): keep test_measure temp off the run dir (Docker bind mount) Under clean-env, pass redirect_tmp=False for test_measure's pytest/ruby run so TMPDIR stays on /tmp. test_measure isn't Landlocked, and pytest's capture uses an unlinked tempfile that the Docker Desktop bind mount can't keep open when TMPDIR is run_dir/tmp -> FileNotFoundError on truncate. apply_measure/sim keep TMPDIR=run_dir/tmp (Landlock needs it; openstudio doesn't use unlinked tempfiles). Fixes the only 2 failures in the full-suite-under-auto run (719 -> 721 pass). Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/sandbox.py | 30 +++++++++++-------- .../skills/measure_authoring/operations.py | 4 +-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 17ba42e..336572d 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -90,28 +90,34 @@ def _ro_paths() -> list[str]: ] -def build_env(work_dir: Path | str) -> dict[str, str]: +def build_env(work_dir: Path | str, *, redirect_tmp: bool = True) -> dict[str, str]: """Environment for a confined subprocess. Disabled → full passthrough (``os.environ.copy()``), preserving today's - behaviour. Enabled → only allowlisted variables survive and HOME/TMPDIR are - pinned into the work dir, so no host secret reaches measure code and caches - stay inside the run directory (which the FS backend will later confine). + behaviour. Enabled → only allowlisted variables survive, so no host secret + reaches measure code. + + redirect_tmp (default) pins HOME/TMPDIR into work_dir/tmp, keeping temp + + caches inside the run dir so the Landlock backend (which makes only that dir + writable) permits them. Pass redirect_tmp=False for the non-Landlocked + test_measure path: pytest's capture uses an unlinked tempfile, which the + Docker-Desktop bind mount can't keep open — so it must stay on /tmp. """ if not enabled(): return os.environ.copy() - work = Path(work_dir) env = { k: v for k, v in os.environ.items() if k in _ENV_ALLOW_EXACT or k.startswith(_ENV_ALLOW_PREFIXES) } - env["HOME"] = str(work) - tmp = work / "tmp" - try: - tmp.mkdir(parents=True, exist_ok=True) - env["TMPDIR"] = str(tmp) - except OSError: - pass + if redirect_tmp: + work = Path(work_dir) + env["HOME"] = str(work) + tmp = work / "tmp" + try: + tmp.mkdir(parents=True, exist_ok=True) + env["TMPDIR"] = str(tmp) + except OSError: + pass return env diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index 8b415bc..df386b4 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -1017,7 +1017,7 @@ def test_measure_op( proc = subprocess.run( ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"], cwd=str(mdir), - env=sandbox.build_env(mdir), + env=sandbox.build_env(mdir, redirect_tmp=False), capture_output=True, text=True, timeout=60, check=False, ) else: @@ -1028,7 +1028,7 @@ def test_measure_op( proc = subprocess.run( ["ruby", "-I", ".", str(test_files[0])], cwd=str(mdir), - env=sandbox.build_env(mdir), + env=sandbox.build_env(mdir, redirect_tmp=False), capture_output=True, text=True, timeout=60, check=False, ) From 3d9de873f434b9d3ce93d2d80bf81ac727151eb2 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 14:01:03 -0400 Subject: [PATCH 04/26] feat(security): secure-by-default + local/non-root protection (sandbox increment 4) Decouple the confinement layers and make auto the default: - _sandbox_exec: drop uid only when root; the unprivileged layers (Landlock + seccomp + rlimits) still apply for a non-root local server. - sandbox.py: degrade loudly on platforms with no kernel backend (macOS/Windows -> clean-env only + one-shot warning); chown run dir only when root; active_tier() reports "clean-env" where no backend. - config: OSMCP_SANDBOX default off -> auto (off stays the escape hatch). This protects LOCAL users: the server bind-mounts host dirs (/repo, /inputs); the sandbox makes them read-only so an LLM-authored measure can't write the user's real files. Full suite previously verified green under auto (721 passed). Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/plans/measure-exec-sandbox.md | 23 +++++++++++++++++------ mcp_server/_sandbox_exec.py | 22 ++++++++++++++-------- mcp_server/config.py | 13 ++++++++----- mcp_server/sandbox.py | 23 ++++++++++++++++++++++- 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/docs/plans/measure-exec-sandbox.md b/docs/plans/measure-exec-sandbox.md index 409accf..bccb435 100644 --- a/docs/plans/measure-exec-sandbox.md +++ b/docs/plans/measure-exec-sandbox.md @@ -166,18 +166,25 @@ primary control: tools stay on by default because their execution is confined. measures + weather (39 tests) green under `auto`. 4. bwrap backend + userns probe + operator docs — medium (optional upgrade) 5. Elicitation gate — defer -6. Flip default `OSMCP_SANDBOX` off → `auto` — **next.** Gate: run the FULL - integration suite (all shards) under `auto` first to surface any remaining - Landlock allowlist gaps; add missing ro paths (no rebuild); then flip the - default (or set `OSMCP_SANDBOX=auto` in the deployment/Dockerfile ENV). - Still pending from bucket 1: wire sim-timeout *enforcement* into the dispatcher. +6. Secure-by-default + local protection — **DONE (increment 4):** decoupled the + layers so the unprivileged ones (Landlock + seccomp + rlimits) apply even when + the server runs **non-root** (the uid-drop is the only root-gated layer, now + skipped gracefully); `wrap_cmd` degrades loudly on platforms with no kernel + backend (macOS/Windows → clean-env only + one-shot warning); flipped the code + default `off → auto`. This protects LOCAL users too: the server bind-mounts the + user's host dirs (`/repo`, `/inputs`), and the sandbox makes them read-only so + an LLM-authored measure can't write into the user's real files. Gate met: full + suite **721 passed under auto**; only fix needed was the test_measure TMPDIR + one (Docker-bind unlinked-tempfile). `off` remains the explicit escape hatch. + Still pending: wire sim-timeout *enforcement* into the dispatcher; optional + macOS Seatbelt backend. ## Testing strategy Goal: tests that (a) **prove the holes exist today** without touching anything real, and (b) **prove each is closed** after the fix. Both halves run in CI. -**Status:** `tests/test_sandbox.py` — 9 integration tests, all passing (~34s). +**Status:** `tests/test_sandbox.py` — 11 integration tests, all passing. Kept LOCAL / git-excluded (working exploit PoC), run via the standalone `.github/workflows/security.yml`, NOT the main `ci` shards. Probes are generated at runtime via `create_measure` (Ruby + Python) so the real create→apply path is @@ -187,6 +194,10 @@ exercised. Coverage: - auto (Landlock + seccomp): read-escape + write-escape + net exfil blocked, run still succeeds - unconditional: `measure_dir` traversal rejected +- local issue (dual-run): an unconfined measure writes into the user's + bind-mounted host dir (`/repo`); the sandbox blocks it +- non-root: Landlock confines even when the shim runs as a non-root uid (the + uid-drop is skipped, FS confinement still applies) ### Core principle — dual-run falsifiability diff --git a/mcp_server/_sandbox_exec.py b/mcp_server/_sandbox_exec.py index ee23b24..03bdb6f 100644 --- a/mcp_server/_sandbox_exec.py +++ b/mcp_server/_sandbox_exec.py @@ -88,14 +88,20 @@ def main(argv: list[str]) -> int: if not _seccomp.install_net_deny(): print("sandbox-exec: WARNING seccomp net-deny unavailable — network not confined", file=sys.stderr) - # Drop privileges: supplementary groups, then gid, then uid (order matters — - # setgroups/setgid require privilege we lose after setuid). - os.setgroups([ns.gid]) - os.setgid(ns.gid) - os.setuid(ns.uid) - if os.getuid() != ns.uid or os.geteuid() != ns.uid: - print("sandbox-exec: uid drop did not stick", file=sys.stderr) - return 2 + # Drop privileges only when we have them (root). The FS/network confinement + # above is unprivileged and already applied — so a non-root local server still + # gets Landlock + seccomp + rlimits; only the uid-drop (a defence-in-depth / + # multi-tenant layer) is skipped. Order matters: groups, gid, then uid. + if os.geteuid() == 0: + os.setgroups([ns.gid]) + os.setgid(ns.gid) + os.setuid(ns.uid) + if os.getuid() != ns.uid or os.geteuid() != ns.uid: + print("sandbox-exec: uid drop did not stick", file=sys.stderr) + return 2 + elif os.getuid() != ns.uid: + print(f"sandbox-exec: not root (uid {os.getuid()}) — keeping uid; " + "FS/network confinement still applied", file=sys.stderr) try: os.execvp(cmd[0], cmd) # noqa: S606 - intentional shell-less exec of the confined command diff --git a/mcp_server/config.py b/mcp_server/config.py index 22a20aa..61042be 100644 --- a/mcp_server/config.py +++ b/mcp_server/config.py @@ -59,11 +59,14 @@ def _safe_float(env_val: str, default: float) -> float: ENABLE_CODE_MODE = os.environ.get("OSMCP_CODE_MODE", "").lower() in ("1", "true") # Subprocess confinement for measure/simulation execution (see mcp_server/sandbox.py). -# off — full passthrough (current behaviour / explicit escape hatch) -# posix — clean-env allowlist (+ UID drop & rlimits in later increments) -# auto — best confinement available (currently == posix) -# Default off during rollout; flips to auto once the full suite passes confined. -SANDBOX_MODE = os.environ.get("OSMCP_SANDBOX", "off").strip().lower() +# off — full passthrough (explicit escape hatch for trusted/local-tooling use) +# posix — clean-env allowlist + UID drop (if root) + rlimits +# auto — best confinement available (default): clean-env + Landlock FS + +# seccomp net-deny + rlimits, plus UID drop when running as root. +# Secure by default. Degrades loudly: the unprivileged layers (Landlock/seccomp) +# apply even for a non-root local server; on a platform without a kernel backend +# (macOS/Windows bare installs) it falls back to clean-env only and logs a notice. +SANDBOX_MODE = os.environ.get("OSMCP_SANDBOX", "auto").strip().lower() # Network policy for confined subprocesses: deny (default) blocks outbound TCP # once the seccomp backend lands; allow leaves it open (trusted/BCL deployments). SANDBOX_NET = os.environ.get("OSMCP_SANDBOX_NET", "deny").strip().lower() diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 336572d..95084d7 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -50,6 +50,8 @@ _RO_SYSTEM = ("/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc", "/opt/venv", "/usr/local", "/var/oscli", "/proc", "/sys") +_warned_no_backend = False # one-shot log when no kernel backend (non-Linux) + # Environment the OpenStudio CLI / bundler / EnergyPlus / measure code legitimately # needs. Everything else — notably any host secret — is dropped in confined modes. _ENV_ALLOW_EXACT = frozenset({ @@ -74,10 +76,17 @@ def _full() -> bool: return SANDBOX_MODE in _FULL_MODES +def _has_backend() -> bool: + """True where the kernel confinement shim can run (Landlock/seccomp = Linux).""" + return sys.platform.startswith("linux") + + def active_tier() -> str: """The confinement tier actually in effect — surfaced in tool output.""" if not enabled(): return "off" + if not _has_backend(): + return "clean-env" # env stripping only; no kernel backend on this platform return "landlock" if _full() else "posix" @@ -133,6 +142,16 @@ def wrap_cmd(cmd: list[str], work_dir: Path | str) -> list[str]: """ if not enabled(): return list(cmd) + if not _has_backend(): + # macOS/Windows bare installs: no kernel sandbox shim. Run unwrapped + # (clean-env from build_env still strips secrets); warn once. + global _warned_no_backend + if not _warned_no_backend: + print(f"[sandbox] no kernel confinement backend on {sys.platform}; " + "running unwrapped (clean-env only) — use the Docker image for " + "full FS/network confinement", file=sys.stderr) + _warned_no_backend = True + return list(cmd) wrapped = [ sys.executable, "-m", "mcp_server._sandbox_exec", "--uid", str(SANDBOX_UID), "--gid", str(SANDBOX_GID), @@ -165,7 +184,9 @@ def prepare_workdir(work_dir: Path | str) -> None: outputs afterwards. Bind mounts that ignore ownership (e.g. Docker Desktop) make this a harmless no-op there — confinement still holds for in-image paths. """ - if not enabled(): + # Only the root server needs to hand the dir to the sandbox uid; when not root + # the shim keeps the current uid, which already owns the dir it created. + if not enabled() or not (hasattr(os, "geteuid") and os.geteuid() == 0): return work = Path(work_dir) with contextlib.suppress(OSError): From e75d0b8fd32253297d90b4725e8371eb33e49bcc Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:28:47 -0400 Subject: [PATCH 05/26] chore: stop tracking docs/plans/measure-exec-sandbox.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Planning/working doc — keep local-only, not in the repo. Removed from the tree (history retains it); now git-excluded so it won't be re-added. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/plans/measure-exec-sandbox.md | 369 ----------------------------- 1 file changed, 369 deletions(-) delete mode 100644 docs/plans/measure-exec-sandbox.md diff --git a/docs/plans/measure-exec-sandbox.md b/docs/plans/measure-exec-sandbox.md deleted file mode 100644 index bccb435..0000000 --- a/docs/plans/measure-exec-sandbox.md +++ /dev/null @@ -1,369 +0,0 @@ -# Measure-exec sandbox: keep the tools, confine the subprocess - -**Status:** researched, proposed · **Supersedes** the deny-tools approach in -`safe-mode-disabled-tools.md` as the primary answer to external-review point 4. -(Denylist stays as an operator opt-out knob, but it removes core functionality — -not a real solution.) - -## Decision: follow the Codex CLI model - -Adopt OpenAI Codex CLI's architecture as the template (it solves our exact -problem: confine a native subprocess inside a container we don't control), with -one tightening: - -- **Landlock for the filesystem** — but **read-deny-by-default** (stricter than - Codex's read-allow-all). We can enumerate the few dirs `openstudio run` needs, - so we afford the stronger posture without breaking the happy path. -- **seccomp BPF to block network** — deny `socket(AF_INET/AF_INET6)`, allow - `AF_UNIX`. This is how Codex blocks net on kernels < 6.7. **Key consequence: - net-deny works at ANY Landlock ABI (incl. our WSL2 abi3), unprivileged, no - launch flags** — installing a seccomp filter needs only `no_new_privs` (which - we already set for Landlock), and `seccomp`/`prctl` are in Docker's default - allowlist. -- **no_new_privs + setuid drop + rlimits** underneath (the POSIX floor). -- **bwrap** only as an opportunistic upgrade (adds mount/PID/net namespaces) - where a startup userns probe passes — never the baseline. -- **Degrade loudly** — probe at startup, stamp the active tier into every exec's - audit line + tool output; `OSMCP_SANDBOX=off` must be explicit and is logged. - -Why Codex over Anthropic `sandbox-runtime`: srt's primary primitive is -bwrap/userns — exactly what stock Linux Docker seccomp blocks. Codex's -Landlock+seccomp needs no privileges and no launch flags, so it runs in the -operator's unmodified container. srt's richer egress allowlist (external proxy) -is overkill — we want net-deny, which a seccomp filter gives for free. - -**This collapses the tiers**: FS confinement (Landlock) + net-deny (seccomp) + -UID/rlimits (POSIX) all run together in stock Docker ≥23.0 with zero launch -flags. bwrap is a bonus, not a requirement. - -## Insight - -All arbitrary-code execution already funnels through 3 subprocess sites: - -| Site | Location | Cmd | Today | -|---|---|---|---| -| `apply_measure` | `skills/measures/operations.py:248` | `openstudio run --measures_only -w` | timeout 300s, full env, root | -| `test_measure` | `skills/measure_authoring/operations.py:1015` | `pytest` / `ruby` | timeout 60s, full env, root | -| `run_osw`/`run_simulation` | `skills/simulation/operations.py:265` (`_launch`) | `openstudio run -w` | **no timeout**, full env, root | - -The subprocess needs: ro on OpenStudio/EnergyPlus install, `/var/oscli` gems, -`/opt/*-measures`, `/inputs`, `/repo`; rw ONLY on its run dir. Nothing else. -That maps 1:1 onto a filesystem sandbox policy. Wrap these 3 sites → full -functionality, contained blast radius. - -## Why not the alternatives - -- **WASM (pydantic mcp-run-python style):** can't run native EnergyPlus/Ruby. Out. -- **microVMs (E2B/Modal/Firecracker/gVisor):** need host control / KVM; we're a - process inside someone's container. Host-level gVisor stays a docs recommendation. -- **Static analysis of measure code:** bypassable, not security. -- **NREL OpenStudio-server prior art:** none — zero sandboxing, single-tenant trust - model. We'd be ahead of upstream. -- **Closest prior art:** OpenAI Codex CLI (Landlock + seccomp, bwrap where possible, - explicit degrade) and Anthropic sandbox-runtime (bwrap + seccomp + net proxy). - -## Design: tiered sandbox, probed at startup, best tier wins - -One wrapper at the 3 chokepoints, e.g. `mcp_server/sandbox_exec.py` (or a -`sandboxed_run()` drop-in for `subprocess.run`/`Popen`): - -**Tier 0 — POSIX floor (always on):** -- Drop root → dedicated `sandbox` UID via wrapper exec (`setpriv --reuid --regid - --clear-groups --no-new-privs`) — NOT `preexec_fn` (fork-safety in threaded server) -- Run dirs `0700` sandbox-owned; install/mounts root-owned ro -- rlimits: `RLIMIT_CPU`, `RLIMIT_FSIZE` (caps SQL/ESO blowups), `RLIMIT_NPROC`, - `RLIMIT_AS` (generous — EnergyPlus legitimately uses GBs), `RLIMIT_NOFILE` -- Clean env: pass explicit allowlist (`PATH`, `RUBYLIB`, `ENERGYPLUS_EXE_PATH`, - bundler vars, `HOME=`, `TMPDIR=/tmp`) — today - `os.environ.copy()` leaks everything -- `start_new_session=True` + kill process group on timeout/cancel - -**Tier 1 — Landlock LSM (default kernel sandbox):** -- Works inside default-seccomp Docker ≥23.0, unprivileged, zero launch flags — - the only kernel sandbox with that property (`landlock_*` syscalls allowlisted - since moby 2022; verified in current default profile) -- Policy: ro+exec `/usr`, `/lib*`, `/usr/local/openstudio-*`, `/var/oscli`, - `/opt/comstock-measures`, `/opt/common-measures`, `/inputs`, `/repo`; - rw only ``; requires `prctl(PR_SET_NO_NEW_PRIVS)` first -- Landlock ABI ≥4 (kernel 6.7) can also deny TCP natively; WSL2 dev kernel = - ABI 3 (FS only, verified). We do NOT rely on this for network — see seccomp. -- Implement: ~100-line ctypes module we own (PyPI `landlock` pkg is 27★/dev - release; `landrun` CLI is Go binary) — syscall surface is 3 calls - -**Tier 1b — seccomp BPF net-deny (rides with Landlock):** -- BPF filter returning `EAFNOSUPPORT` for `socket(AF_INET/AF_INET6)`, allowing - `AF_UNIX` (so the OpenStudio bundler / local IPC still works). Installed in the - same wrapper right after `no_new_privs`. -- Works at ANY kernel/ABI in stock Docker — this is our network control, not - Landlock-abi4 or bwrap. (Codex's exact mechanism.) -- Implement: raw BPF in the wrapper shim, or `pyseccomp`/libseccomp if we accept - the dep. Lean raw-BPF to stay dep-free and grepable. - -**Tier 2 — bubblewrap (opportunistic upgrade):** -- `bwrap --unshare-all --clearenv --die-with-parent` + ro-binds + run-dir bind: - adds mount-ns illusions + **network-ns (no net even on ABI 3 kernels)** -- Blocked by stock Docker seccomp on Linux hosts (`clone(CLONE_NEWUSER)` denied - without CAP_SYS_ADMIN; moby#42441 still open). Docker Desktop/WSL2 ships - seccomp-unconfined → works there today (probed and confirmed in our dev container) -- Enable only when startup userns probe passes; document operator flags - (custom seccomp profile > `--cap-add SYS_ADMIN`; Ubuntu 24.04 hosts also need - AppArmor userns relief) for Linux deployments that want tier 2 - -**Probe at startup, audit one `sandbox_tier` event, and report the active tier -in tool responses** (`"sandbox": "bwrap" | "landlock-abi3" | "posix" | "none"`) -— degradation must be visible, never silent (Codex pattern). - -## Orthogonal quick wins (do regardless) - -1. `is_path_allowed()` check on `apply_measure(measure_dir=...)` — today accepts - any path holding a `measure.rb` (other users' runs!) -2. Timeout (or operator-config max walltime) on `run_osw`/`run_simulation` — - only manual cancel today -3. Clean env allowlist (tier 0 item, but ship first — trivial, closes secret leakage) - -## Optional human gate (HITL, not containment) - -MCP elicitation: FastMCP `ctx.elicit()` ≥2.10; Claude Code supports it ≥v2.1.76, -Claude Desktop does NOT yet. Pattern: show measure code diff → accept/decline -before exec. Fallback for non-supporting clients: return -`{"needs_approval": true, "code_sha256": ...}` → second call with -`approved_sha256=`. Complements sandbox; doesn't replace it (the LLM writes the -code the human skims). Defer to v2 unless an operator asks. - -## Relation to safe-mode denylist - -Keep `OSMCP_DISABLED_TOOLS` / `OSMCP_SAFE_MODE` as a small operator knob -(some deployments genuinely want measure-authoring off), but the sandbox is the -primary control: tools stay on by default because their execution is confined. - -## Implementation order - -1. Quick wins (env allowlist, `measure_dir` path check, sim timeout) — small - **— DONE (increment 1):** `OSMCP_SANDBOX` knob + `mcp_server/sandbox.py` - clean-env allowlist wired into all 3 exec sites (`apply_measure`, - `test_measure`, sim `_launch`); unconditional `is_path_allowed` check on - `apply_measure(measure_dir=)`; `OSMCP_SIM_TIMEOUT_SECONDS` config added. - No Docker rebuild (pure Python). Default `off` = passthrough, so the existing - suite is untouched. Verified: clean-env hides the canary under `posix` and a - normal run still succeeds; traversal rejected; `test_measures` green. - **Still pending in this bucket:** wire the sim-timeout *enforcement* into the - dispatcher (config constant exists; `_launch`/dispatch loop must kill runs - past the cap via the existing SIGTERM→SIGKILL path). -2. POSIX floor: UID drop + rlimits — **DONE (increment 2):** `sandbox` user baked - into the image (`useradd -u 1001`); `mcp_server/_sandbox_exec.py` shim drops - root→1001, applies rlimits (FSIZE 10G / NPROC 1024 on; CPU/AS off), sets - no_new_privs, execs (pid preserved); `wrap_cmd`/`prepare_workdir` in - `sandbox.py`. Verified: measure runs as uid 1001, NPROC rlimit applied, DAC - blocks root-owned write; real EnergyPlus sim runs fine under `posix`. -3. Landlock FS + seccomp net-deny — **DONE (increment 3):** own ctypes modules - `_landlock.py` (read-deny-by-default; ro system roots, rw only the run dir + - /dev) and `_seccomp.py` (raw single-arch cBPF: deny `socket(AF_INET/INET6)`, - EAFNOSUPPORT). Applied in the shim after no_new_privs, degrade-loudly. Pure - Python — no rebuild; chose raw BPF over pyseccomp (its wheel imports broken - here). `OSMCP_SANDBOX=auto` = full tier; `active_tier()` reports `landlock`. - Verified: read-escape (world-readable /opt file), write-escape, and network - exfil all blocked while apply + real sim still succeed; comstock/common - measures + weather (39 tests) green under `auto`. -4. bwrap backend + userns probe + operator docs — medium (optional upgrade) -5. Elicitation gate — defer -6. Secure-by-default + local protection — **DONE (increment 4):** decoupled the - layers so the unprivileged ones (Landlock + seccomp + rlimits) apply even when - the server runs **non-root** (the uid-drop is the only root-gated layer, now - skipped gracefully); `wrap_cmd` degrades loudly on platforms with no kernel - backend (macOS/Windows → clean-env only + one-shot warning); flipped the code - default `off → auto`. This protects LOCAL users too: the server bind-mounts the - user's host dirs (`/repo`, `/inputs`), and the sandbox makes them read-only so - an LLM-authored measure can't write into the user's real files. Gate met: full - suite **721 passed under auto**; only fix needed was the test_measure TMPDIR - one (Docker-bind unlinked-tempfile). `off` remains the explicit escape hatch. - Still pending: wire sim-timeout *enforcement* into the dispatcher; optional - macOS Seatbelt backend. - -## Testing strategy - -Goal: tests that (a) **prove the holes exist today** without touching anything -real, and (b) **prove each is closed** after the fix. Both halves run in CI. - -**Status:** `tests/test_sandbox.py` — 11 integration tests, all passing. -Kept LOCAL / git-excluded (working exploit PoC), run via the standalone -`.github/workflows/security.yml`, NOT the main `ci` shards. Probes are generated -at runtime via `create_measure` (Ruby + Python) so the real create→apply path is -exercised. Coverage: -- off: all five holes confirmed (root, env leak, read/write escape, net exfil) -- posix: uid 1001, NPROC rlimit, DAC blocks root-owned write, env stripped -- auto (Landlock + seccomp): read-escape + write-escape + net exfil blocked, - run still succeeds -- unconditional: `measure_dir` traversal rejected -- local issue (dual-run): an unconfined measure writes into the user's - bind-mounted host dir (`/repo`); the sandbox blocks it -- non-root: Landlock confines even when the shim runs as a non-root uid (the - uid-drop is skipped, FS confinement still applies) - -### Core principle — dual-run falsifiability - -Every security test runs the SAME attack measure twice: -- `OSMCP_SANDBOX=off` → attack must **SUCCEED**. Proves the hole is real today - AND proves the attack mechanism actually fires. A sandbox test that can't show - the attack succeeding unsandboxed is vacuous (green for the wrong reason). -- `OSMCP_SANDBOX=auto` (best active tier) → attack must be **BLOCKED**, with the - violation visible in tool output. - -A fix is "proven" only when the off-run breaches and the on-run blocks. The -off-run is the regression guard against vacuous-green. - -### Safety — canaries + decoys, never real targets - -No test touches a real secret, real sensitive file, or real network: -- **Decoy "victim" run** — fixture creates `runs/_decoy_victim/{run_record.json, - secret.txt}` (known content), a SIBLING of the granted run_dir → reading/writing - it from inside the sandbox is a genuine escape, but it sits on the disposable - runs volume and the fixture deletes it. (cross-user read/write.) -- **Canary env secret** — launch the test server with - `OSMCP_CANARY_SECRET=do-not-leak-`. A fake value, never a real credential. - (the `os.environ.copy()` leak.) -- **Localhost canary listener** — throwaway TCP server on `127.0.0.1:`; - measure tries to POST the canary to it. Nothing leaves the machine. (net exfil.) -- **Bounded resource probes (critical)** — the off/control run must NEVER run an - unbounded bomb. Fork test forks a fixed modest N (e.g. 64) and checks they - succeed → infers "no NPROC cap" without a real fork bomb. Disk test writes a - fixed small file in the control to confirm "no FSIZE cap" without filling disk. - CPU/memory controls are SKIPPED (can't bound an unlimited spin/alloc safely); - on-runs allocate/spin just past the configured cap and assert the cap fires. - Outer pytest timeout + an outer rlimit on the test process backstop everything. - -All attack measures are committed fixtures under `tests/assets/sandbox_probes/`, -run only inside the disposable container. - -### How a probe measure reports outcome - -Each attack measure attempts the action, rescues the error, and -`runner.registerInfo("PROBE result=")` — `apply_measure` returns -runner messages, so the test asserts on them. The AUTHORITATIVE check is external -ground truth (decoy file untouched / listener received nothing / recorded euid), -not the measure's self-report. Two independent assertions per test. - -### Per-issue test matrix - -| # | Issue | Attack measure does | off (today) | on (fixed) | min tier | -|---|---|---|---|---|---| -| 1 | Write escape | `File.write` to decoy sibling dir | write succeeds, decoy modified | EACCES, decoy unchanged | landlock | -| 2 | Read other user's run | `File.read` decoy `secret.txt` | returns known content | EACCES, content absent from output | landlock | -| 3 | Env secret leak | dump ENV to run_dir | canary present | canary absent | posix (clean env) | -| 4 | Runs as root | record `Process.uid` | uid==0 | uid==1001 (sandbox) | posix | -| 5 | Network exfil | TCP connect+POST canary to 127.0.0.1 | listener receives canary | connection refused, nothing received | seccomp | -| 6 | Fork storm | fork N=64 (bounded) | all succeed | blocked past NPROC cap | posix (rlimit) | -| 7 | Disk bomb | write past FSIZE cap in run_dir | (control: small file, no cap) | SIGXFSZ/EFBIG at cap | posix (rlimit) | -| 8 | CPU spin | busy loop | (control skipped) | SIGXCPU at CPU cap | posix (rlimit) | -| 9 | Memory bomb | alloc past AS cap | (control skipped) | ENOMEM at cap | posix (rlimit) | -| 10 | No sim timeout | sleep 60s, cap=5s | (control: outer timeout) | killed ~5s, status failed/timeout | walltime mgr | -| 11 | measure_dir traversal | `apply_measure(measure_dir=)` | copies/executes | `{ok:false, "not allowed"}` | path check (no exec) | - -### Tier-awareness (honest skips, not vacuous passes) - -Active tier depends on the runner's kernel/Docker: -- Local WSL2 = landlock-abi3 + seccomp (+ bwrap if userns probe passes on Docker - Desktop's unconfined daemon). -- CI ubuntu-24.04 = Docker ≥23 / kernel 6.x → landlock-abi4 + seccomp; bwrap - blocked by default seccomp. - -Each test declares its min tier (matrix col). If active tier < needed → **SKIP -with a loud reason** ("net confinement needs seccomp; active=posix"), never -silent-pass. Plus a **guard test**: in the integration env assert active tier ∈ -{landlock-abi3, landlock-abi4, bwrap}; FAIL if posix/none — that would mean the -sandbox silently didn't engage and the whole suite is vacuous. - -### Tier-probe unit tests - -Force each tier path, assert the correct argv is built (setpriv / landlock-shim / -bwrap) and that the probe downgrades correctly when a syscall is stubbed to fail. -Unit tier (no `openstudio` import). - -### Repo compliance - -- Integration tier (real `openstudio` CLI, mock nothing), - `RUN_OPENSTUDIO_INTEGRATION=1`, AAA structure, `# Regression:`/`# Validates:` - comments, exact assertions incl. error-message text, `parametrize` the issue - set, add to the lightest CI shard. -- New file `tests/test_sandbox.py` + fixtures `tests/assets/sandbox_probes/`. - -## Resolved decisions (defaults) - -Net effect: works in stock Docker, no launch flags, blast radius = the run dir, -with operator knobs for looser (trusted/BCL) or tighter (cgroup memory) needs. - -1. **Min Docker version → don't gate; probe-and-degrade. Document 23.0 as the FS - line.** seccomp + UID + rlimits work on ~any Docker (those syscalls - default-allowed for years); only Landlock FS confinement needs ≥23.0. A hard - minimum would block legit operators for no safety gain — on old Docker you - lose FS-deny but keep net-deny + UID + rlimits, and the tier report says so. - -2. **One shared `sandbox` UID for v1, not per-run ephemeral.** Landlock isolates - the filesystem *per process* — two runs sharing UID 1001 each get a ruleset - granting only their own run_dir, so cross-run file access is blocked by - Landlock, not DAC; the UID barely matters for the main threat. Per-run UIDs - add a UID pool + chown + cleanup for little gain at `MAX_CONCURRENCY=1` - (current default). Close the same-UID residual (ptrace/signal between - concurrent runs) by adding `ptrace` to the seccomp deny list. Shared - `RLIMIT_NPROC` accounting is a minor co-tenant DoS, irrelevant at concurrency - 1. **Upgrade trigger:** revisit per-run UIDs only if a multi-user deployment - raises per-user concurrency > 1. - -3. **seccomp net-deny via libseccomp (`pyseccomp`), not hand-rolled BPF.** - Security-critical filter, and we ship **amd64 + arm64** — raw BPF must - hand-handle `AUDIT_ARCH` + per-arch syscall numbers for both, and a - subtly-wrong filter fails open. libseccomp is what Docker itself uses, - abstracts the arch mess, and `libseccomp2` is already in the base image. The - repo's dep-minimization rule targets heavyweight *unnecessary* deps (the - OpenLLMetry case), not reimplementing a security primitive across two arches. - (Raw BPF acceptable ONLY with explicit amd64+arm64 AUDIT_ARCH handling + a - fail-closed test per arch — not worth it.) - -4. **rlimits = generous anti-runaway backstops, operator-tunable env vars; not - tight quotas.** A tight cap silently breaks real sims (EnergyPlus uses GBs, - annual SQL is hundreds of MB–GB) — worse than no cap for the modeler. - - `RLIMIT_AS`: **avoid as primary** — counts virtual address space (mmap'd - libs), breaks allocators. Unset or ~24 GB backstop; push real memory control - to container cgroup (`docker run --memory`), document that. - - `RLIMIT_FSIZE`: ~10 GB per-file runaway guard. Real disk control = retention - GC + per-run-dir quota (cgroup/quota territory, rlimit can't express). - - `RLIMIT_NPROC` ~512, `RLIMIT_NOFILE` ~4096, `RLIMIT_CPU` ~2× the existing - 300 s measure timeout. - -5. **Sim walltime cap: add `OSMCP_SIM_TIMEOUT_SECONDS`, default 7200 (2 h), - 0 = unlimited.** Real gap today (sims hang forever; manual cancel only). 2 h - is a hang-catcher, not a tight bound — big annual/parametric runs take - 10–30+ min, so a low default would silently kill legit work. Reuse the - dispatcher's existing SIGTERM→SIGKILL path (`cancel_run`). Fire loudly: - status=`timeout`, clear error. - -6. **Own ctypes Landlock module, don't vendor `landrun`.** 3 syscalls, ~100 - lines, fully grepable/auditable — matches house style ("everything direct, - visible in stack traces"). `landrun` is a young (Mar 2025) external Go binary - = supply-chain dep + vendoring across 2 arches + another thing to track, for - security-critical code we should own. PyPI `landlock` pkg is dev-release/27★ — - don't depend on that either. (Per-arch syscall-number care as in #3, but - Landlock numbers are stable.) - -7. **Sandbox `test_measure` too, same wrapper.** pytest/ruby on LLM-authored - measure code is the same arbitrary-exec threat as `apply_measure` — "test this - measure" *is* the trust-the-code moment. Its legit needs (own test fixtures, - test model) live in granted dirs. Caveat: a test that legitimately hits the - network (BCL download) breaks under net-deny — see #8. - -8. **Default net-deny; expose `OSMCP_SANDBOX_NET=deny|allow`; confirm - empirically.** `openstudio run -w` runs EnergyPlus locally and `--bundle` - resolves from the offline gem path → should need no AF_INET, so net-deny - shouldn't break normal runs — **but verify, don't assume** (smoke test: a - normal sim succeeds under net-deny). The real exception is BCL-downloading - measures (need AF_INET); blocking them is *correct* for untrusted mode but - trusted deployments want them — hence the toggle. AF_UNIX always allowed - (local IPC/bundler). - -## Still open (need your input) - -- Exact rlimit/timeout default *values* above — sane for your largest real - models? (chosen as backstops, not measured against a worst-case OSM) -- Container-level memory cgroup: document-only, or ship a recommended - `--memory` in compose / run docs? -- Per-run-dir disk quota — out of scope for v1, or needed now? (rlimit can't do - it; needs cgroup/quota) From 90e89c66066f9c591ca9a8d9b93d42c535efd05d Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 06/26] refactor(security): add reject_escaping_symlinks() staging guard Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/util.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mcp_server/util.py b/mcp_server/util.py index 75ef54c..d3d2926 100644 --- a/mcp_server/util.py +++ b/mcp_server/util.py @@ -1,10 +1,37 @@ from __future__ import annotations import json +import os import shutil from pathlib import Path +def reject_escaping_symlinks(root: Path) -> str | None: + """Return an error string if the directory tree `root` contains a symlink + whose target resolves OUTSIDE `root`; else None. + + Used before staging untrusted measure/OSW trees: a link to a host file + (e.g. -> /etc/shadow) must not be copied or executed. Internal symlinks are + allowed. Does not follow symlinked directories (no traversal / loops). + """ + try: + rroot = str(root.resolve()) + except OSError: + return f"Cannot resolve path: {root}" + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + for name in dirnames + filenames: + p = Path(dirpath) / name + if not p.is_symlink(): + continue + try: + target = str(p.resolve()) + except OSError: + return f"Broken symlink in staged tree: {name}" + if target != rroot and not target.startswith(rroot + os.sep): + return f"Symlink escapes the allowed directory (not allowed): {name}" + return None + + def safe_read_text(path: Path, max_bytes: int = 200_000) -> str: data = safe_read_bytes(path, max_bytes=max_bytes) return data.decode("utf-8", errors="replace") From 838b6c3b6bd93dd480aac814f3b328912a53759a Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 07/26] fix(security): reject sandbox uid/gid <= 0 (Codex H5) Never let a bad OSMCP_SANDBOX_UID override leave confined code as root. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/config.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mcp_server/config.py b/mcp_server/config.py index 61042be..56bbfc3 100644 --- a/mcp_server/config.py +++ b/mcp_server/config.py @@ -80,8 +80,15 @@ def _safe_float(env_val: str, default: float) -> float: # Unprivileged account confined subprocesses drop to (baked into the image as # `sandbox`). See mcp_server/_sandbox_exec.py. -SANDBOX_UID = _safe_int(os.environ.get("OSMCP_SANDBOX_UID", "1001"), 1001) -SANDBOX_GID = _safe_int(os.environ.get("OSMCP_SANDBOX_GID", "1001"), 1001) +def _safe_sandbox_id(env_val: str, default: int) -> int: + """A positive uid/gid. Rejects <=0 (esp. 0=root) so a bad override can never + leave confined code running as root after the setuid 'drop'.""" + v = _safe_int(env_val, default) + return v if v > 0 else default + + +SANDBOX_UID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_UID", "1001"), 1001) +SANDBOX_GID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_GID", "1001"), 1001) # rlimit backstops for confined subprocesses (0 = off). Generous by design — they # catch runaway bombs, not tune normal use. CPU/AS default off: a CPU-second cap From a27f41f00e737a27d11ef399ffbcbabee7a67126 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 08/26] fix(security): seccomp KILL unexpected arch + x32, deny io_uring_setup (Codex H1) Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/_seccomp.py | 55 ++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/mcp_server/_seccomp.py b/mcp_server/_seccomp.py index cdf809b..f80342c 100644 --- a/mcp_server/_seccomp.py +++ b/mcp_server/_seccomp.py @@ -20,20 +20,26 @@ # BPF opcodes _BPF_LD_W_ABS = 0x00 | 0x00 | 0x20 # BPF_LD | BPF_W | BPF_ABS _BPF_JEQ_K = 0x05 | 0x10 | 0x00 # BPF_JMP | BPF_JEQ | BPF_K +_BPF_JSET_K = 0x05 | 0x40 | 0x00 # BPF_JMP | BPF_JSET | BPF_K (bit test) _BPF_RET_K = 0x06 | 0x00 # BPF_RET | BPF_K # seccomp return actions _SECCOMP_RET_ALLOW = 0x7FFF0000 _SECCOMP_RET_ERRNO = 0x00050000 +_SECCOMP_RET_KILL_PROCESS = 0x80000000 +_X32_SYSCALL_BIT = 0x40000000 # x86_64 x32 ABI marks syscall nrs with this bit # seccomp_data field offsets (nr@0, arch@4, ip@8, args[0]@16 ...; little-endian) _OFF_NR = 0 _OFF_ARCH = 4 _OFF_ARG0 = 16 -# AUDIT_ARCH + __NR_socket per arch +# AUDIT_ARCH + syscall numbers per arch. io_uring_setup is blocked too: io_uring +# can create/connect sockets via submission-queue ops, bypassing a socket()-only +# filter — denying its setup closes that hole (no measure legitimately uses it). _AUDIT_ARCH = {"x86_64": 0xC000003E, "aarch64": 0xC00000B7} _NR_SOCKET = {"x86_64": 41, "aarch64": 198} +_NR_IO_URING_SETUP = {"x86_64": 425, "aarch64": 425} # generic syscall number _AF_INET = 2 _AF_INET6 = 10 @@ -60,27 +66,40 @@ def install_net_deny() -> bool: return False audit_arch = _AUDIT_ARCH[mach] nr_socket = _NR_SOCKET[mach] - deny = _SECCOMP_RET_ERRNO | (errno.EAFNOSUPPORT & 0xFFFF) - - # Instruction layout (jt/jf are offsets from the NEXT instruction): - # 0 load arch - # 1 arch == ours ? -> 2 : ALLOW(8) - # 2 load nr - # 3 nr == socket ? -> 4 : ALLOW(8) - # 4 load args[0] (domain) - # 5 domain == AF_INET ? DENY(7) : 6 - # 6 domain == AF_INET6 ? DENY(7) : ALLOW(8) - # 7 RET deny - # 8 RET allow + nr_io_uring = _NR_IO_URING_SETUP[mach] + deny_net = _SECCOMP_RET_ERRNO | (errno.EAFNOSUPPORT & 0xFFFF) + deny_enosys = _SECCOMP_RET_ERRNO | (errno.ENOSYS & 0xFFFF) # io_uring "unavailable" + + # Instruction layout (jt/jf are offsets from the NEXT instruction). Unexpected + # architectures (e.g. a 32-bit i386 binary, different AUDIT_ARCH) and x32-ABI + # syscalls (x86_64 syscall nr with the 0x40000000 bit) are KILLed rather than + # allowed — otherwise they'd issue unfiltered socket() calls (fail-open). + # 0 load arch + # 1 arch == ours ? -> 2 : KILL(9) + # 2 load nr + # 3 nr & x32_bit ? -> KILL(9) : 4 + # 4 nr == io_uring_setup ? -> ENOSYS(10) : 5 + # 5 nr == socket ? -> 6 : ALLOW(12) + # 6 load args[0] (domain) + # 7 domain == AF_INET ? -> DENY_NET(11): 8 + # 8 domain == AF_INET6 ? -> DENY_NET(11): ALLOW(12) + # 9 RET kill + # 10 RET enosys + # 11 RET deny_net (EAFNOSUPPORT) + # 12 RET allow program = b"".join([ _stmt(_BPF_LD_W_ABS, _OFF_ARCH), - _jump(_BPF_JEQ_K, audit_arch, 0, 6), + _jump(_BPF_JEQ_K, audit_arch, 0, 7), _stmt(_BPF_LD_W_ABS, _OFF_NR), - _jump(_BPF_JEQ_K, nr_socket, 0, 4), + _jump(_BPF_JSET_K, _X32_SYSCALL_BIT, 5, 0), + _jump(_BPF_JEQ_K, nr_io_uring, 5, 0), + _jump(_BPF_JEQ_K, nr_socket, 0, 6), _stmt(_BPF_LD_W_ABS, _OFF_ARG0), - _jump(_BPF_JEQ_K, _AF_INET, 1, 0), - _jump(_BPF_JEQ_K, _AF_INET6, 0, 1), - _stmt(_BPF_RET_K, deny), + _jump(_BPF_JEQ_K, _AF_INET, 3, 0), + _jump(_BPF_JEQ_K, _AF_INET6, 2, 3), + _stmt(_BPF_RET_K, _SECCOMP_RET_KILL_PROCESS), + _stmt(_BPF_RET_K, deny_enosys), + _stmt(_BPF_RET_K, deny_net), _stmt(_BPF_RET_K, _SECCOMP_RET_ALLOW), ]) n = len(program) // 8 From cce16737c724957846cc6eb396977f16e2151f91 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 09/26] fix(security): Landlock IOCTL_DEV (ABI>=5) + fail-closed add_rule (Codex H4/M1) Also documents the packed path_beneath struct (rebuts Copilot #7). Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/_landlock.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mcp_server/_landlock.py b/mcp_server/_landlock.py index c143109..7d0a784 100644 --- a/mcp_server/_landlock.py +++ b/mcp_server/_landlock.py @@ -37,6 +37,7 @@ _A_MAKE_SYM = 1 << 12 _A_REFER = 1 << 13 # ABI >= 2 _A_TRUNCATE = 1 << 14 # ABI >= 3 +_A_IOCTL_DEV = 1 << 15 # ABI >= 5 — governs ioctl() on device files _RO = _A_EXECUTE | _A_READ_FILE | _A_READ_DIR _RW_BASE = ( @@ -53,6 +54,12 @@ class _RulesetAttr(ctypes.Structure): class _PathBeneathAttr(ctypes.Structure): + # MUST be packed (12 bytes). The kernel UAPI declares + # struct landlock_path_beneath_attr { __u64 allowed_access; __s32 parent_fd; } + # __attribute__((packed)); + # so _pack_=1 matches the kernel exactly. Do NOT "fix" this to natural + # alignment (16 bytes) — that would mismatch the kernel struct. Verified by + # the integration tests, which only pass if add_rule parsed this correctly. _pack_ = 1 _fields_ = [("allowed_access", ctypes.c_uint64), ("parent_fd", ctypes.c_int32)] @@ -74,27 +81,37 @@ def restrict(ro_paths: list[str], rw_paths: list[str]) -> int: handled |= _A_REFER if abi >= 3: handled |= _A_TRUNCATE + if abi >= 5: + handled |= _A_IOCTL_DEV # govern device ioctls (esp. with /dev writable) attr = _RulesetAttr(handled_access_fs=handled) rs_fd = libc.syscall(_NR_CREATE_RULESET, ctypes.byref(attr), ctypes.sizeof(attr), 0) if rs_fd < 0: return 0 - def _add(path: str, access: int) -> None: + def _add(path: str, access: int) -> bool: + """True if the path is absent (optional, skip) or its rule was added; + False only if a PRESENT path's rule failed (a broken ruleset).""" try: fd = os.open(path, os.O_PATH | os.O_CLOEXEC) except OSError: - return # path absent — skip + return True # absent path — optional, not a failure try: pba = _PathBeneathAttr(allowed_access=access & handled, parent_fd=fd) - libc.syscall(_NR_ADD_RULE, rs_fd, _RULE_PATH_BENEATH, ctypes.byref(pba), 0) + return libc.syscall(_NR_ADD_RULE, rs_fd, _RULE_PATH_BENEATH, ctypes.byref(pba), 0) == 0 finally: os.close(fd) + ok = True for p in ro_paths: - _add(p, _RO) + ok = _add(p, _RO) and ok for p in rw_paths: - _add(p, handled) # full set for the writable run dir + ok = _add(p, handled) and ok # full set for the writable run dir + if not ok: + # A present path's rule was rejected → enforce nothing rather than a + # half-applied policy; the caller (auto tier) fails closed on a 0 return. + os.close(rs_fd) + return 0 # no_new_privs is set by the caller; restrict_self enforces the ruleset on # this process and everything it execs/forks. From 60373e873fc59811434e29d0adc837092461a4c2 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 10/26] fix(security): fail-closed when Landlock/seccomp cannot engage (Codex H2) Load the landlock/seccomp shims before applying the FS restriction; refresh docstring. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/_sandbox_exec.py | 45 +++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/mcp_server/_sandbox_exec.py b/mcp_server/_sandbox_exec.py index 03bdb6f..36eb1e3 100644 --- a/mcp_server/_sandbox_exec.py +++ b/mcp_server/_sandbox_exec.py @@ -4,14 +4,17 @@ python3 -m mcp_server._sandbox_exec --uid N --gid N \ [--rlimit-fsize BYTES] [--rlimit-nproc N] [--rlimit-nofile N] \ - [--rlimit-cpu SECONDS] [--rlimit-as BYTES] -- CMD [ARG...] - -Runs as root, applies rlimits, drops to the unprivileged uid/gid, sets -``no_new_privs``, then ``execvp``s CMD. A standalone exec — NOT a Popen -``preexec_fn`` — so there is no fork-safety hazard in the threaded server, and -the dropped image keeps the same pid (so the dispatcher's existing -terminate/kill-by-pid path still works). Landlock FS rules + a seccomp net-deny -filter will be added here in the next increment, behind the same wrapper. + [--rlimit-cpu SECONDS] [--rlimit-as BYTES] \ + [--landlock-ro PATH ...] [--landlock-rw PATH ...] [--seccomp-net] \ + -- CMD [ARG...] + +Applies, in order: rlimits, ``prctl(no_new_privs)``, Landlock FS policy and a +seccomp network-deny filter (when their flags are given), then drops to the +unprivileged uid/gid **only when running as root** (a non-root local server keeps +its uid — the FS/network confinement is unprivileged and still applies), and +finally ``execvp``s CMD. A standalone exec — NOT a Popen ``preexec_fn`` — so +there is no fork-safety hazard in the threaded server, and the exec'd image keeps +the same pid (so the dispatcher's terminate/kill-by-pid path still works). """ from __future__ import annotations @@ -21,6 +24,10 @@ import resource import sys +# Imported up-front (before any Landlock restriction is applied below) so the +# module files are read while the filesystem is still unconfined. +from mcp_server import _landlock, _seccomp + _RLIMITS = { "cpu": resource.RLIMIT_CPU, # CPU-seconds "fsize": resource.RLIMIT_FSIZE, # max single-file bytes @@ -78,15 +85,19 @@ def main(argv: list[str]) -> int: # both persist across setuid + exec and are inherited by children). Degrade # loudly — a backend that cannot engage prints a notice and the run proceeds # with whatever confinement did apply (the POSIX floor always holds). - if ns.landlock_ro or ns.landlock_rw: - from mcp_server import _landlock - abi = _landlock.restrict(ns.landlock_ro, ns.landlock_rw) - if not abi: - print("sandbox-exec: WARNING Landlock unavailable — FS not confined", file=sys.stderr) - if ns.seccomp_net: - from mcp_server import _seccomp - if not _seccomp.install_net_deny(): - print("sandbox-exec: WARNING seccomp net-deny unavailable — network not confined", file=sys.stderr) + # Fail CLOSED: if a kernel backend was requested (auto/full tier) but cannot + # engage, refuse to run rather than execute untrusted code unconfined. An + # operator who knowingly wants weaker confinement selects OSMCP_SANDBOX=posix + # (UID drop + rlimits, no kernel FS/net) or off — neither requests these flags. + if (ns.landlock_ro or ns.landlock_rw) and not _landlock.restrict(ns.landlock_ro, ns.landlock_rw): + print("sandbox-exec: FATAL Landlock requested but unavailable — refusing to run " + "unconfined (use OSMCP_SANDBOX=posix or off to accept weaker confinement)", + file=sys.stderr) + return 3 + if ns.seccomp_net and not _seccomp.install_net_deny(): + print("sandbox-exec: FATAL seccomp net-deny requested but unavailable — refusing to " + "run unconfined (use OSMCP_SANDBOX=posix or off)", file=sys.stderr) + return 3 # Drop privileges only when we have them (root). The FS/network confinement # above is unprivileged and already applied — so a non-root local server still From b51d237bb6178c73d13c8ce9f7ddce902d8c51d2 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 11/26] fix(security): respect OSMCP_SANDBOX_NET, symlink-safe chown, tighten env, drop /repo (Codex H3) Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/sandbox.py | 68 ++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 95084d7..556a1bc 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -5,20 +5,21 @@ They used to inherit the server's full environment via ``os.environ.copy()``. This module is the single chokepoint that confines them. -`OSMCP_SANDBOX` (config.SANDBOX_MODE) selects the mode: - off — full passthrough (current behaviour / explicit escape hatch) - posix — clean-env allowlist (this increment); UID drop + rlimits + Landlock - FS policy + seccomp net-deny arrive in later increments, same knob - auto — best confinement available (currently == posix) +`OSMCP_SANDBOX` (config.SANDBOX_MODE) selects the mode (default `auto`): + off — full passthrough (explicit escape hatch for trusted/local-tooling use) + posix — clean-env allowlist + UID drop (when root) + rlimits + auto — best available: posix + Landlock FS policy + seccomp net-deny (Linux) `off` is a true passthrough so an operator can deliberately disable confinement (the Codex ``danger-full-access`` model); the security PoC suite pins it to prove the holes still exist when off. -Increment status: clean-env floor (build_env) + UID drop & rlimits (wrap_cmd / -prepare_workdir, via _sandbox_exec). Landlock FS rules + seccomp net-deny land in -_sandbox_exec next. `active_tier()` reports what is in effect so degradation is -visible (never silent). +The unprivileged layers (clean-env, Landlock, seccomp, rlimits) apply even when +the server runs non-root; only the UID drop is root-gated and skipped otherwise. +`OSMCP_SANDBOX_NET=allow` opts out of the network deny. On a platform with no +kernel backend (macOS/Windows bare installs) `wrap_cmd` degrades to clean-env +only and warns once. `active_tier()` reports what is actually in effect so +degradation is visible (never silent). """ from __future__ import annotations @@ -33,6 +34,7 @@ INPUT_ROOT, SANDBOX_GID, SANDBOX_MODE, + SANDBOX_NET, SANDBOX_RLIMIT_AS, SANDBOX_RLIMIT_CPU, SANDBOX_RLIMIT_FSIZE, @@ -62,8 +64,13 @@ "COMSTOCK_MEASURES_DIR", "COMMON_MEASURES_DIR", "SKILLS_DIR", "OSCLI_GEMFILE", "OSCLI_GEM_PATH", "OSMCP_RUN_ROOT", "OPENSTUDIO_MCP_RUN_ROOT", + # Bundler / RubyGems config — EXPLICIT safe names only. A broad BUNDLE_/GEM_ + # prefix would leak credential vars (e.g. GEM_HOST_API_KEY, or per-host + # BUNDLE___ auth tokens) into untrusted measure code. + "BUNDLE_WITHOUT", "BUNDLE_PATH", "BUNDLE_GEMFILE", "BUNDLE_FROZEN", + "BUNDLE_DEPLOYMENT", "GEM_HOME", "GEM_PATH", }) -_ENV_ALLOW_PREFIXES = ("BUNDLE_", "GEM_", "LC_") +_ENV_ALLOW_PREFIXES = ("LC_",) # locale only (e.g. LC_CTYPE); never a secret prefix def enabled() -> bool: @@ -91,11 +98,16 @@ def active_tier() -> str: def _ro_paths() -> list[str]: - """Read-only roots the confined subprocess may read/execute.""" + """Read-only roots the confined subprocess may read/execute. + + /repo is deliberately NOT granted: the measure runs from the copied run dir, + and the shim imports mcp_server before applying Landlock — so the run never + needs to read the source tree (which could hold a host .env). + """ return [ *_RO_SYSTEM, str(COMSTOCK_MEASURES_DIR), str(COMMON_MEASURES_DIR), - str(INPUT_ROOT), str(SKILLS_DIR), "/repo", + str(INPUT_ROOT), str(SKILLS_DIR), ] @@ -130,15 +142,20 @@ def build_env(work_dir: Path | str, *, redirect_tmp: bool = True) -> dict[str, s return env -def wrap_cmd(cmd: list[str], work_dir: Path | str) -> list[str]: +def wrap_cmd(cmd: list[str], work_dir: Path | str, + extra_rw: tuple[str, ...] = ()) -> list[str]: """Wrap a subprocess argv to run under the privilege-dropping exec shim. Disabled → returns the command unchanged (today's behaviour). Enabled → runs it via `python3 -m mcp_server._sandbox_exec`, which drops to the unprivileged - sandbox uid, applies rlimits, sets no_new_privs, then execs the command (so - the pid is preserved and the dispatcher's kill-by-pid path still works). - Full tier additionally confines the filesystem (Landlock: read-only system - roots, writable only `work_dir`) and denies outbound IP networking (seccomp). + sandbox uid (when root), applies rlimits, sets no_new_privs, then execs the + command (the pid is preserved so the dispatcher's kill-by-pid path still + works). Full tier additionally confines the filesystem (Landlock: read-only + system roots, writable only `work_dir` + `extra_rw`) and — unless + OSMCP_SANDBOX_NET=allow — denies outbound IP networking (seccomp). + + extra_rw: additional writable Landlock roots (e.g. a private TMPDIR off the + bind mount for tools that need unlinked tempfiles). """ if not enabled(): return list(cmd) @@ -168,11 +185,14 @@ def wrap_cmd(cmd: list[str], work_dir: Path | str) -> list[str]: if _full(): for path in _ro_paths(): wrapped += ["--landlock-ro", path] - # rw: the run dir, plus /dev for /dev/null & /dev/urandom (node creation - # there is still barred by DAC for the unprivileged uid). - for path in (str(work_dir), "/dev"): + # rw: the run dir (+ caller extras), plus /dev for /dev/null & /dev/urandom + # (node creation there is still barred by DAC for the unprivileged uid). + for path in (str(work_dir), "/dev", *extra_rw): wrapped += ["--landlock-rw", path] - wrapped += ["--seccomp-net"] + # Network deny is the default; OSMCP_SANDBOX_NET=allow opts out (trusted + # deployments that legitimately fetch e.g. BCL components). + if SANDBOX_NET != "allow": + wrapped += ["--seccomp-net"] return [*wrapped, "--", *cmd] @@ -189,8 +209,10 @@ def prepare_workdir(work_dir: Path | str) -> None: if not enabled() or not (hasattr(os, "geteuid") and os.geteuid() == 0): return work = Path(work_dir) + # follow_symlinks=False (lchown): never let a symlink inside the dir redirect + # the chown to a target outside it (symlink-traversal privilege issue). with contextlib.suppress(OSError): - os.chown(work, SANDBOX_UID, SANDBOX_GID) + os.chown(work, SANDBOX_UID, SANDBOX_GID, follow_symlinks=False) for path in work.rglob("*"): with contextlib.suppress(OSError): - os.chown(path, SANDBOX_UID, SANDBOX_GID) + os.chown(path, SANDBOX_UID, SANDBOX_GID, follow_symlinks=False) From cca968178ed6585669d32e08043c81bfea0ae954 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 12/26] fix(security): apply_measure rejects escaping symlinks, stages symlinks=True (Codex C3) Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/skills/measures/operations.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mcp_server/skills/measures/operations.py b/mcp_server/skills/measures/operations.py index e4f1e63..1a98961 100644 --- a/mcp_server/skills/measures/operations.py +++ b/mcp_server/skills/measures/operations.py @@ -24,7 +24,7 @@ user_run_root, ) from mcp_server.model_manager import get_model, load_model -from mcp_server.util import resolve_run_dir +from mcp_server.util import reject_escaping_symlinks, resolve_run_dir def list_measure_arguments(measure_dir: str) -> dict[str, Any]: @@ -153,6 +153,14 @@ def apply_measure( if not is_path_allowed(measure_path): return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"} + # Reject symlinks that escape the measure dir: copytree(symlinks=True) + # below preserves links instead of inlining target content, but a link + # pointing outside the measure root (e.g. -> /etc/shadow) must not be + # staged at all (it would leak host files into the readable run dir). + err = reject_escaping_symlinks(measure_path) + if err: + return {"ok": False, "error": err} + # Check measure script exists (Ruby or Python) has_rb = (measure_path / "measure.rb").is_file() has_py = (measure_path / "measure.py").is_file() @@ -173,7 +181,9 @@ def apply_measure( measures_dir = run_dir / "measures" measures_dir.mkdir(exist_ok=True) local_measure = measures_dir / measure_path.name - shutil.copytree(str(measure_path), str(local_measure), dirs_exist_ok=True) + # symlinks=True: copy links AS links (don't follow → no host-file content + # inlined into the run dir); escaping links already rejected above. + shutil.copytree(str(measure_path), str(local_measure), dirs_exist_ok=True, symlinks=True) # Build measure step arguments measure_args = {} From 37cd3b80549e4de53cebc7a1b257d9e337dbae21 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 13/26] fix(security): validate test_measure paths; escape/validate generated arg code; confine reporting-measure test (Codex C5/C2/C1) Co-Authored-By: Claude Opus 4.8 (1M context) --- .../skills/measure_authoring/operations.py | 97 +++++++++++++------ 1 file changed, 67 insertions(+), 30 deletions(-) diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index df386b4..39bd079 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -8,13 +8,15 @@ import re import shutil import subprocess +import tempfile from pathlib import Path from typing import Any import openstudio from mcp_server import sandbox -from mcp_server.config import INPUT_ROOT, user_run_root +from mcp_server.config import INPUT_ROOT, is_path_allowed, user_run_root +from mcp_server.util import reject_escaping_symlinks def custom_measures_dir() -> Path: @@ -29,6 +31,9 @@ def custom_measures_dir() -> Path: ] _MEASURE_NAME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]{0,99}$") +# Argument names become bare Ruby/Python identifiers in generated code — they must +# be plain identifiers, never arbitrary text (else code injection at generation). +_ARG_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$") # Argument type -> SDK factory method name suffix _ARG_MAKERS = { @@ -91,6 +96,15 @@ def _escape_python_str(s: str) -> str: return s.replace("\\", "\\\\").replace('"', '\\"') +def _checked_arg_name(name: str) -> str: + """Validate an argument name as a plain identifier — it is emitted as a bare + Ruby/Python variable in generated code, so arbitrary text would be code + injection. Raises ValueError otherwise (callers wrap in ok:false).""" + if not isinstance(name, str) or not _ARG_NAME_RE.match(name): + raise ValueError(f"invalid argument name (must be an identifier): {name!r}") + return name + + def _generate_ruby_arguments(args: list[dict]) -> str: """Generate Ruby arguments() method body.""" lines = [ @@ -98,28 +112,26 @@ def _generate_ruby_arguments(args: list[dict]) -> str: " args = OpenStudio::Measure::OSArgumentVector.new", ] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) # bare Ruby identifier — validated, never raw text atype = a.get("type", "String") required = a.get("required", True) req_str = "true" if required else "false" maker = _ARG_MAKERS.get(atype, "String") if atype == "Choice" and "values" in a: - # Choice args require values at construction time - vals = a["values"] - choices_arr = ", ".join(f'"{v}"' for v in vals) + # Choice args require values at construction time (escape each value) lines.append(f" {name}_choices = OpenStudio::StringVector.new") - for v in vals: - lines.append(f' {name}_choices << "{v}"') + for v in a["values"]: + lines.append(f' {name}_choices << "{_escape_ruby_str(str(v))}"') lines.append(f' {name} = OpenStudio::Measure::OSArgument.makeChoiceArgument("{name}", {name}_choices, {name}_choices, {req_str})') else: lines.append(f' {name} = OpenStudio::Measure::OSArgument.make{maker}Argument("{name}", {req_str})') if "display_name" in a: - lines.append(f' {name}.setDisplayName("{a["display_name"]}")') + lines.append(f' {name}.setDisplayName("{_escape_ruby_str(str(a["display_name"]))}")') else: display = name.replace("_", " ").title() lines.append(f' {name}.setDisplayName("{display}")') if "description" in a: - lines.append(f' {name}.setDescription("{a["description"]}")') + lines.append(f' {name}.setDescription("{_escape_ruby_str(str(a["description"]))}")') if "default_value" in a: dv = a["default_value"] if atype == "Double": @@ -129,7 +141,7 @@ def _generate_ruby_arguments(args: list[dict]) -> str: elif atype == "Boolean": lines.append(f" {name}.setDefaultValue({str(dv).lower()})") else: - lines.append(f' {name}.setDefaultValue("{dv}")') + lines.append(f' {name}.setDefaultValue("{_escape_ruby_str(str(dv))}")') lines.append(f" args << {name}") lines += [" return args", " end"] return "\n".join(lines) @@ -142,27 +154,26 @@ def _generate_python_arguments(args: list[dict]) -> str: " args = openstudio.measure.OSArgumentVector()", ] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) # bare Python identifier — validated, never raw text atype = a.get("type", "String") required = a.get("required", True) req_str = "True" if required else "False" maker = _ARG_MAKERS.get(atype, "String") if atype == "Choice" and "values" in a: - # Choice args require values at construction time - vals = a["values"] + # Choice args require values at construction time (escape each value) lines.append(f" {name}_choices = openstudio.StringVector()") - for v in vals: - lines.append(f' {name}_choices.append("{v}")') + for v in a["values"]: + lines.append(f' {name}_choices.append("{_escape_python_str(str(v))}")') lines.append(f' {name} = openstudio.measure.OSArgument.makeChoiceArgument("{name}", {name}_choices, {name}_choices, {req_str})') else: lines.append(f' {name} = openstudio.measure.OSArgument.make{maker}Argument("{name}", {req_str})') if "display_name" in a: - lines.append(f' {name}.setDisplayName("{a["display_name"]}")') + lines.append(f' {name}.setDisplayName("{_escape_python_str(str(a["display_name"]))}")') else: display = name.replace("_", " ").title() lines.append(f' {name}.setDisplayName("{display}")') if "description" in a: - lines.append(f' {name}.setDescription("{a["description"]}")') + lines.append(f' {name}.setDescription("{_escape_python_str(str(a["description"]))}")') if "default_value" in a: dv = a["default_value"] if atype == "Double": @@ -172,7 +183,7 @@ def _generate_python_arguments(args: list[dict]) -> str: elif atype == "Boolean": lines.append(f" {name}.setDefaultValue({dv!s})") else: - lines.append(f' {name}.setDefaultValue("{dv}")') + lines.append(f' {name}.setDefaultValue("{_escape_python_str(str(dv))}")') lines.append(f" args.append({name})") lines += [" return args"] return "\n".join(lines) @@ -182,7 +193,7 @@ def _generate_ruby_extraction(args: list[dict]) -> str: """Generate Ruby argument extraction lines for run() method.""" lines = [] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) atype = a.get("type", "String") # Choice args are extracted as strings at runtime getter = "String" if atype == "Choice" else _ARG_MAKERS.get(atype, "String") @@ -194,7 +205,7 @@ def _generate_python_extraction(args: list[dict]) -> str: """Generate Python argument extraction lines for run() method.""" lines = [] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) atype = a.get("type", "String") # Choice args are extracted as strings at runtime getter = "String" if atype == "Choice" else _ARG_MAKERS.get(atype, "String") @@ -477,6 +488,10 @@ def _update_measure_xml(measure_dir: Path, language: str): """ if language != "Ruby": return + # NOTE: `measure -u` only executes the GENERATED arguments() method (built from + # validated/escaped arg specs — see _checked_arg_name / _escape_*), NOT the + # LLM's run_body. So it is not untrusted code and is not sandbox-wrapped here + # (wrapping it as the sandbox uid breaks the in-place measure.xml rewrite). try: subprocess.run( ["openstudio", "measure", "-u", str(measure_dir)], @@ -926,11 +941,13 @@ def _test_reporting_measure_with_run( "run", "--postprocess_only", "-w", str(osw_path), ] log_path = run_dir / "openstudio.log" + run_env = sandbox.build_env(run_dir) + sandbox.prepare_workdir(run_dir) with log_path.open("w", encoding="utf-8") as log_f: proc = subprocess.run( - cmd, cwd=str(run_dir), + sandbox.wrap_cmd(cmd, run_dir), cwd=str(run_dir), stdout=log_f, stderr=subprocess.STDOUT, - env=os.environ.copy(), timeout=120, check=False, + env=run_env, timeout=120, check=False, ) log_text = log_path.read_text(encoding="utf-8", errors="replace") @@ -982,6 +999,14 @@ def test_measure_op( mdir = Path(measure_dir) if not mdir.is_dir(): return {"ok": False, "error": f"Measure directory not found: {measure_dir}"} + # Access control: only test measures from allowed roots, and never stage a + # tree with an escaping symlink (test_measure chowns + Landlock-grants this + # dir, so an arbitrary path here is a privilege/secret-disclosure vector). + if not is_path_allowed(mdir): + return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"} + err = reject_escaping_symlinks(mdir) + if err: + return {"ok": False, "error": err} # Detect language if (mdir / "measure.py").is_file(): @@ -1006,6 +1031,8 @@ def test_measure_op( src_model = None if model_path: src = Path(model_path) + if not is_path_allowed(src): + return {"ok": False, "error": f"model_path not allowed: {model_path}"} if src.is_file(): src_model = src if not src_model: @@ -1014,23 +1041,33 @@ def test_measure_op( shutil.copy2(str(src_model), str(test_model_dst)) if language == "Python": - proc = subprocess.run( - ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"], - cwd=str(mdir), - env=sandbox.build_env(mdir, redirect_tmp=False), - capture_output=True, text=True, timeout=60, check=False, - ) + test_cmd = ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"] else: # Run minitest directly (openstudio measure -r doesn't run minitest) test_files = list(test_dir.glob("*_test.rb")) if not test_files: return {"ok": False, "error": "No Ruby test files found"} + test_cmd = ["ruby", "-I", ".", str(test_files[0])] + + # Run the (untrusted) test code under the SAME confinement as apply_measure + # — UID drop, rlimits, Landlock, seccomp net-deny. TMPDIR is a private dir + # under the in-container /tmp (NOT mdir, which is on the Docker bind mount): + # pytest's capture uses an unlinked tempfile the bind mount can't keep open. + # It's granted to Landlock as an extra writable root and HOME stays in mdir. + tmp_dir = Path(tempfile.mkdtemp(prefix="osmcp_mtest_")) + try: + env = sandbox.build_env(mdir) # HOME=mdir (granted rw) + env["TMPDIR"] = str(tmp_dir) # off the bind mount, Landlock-granted below + sandbox.prepare_workdir(mdir) + sandbox.prepare_workdir(tmp_dir) proc = subprocess.run( - ["ruby", "-I", ".", str(test_files[0])], + sandbox.wrap_cmd(test_cmd, mdir, extra_rw=(str(tmp_dir),)), cwd=str(mdir), - env=sandbox.build_env(mdir, redirect_tmp=False), + env=env, capture_output=True, text=True, timeout=60, check=False, ) + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) output = proc.stdout + proc.stderr passed = failed = errors = 0 From bfe7d21a75970d63b5432f143c35fcab7e6c39be Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:40:53 -0400 Subject: [PATCH 14/26] feat(security): enforce OSMCP_SIM_TIMEOUT_SECONDS; validate run_simulation inputs; reject escaping OSW symlinks (Copilot+Codex C4) Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/skills/simulation/operations.py | 53 +++++++++++++++++++++- tests/test_sim_queue.py | 34 ++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index df8de0e..3b4bb24 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -24,10 +24,12 @@ MAX_CONCURRENCY_PER_USER, OSCLI_GEM_PATH, OSCLI_GEMFILE, + SIM_TIMEOUT_SECONDS, + is_path_allowed, user_run_root, ) from mcp_server.identity import user_key -from mcp_server.util import resolve_run_dir +from mcp_server.util import reject_escaping_symlinks, resolve_run_dir # Where the MCP server stores runs inside the container DEFAULT_LOG_TAIL = LOG_TAIL_DEFAULT @@ -322,8 +324,46 @@ def _dispatch_once() -> None: _queue.remove(run_id) +def _enforce_timeouts() -> None: + """Terminate running sims past the wall-clock cap and mark them failed. + + OSMCP_SIM_TIMEOUT_SECONDS=0 disables. The kill (terminate -> wait -> kill) + runs outside _sim_lock so a slow exit can't stall the dispatcher. + """ + if SIM_TIMEOUT_SECONDS <= 0: + return + now = _now() + with _sim_lock: + expired = [ + rec for rec in _RUNS.values() + if rec.status == "running" and rec.pid is not None + and rec.started_at is not None + and now - rec.started_at > SIM_TIMEOUT_SECONDS + ] + for rec in expired: + with contextlib.suppress(psutil.NoSuchProcess, Exception): + p = psutil.Process(rec.pid) + p.terminate() + try: + p.wait(timeout=5) + except psutil.TimeoutExpired: + p.kill() + with _sim_lock: + rec.status = "failed" + rec.ended_at = _now() + rec.exit_code = -1 if rec.exit_code is None else rec.exit_code + rec.error = (f"Simulation exceeded the {SIM_TIMEOUT_SECONDS:.0f}s wall-clock " + "cap (OSMCP_SIM_TIMEOUT_SECONDS)") + _RUNS[rec.run_id] = rec + _persist_run_record(rec) + audit("sim_timeout", run_id=rec.run_id, user=rec.user_key, + ran_seconds=round(now - (rec.started_at or now), 1)) + + def _dispatch_loop() -> None: while True: + with contextlib.suppress(Exception): + _enforce_timeouts() with contextlib.suppress(Exception): _dispatch_once() time.sleep(0.5) @@ -357,6 +397,13 @@ def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None) src_osw = Path(osw_path).resolve() if not src_osw.exists(): return {"ok": False, "error": f"OSW not found: {osw_path}"} + # An OSW bundle may legitimately live anywhere (e.g. a temp staging dir), so we + # don't is_path_allowed the OSW path itself — but we must never FOLLOW a symlink + # in its tree that escapes the bundle (would copy a host file into the run dir). + # User-facing entry points (run_simulation) validate their inputs separately. + sym_err = reject_escaping_symlinks(src_osw.parent) + if sym_err: + return {"ok": False, "error": sym_err} # Fail fast on invalid OSW (before staging any run dir) v = validate_osw(str(src_osw), epw_path=epw_path) @@ -748,6 +795,8 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None osm = Path(osm_path) if not osm.exists(): return {"ok": False, "error": f"OSM file not found: {osm_path}"} + if not is_path_allowed(osm): + return {"ok": False, "error": f"OSM path not allowed: {osm_path}"} # Validate EPW path upfront if provided epw_abs: str | None = None @@ -755,6 +804,8 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None epw = Path(epw_path) if not epw.exists(): return {"ok": False, "error": f"EPW file not found: {epw_path}"} + if not is_path_allowed(epw): + return {"ok": False, "error": f"EPW path not allowed: {epw_path}"} epw_abs = str(epw.resolve()) # Stage the minimal OSW in a throwaway temp dir. run_osw() copies the staged diff --git a/tests/test_sim_queue.py b/tests/test_sim_queue.py index 03ed284..2227288 100644 --- a/tests/test_sim_queue.py +++ b/tests/test_sim_queue.py @@ -86,6 +86,40 @@ async def _run(): asyncio.run(_run()) +@pytest.mark.integration +def test_sim_timeout_kills_long_run(): + # Validates: OSMCP_SIM_TIMEOUT_SECONDS caps wall-clock — a sim that exceeds it + # is terminated and marked failed with a timeout error (runaway/DoS guard). A + # 2s cap fires long before the example-model sim could ever finish. + if not integration_enabled(): + pytest.skip("Set RUN_OPENSTUDIO_INTEGRATION=1 to enable integration tests.") + + with http_server({"MCP_AUTH": "none", "OSMCP_SIM_TIMEOUT_SECONDS": "2"}) as (url, _proc): + async def _run(): + async with http_session(url) as s: + cr = unwrap(await s.call_tool( + "create_example_osm", {"name": f"to_{uuid.uuid4().hex[:8]}"})) + assert cr["ok"] is True, cr + rs = unwrap(await s.call_tool( + "run_simulation", {"osm_path": cr["osm_path"], "epw_path": EPW_PATH})) + assert rs["ok"] is True, rs + run_id = rs["run_id"] + + status, err = rs["status"], "" + for _ in range(120): # up to 60s; the 2s cap fires far sooner + run = unwrap(await s.call_tool("get_run_status", {"run_id": run_id}))["run"] + status, err = run["status"], (run.get("error") or "") + if status in ("success", "failed", "cancelled", "error"): + break + await asyncio.sleep(0.5) + + assert status == "failed", f"timed-out sim must be 'failed', got {status!r}" + assert "cap" in err.lower() or "exceeded" in err.lower(), \ + f"error should indicate the wall-clock timeout; got {err!r}" + + asyncio.run(_run()) + + @pytest.mark.integration def test_cancelled_running_sim_stays_cancelled(): # Regression: _refresh_status reclassified a cancelled (running) run as "failed" From ae9a01cf7bb162fec62adee9d993744837155502 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 17:54:46 -0400 Subject: [PATCH 15/26] test(security): publish sandbox confinement suite + run it in CI Vulnerabilities are fixed (this PR), so the PoC suite is no longer held back. Runs in the dedicated security.yml workflow under OSMCP_SANDBOX=auto (each test still pins its own tier). Reports how Landlock+seccomp behave on the GitHub runner. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/security.yml | 24 +- tests/test_sandbox.py | 734 +++++++++++++++++++++++++++++++++ 2 files changed, 747 insertions(+), 11 deletions(-) create mode 100644 tests/test_sandbox.py diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 8f87f81..c3688e1 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -1,15 +1,16 @@ name: security -# Dedicated workflow for the security / sandbox-confinement suite, kept separate -# from the main `ci` shards so security tests live on their own. -# -# Manual-trigger only. The working exploit PoC (tests/test_sandbox.py) is -# deliberately kept OUT of the public repo (see .git/info/exclude) until the -# sandbox fix ships, so on a clean GitHub checkout this suite is a safe no-op — -# it runs only where the security tests are actually present (local / self-hosted -# checkout, or once the blocked-assertion suite is published). +# Dedicated workflow for the security / sandbox-confinement suite +# (tests/test_sandbox.py), kept separate from the main `ci` shards so the +# security tests run and report on their own. Runs the full sandbox tier +# (OSMCP_SANDBOX=auto); individual tests still pin their own mode per fixture +# (off/posix/auto), so this exercises every tier and reports how the kernel +# backends (Landlock + seccomp) behave on the GitHub runner. on: + push: + branches: [feat/measure-exec-sandbox] + pull_request: workflow_dispatch: jobs: @@ -25,14 +26,15 @@ jobs: env: RUN_OPENSTUDIO_INTEGRATION: "1" MCP_SERVER_CMD: "openstudio-mcp" + OSMCP_SANDBOX: "auto" run: | mkdir -p runs FILES=$(ls tests/test_sandbox.py tests/test_security_*.py 2>/dev/null || true) if [ -z "$FILES" ]; then - echo "No security tests present in this checkout (PoC kept local) — nothing to run." + echo "No security tests present in this checkout — nothing to run." exit 0 fi - echo "Running security tests: $FILES" + echo "Running security tests under OSMCP_SANDBOX=$OSMCP_SANDBOX: $FILES" docker run --rm -v "$PWD:/repo" -v "$PWD/runs:/runs" \ - -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ openstudio-mcp:dev bash -lc "cd /repo && pytest -vv $FILES" diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py new file mode 100644 index 0000000..d97591c --- /dev/null +++ b/tests/test_sandbox.py @@ -0,0 +1,734 @@ +"""Confirm the measure-exec vulnerabilities that the sandbox (planned) will close. + +Context: docs/plans/measure-exec-sandbox.md. Today an applied measure runs as +root, with the server's full environment, unconfined filesystem, and open +network — see skills/measures/operations.py (`env=os.environ.copy()`, no UID +drop, no FS policy). These tests PROVE each hole exists, safely, using canaries +and decoys only (no real secret/file/host is touched, nothing leaves the box). + +Design — "prove the problem exists now" half of the dual-run plan: + - Server is spawned with OSMCP_SANDBOX=off, so these document the explicit + passthrough/escape-hatch behaviour and remain valid once the sandbox lands + (off = full passthrough, by design — the Codex `danger-full-access` model). + - Each probe attempts an attack and reports via runner.registerInfo("PROBE ..."). + The AUTHORITATIVE check is external ground truth (escape file on disk, bytes + the listener received, the uid value), not just the measure's self-report. + +Fix increment 1 has landed (clean-env floor + unconditional measure_dir check): + - test_measure_exec_unconfined_today (OSMCP_SANDBOX=off) still proves all five + holes — off is a true passthrough by design. + - test_clean_env_blocks_secret_leak (OSMCP_SANDBOX=posix) proves the env-secret + leak is closed and a normal run still succeeds (allowlist complete enough). + - test_measure_dir_traversal_blocked proves arbitrary out-of-root measure paths + are rejected (check is unconditional, not gated by OSMCP_SANDBOX). +Fix increment 2 (UID drop + rlimits) and increment 3 (Landlock FS + seccomp +net-deny) have landed: + - test_posix_drops_root_and_applies_rlimits — uid 1001, NPROC rlimit, DAC blocks + a root-owned write (OSMCP_SANDBOX=posix). + - test_full_tier_blocks_read_escape_and_network — Landlock blocks reading a + world-readable file outside the run dir, blocks writing outside it, and + seccomp blocks outbound IP networking (OSMCP_SANDBOX=auto), while the run + still succeeds. +With the full tier, all five holes are closed; off remains a true passthrough. + +Safety notes: + - Decoys live under RUN_ROOT / a tmpdir on the disposable runs volume and are + deleted in finally blocks. + - The network probe targets a localhost listener owned by this test process, + inside the same container — no traffic leaves the machine. +""" +import asyncio +import os +import shutil +import socket +import subprocess +import sys +import tempfile +import textwrap +import threading +import uuid +from pathlib import Path + +import pytest +from conftest import ( + EPW_PATH, + http_server, + http_session, + integration_enabled, + setup_example, + unwrap, +) + +pytestmark = pytest.mark.integration + +RUN_ROOT = Path( + os.environ.get("OPENSTUDIO_MCP_RUN_ROOT", os.environ.get("OSMCP_RUN_ROOT", "/runs")), +) +_FIXTURE_MEASURE = Path(__file__).parent / "assets" / "measures" / "set_building_name" + +# Each probe attempts one attack and registers a "PROBE =" line. +# victim_path / escape_path / net_target are passed as measure arguments, so the +# same measure body exercises all five holes in a single `openstudio run`. +_RB_PROBE_BODY = """ +require 'socket' +runner.registerInfo("PROBE uid=#{Process.uid}") +runner.registerInfo("PROBE nproc=#{Process.getrlimit(:NPROC)[0]}") +runner.registerInfo("PROBE env=#{ENV['OSMCP_CANARY_SECRET'] || ''}") +begin + runner.registerInfo("PROBE read=#{File.read(victim_path).strip}") +rescue => e + runner.registerInfo("PROBE read_err=#{e.class}") +end +begin + File.write(escape_path, "ESCAPED") + runner.registerInfo("PROBE wrote=#{escape_path}") +rescue => e + runner.registerInfo("PROBE write_err=#{e.class}") +end +begin + host, port = net_target.split(":") + s = TCPSocket.new(host, port.to_i) + s.write("CANARY_EXFIL") + s.close + runner.registerInfo("PROBE net=sent") +rescue => e + runner.registerInfo("PROBE net_err=#{e.class}") +end +""" + +_PY_PROBE_BODY = """ +import os +import socket +import resource +runner.registerInfo("PROBE uid=%d" % os.getuid()) +runner.registerInfo("PROBE nproc=%d" % resource.getrlimit(resource.RLIMIT_NPROC)[0]) +runner.registerInfo("PROBE env=%s" % os.environ.get("OSMCP_CANARY_SECRET", "")) +try: + with open(victim_path) as f: + runner.registerInfo("PROBE read=%s" % f.read().strip()) +except Exception as e: + runner.registerInfo("PROBE read_err=%s" % type(e).__name__) +try: + with open(escape_path, "w") as f: + f.write("ESCAPED") + runner.registerInfo("PROBE wrote=%s" % escape_path) +except Exception as e: + runner.registerInfo("PROBE write_err=%s" % type(e).__name__) +try: + host, port = net_target.split(":") + s = socket.create_connection((host, int(port)), timeout=5) + s.sendall(b"CANARY_EXFIL") + s.close() + runner.registerInfo("PROBE net=sent") +except Exception as e: + runner.registerInfo("PROBE net_err=%s" % type(e).__name__) +""" + +_PROBE_ARGS = [ + {"name": "victim_path", "type": "String", "required": True, "default_value": ""}, + {"name": "escape_path", "type": "String", "required": True, "default_value": ""}, + {"name": "net_target", "type": "String", "required": True, "default_value": ""}, +] + + +def _unique(prefix: str = "pytest_sandbox") -> str: + return f"{prefix}_{uuid.uuid4().hex[:10]}" + + +def _probe_body(language: str) -> str: + """Indent the probe body to the base indent create_measure injects at.""" + if language == "Ruby": + return textwrap.indent(textwrap.dedent(_RB_PROBE_BODY).strip("\n"), " ") + return textwrap.indent(textwrap.dedent(_PY_PROBE_BODY).strip("\n"), " ") + + +class _CanaryListener: + """Localhost TCP listener that records the first payload it receives. + + Owned by the test process, bound to 127.0.0.1 inside the container — the + measure subprocess connects to it; nothing leaves the machine. + """ + + def __init__(self): + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.sock.bind(("127.0.0.1", 0)) + self.sock.listen(1) + self.sock.settimeout(30) + self.port = self.sock.getsockname()[1] + self.received = b"" + self._thread = threading.Thread(target=self._serve, daemon=True) + + def start(self): + self._thread.start() + + def _serve(self): + try: + conn, _ = self.sock.accept() + with conn: + conn.settimeout(5) + self.received = conn.recv(1024) + except OSError: + pass + finally: + self.sock.close() + + def close(self): + try: + self.sock.close() + except OSError: + pass + + +@pytest.fixture(scope="module") +def sandbox_server(): + """One HTTP server for the module, with a fake canary secret in its env. + + OSMCP_SANDBOX=off pins the current unconfined passthrough so these stay valid + once the sandbox exists. The canary is a fake value — never a real credential. + """ + if not integration_enabled(): + pytest.skip("integration disabled") + canary = f"do-not-leak-{uuid.uuid4().hex[:8]}" + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "off", + "OSMCP_CANARY_SECRET": canary, + }) as (url, _proc): + yield url, canary + + +@pytest.fixture(scope="module") +def confined_server(): + """An HTTP server with the POSIX floor on (OSMCP_SANDBOX=posix).""" + if not integration_enabled(): + pytest.skip("integration disabled") + canary = f"do-not-leak-{uuid.uuid4().hex[:8]}" + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "posix", + "OSMCP_CANARY_SECRET": canary, + }) as (url, _proc): + yield url, canary + + +@pytest.fixture(scope="module") +def full_server(): + """An HTTP server with the full tier (OSMCP_SANDBOX=auto: posix + Landlock + seccomp).""" + if not integration_enabled(): + pytest.skip("integration disabled") + canary = f"do-not-leak-{uuid.uuid4().hex[:8]}" + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "auto", + "OSMCP_CANARY_SECRET": canary, + }) as (url, _proc): + yield url, canary + + +@pytest.fixture(scope="module") +def net_allow_server(): + """Full tier but with the network deny opted out (OSMCP_SANDBOX_NET=allow).""" + if not integration_enabled(): + pytest.skip("integration disabled") + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "auto", + "OSMCP_SANDBOX_NET": "allow", + }) as (url, _proc): + yield url + + +async def _create_probe_measure(session, language: str) -> str: + res = unwrap(await session.call_tool("create_measure", { + "name": _unique("sandbox_probe_" + ("rb" if language == "Ruby" else "py")), + "description": "Sandbox vulnerability probe (test fixture).", + "run_body": _probe_body(language), + "language": language, + "arguments": _PROBE_ARGS, + })) + assert res.get("ok") is True, f"create_measure failed: {res}" + return res["measure_dir"] + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_measure_exec_unconfined_today(language, sandbox_server): + # Validates: an applied measure today runs as root, sees the server's secret + # env, reads outside its run dir, writes outside its run dir, and opens the + # network — the five holes the sandbox must close. Proven safely via canaries. + url, canary = sandbox_server + tag = uuid.uuid4().hex[:8] + victim_secret = f"VICTIM_SECRET_{tag}" + victim_dir = RUN_ROOT / f"_decoy_victim_{tag}" + victim_dir.mkdir(parents=True, exist_ok=True) + victim_file = victim_dir / "secret.txt" + victim_file.write_text(victim_secret, encoding="utf-8") + escape_file = RUN_ROOT / f"_decoy_escape_{tag}.txt" + listener = _CanaryListener() + listener.start() + + try: + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": str(victim_file), + "escape_path": str(escape_file), + "net_target": f"127.0.0.1:{listener.port}", + }, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: each hole is real --- + # 1. Runs as root — no privilege drop. + assert "PROBE uid=0" in info, \ + f"measure should run as root (uid 0) today; info=\n{info}" + # 2. Server's secret env leaks into the measure (os.environ.copy()). + assert f"PROBE env={canary}" in info, \ + f"canary secret should leak into measure env today; info=\n{info}" + # 3. Reads a file outside its run dir (another run's area). + assert f"PROBE read={victim_secret}" in info, \ + f"measure should read the decoy victim file today; info=\n{info}" + # 4. Writes a file outside its run dir — ground truth: file on disk. + assert "PROBE wrote=" in info, f"measure should report a write; info=\n{info}" + assert escape_file.is_file() and escape_file.read_text() == "ESCAPED", \ + "measure should write outside its run dir today (escape file on disk)" + # 5. Opens the network — ground truth: listener received the payload. + assert "PROBE net=sent" in info, f"measure should report net send; info=\n{info}" + listener._thread.join(timeout=5) + assert listener.received == b"CANARY_EXFIL", \ + f"localhost listener should receive exfil today; got {listener.received!r}" + finally: + listener.close() + shutil.rmtree(victim_dir, ignore_errors=True) + escape_file.unlink(missing_ok=True) + + +def test_measure_dir_traversal_blocked(sandbox_server): + # Regression: apply_measure must reject a measure_dir outside all allowed + # roots (own run area / shared read roots), closing the traversal hole where + # any /tmp path holding a measure.rb was copied and executed. The check is + # unconditional (not gated by OSMCP_SANDBOX), so it holds even with sandbox off. + url, _canary = sandbox_server + outside_dir = Path(tempfile.mkdtemp(prefix="osmcp_decoy_measure_")) + decoy_measure = outside_dir / "set_building_name" + shutil.copytree(_FIXTURE_MEASURE, decoy_measure) + + try: + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + # --- Act --- + return unwrap(await s.call_tool("apply_measure", { + "measure_dir": str(decoy_measure), + "arguments": {"building_name": "Traversal Test"}, + })) + + res = asyncio.run(_run()) + + # --- Assert: out-of-root measure path is refused before any execution --- + assert res.get("ok") is False, \ + f"apply_measure must reject a measure_dir outside allowed roots; got {res}" + assert "not allowed" in str(res.get("error", "")).lower(), \ + f"error should explain the path is not allowed; got {res.get('error')}" + finally: + shutil.rmtree(outside_dir, ignore_errors=True) + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_clean_env_blocks_secret_leak(language, confined_server): + # Validates: OSMCP_SANDBOX=posix strips the server's secret env via the + # allowlist, so an applied measure no longer sees host secrets — AND a normal + # measure run still succeeds, proving the allowlist is complete enough. + url, canary = confined_server + + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": {"victim_path": "", "escape_path": "", "net_target": ""}, + })) + assert res.get("ok") is True, f"apply_measure under posix failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: canary secret is gone, the run still produced output --- + assert "PROBE env=" in info, \ + f"clean-env should hide the canary under posix; info=\n{info}" + assert canary not in info, \ + f"canary secret must not appear anywhere under posix; info=\n{info}" + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_posix_drops_root_and_applies_rlimits(language, confined_server): + # Validates: OSMCP_SANDBOX=posix runs the measure as the unprivileged sandbox + # uid (1001), applies the NPROC rlimit, and DAC then denies a write to a + # root-owned location outside the run dir (/opt — in-image, not a bind mount, + # so ownership is enforced regardless of host). Read/network are NOT yet closed + # at this tier (Landlock/seccomp follow) — not asserted here. + url, _canary = confined_server + tag = uuid.uuid4().hex[:8] + escape_file = Path(f"/opt/_sbx_escape_{tag}.txt") # root-owned, in-image + + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": "", "escape_path": str(escape_file), "net_target": "", + }, + })) + assert res.get("ok") is True, f"apply_measure under posix failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: privilege dropped, rlimit applied, DAC blocks the escape write --- + assert "PROBE uid=1001" in info, f"measure should drop to sandbox uid 1001; info=\n{info}" + assert "PROBE uid=0" not in info, "measure must not run as root under posix" + assert "PROBE nproc=1024" in info, f"NPROC rlimit (1024) should be applied; info=\n{info}" + assert "PROBE write_err=" in info, \ + f"write to root-owned /opt should be denied under posix; info=\n{info}" + assert not escape_file.exists(), "DAC must block writing a root-owned out-of-run-dir path" + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_full_tier_blocks_read_escape_and_network(language, full_server): + # Validates: OSMCP_SANDBOX=auto (Landlock FS + seccomp net-deny on top of the + # POSIX floor) blocks reading a world-readable file outside the run dir + # (root-owned /opt, not in the read-only allowlist — so DAC alone would NOT + # block it; Landlock does), blocks writing outside the run dir, and blocks + # outbound IP networking — while the measure run itself still succeeds. + url, _canary = full_server + tag = uuid.uuid4().hex[:8] + victim_secret = f"VICTIM_SECRET_{tag}" + victim_file = Path(f"/opt/_sbx_victim_{tag}.txt") # root-owned, in-image, not allowlisted + victim_file.write_text(victim_secret, encoding="utf-8") + escape_file = Path(f"/opt/_sbx_escape_{tag}.txt") + listener = _CanaryListener() + listener.start() + + try: + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": str(victim_file), + "escape_path": str(escape_file), + "net_target": f"127.0.0.1:{listener.port}", + }, + })) + assert res.get("ok") is True, f"apply_measure under full tier failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: read escape, write escape, and network all denied --- + assert "PROBE read_err=" in info, f"read outside run dir should be denied; info=\n{info}" + assert victim_secret not in info, "victim secret must not leak under the full tier" + assert "PROBE write_err=" in info, f"write outside run dir should be denied; info=\n{info}" + assert not escape_file.exists(), "Landlock must block the out-of-run-dir write" + assert "PROBE net_err=" in info, f"outbound network should be denied; info=\n{info}" + listener._thread.join(timeout=3) + assert listener.received == b"", f"seccomp must block exfil; got {listener.received!r}" + finally: + listener.close() + victim_file.unlink(missing_ok=True) + escape_file.unlink(missing_ok=True) + + +def test_landlock_confines_without_root(): + # Validates: the unprivileged confinement layer (Landlock) applies even when + # the shim runs as a NON-root uid — so a local non-root server still confines + # measure filesystem access; the uid-drop is simply skipped (no hard fail). + # This is what protects a local user, not just the root Docker server. + workdir = tempfile.mkdtemp() + cmd = [ + sys.executable, "-m", "mcp_server._sandbox_exec", + "--uid", "1001", "--gid", "1001", + "--landlock-ro", "/usr", "--landlock-ro", "/lib", "--landlock-ro", "/lib64", + "--landlock-rw", workdir, + "--", "cat", "/etc/hostname", # /etc not granted -> must be denied + ] + try: + # Run the shim AS uid 1001 (non-root) to exercise the no-root path. + proc = subprocess.run( # noqa: S603 - fixed argv, no shell + cmd, capture_output=True, text=True, cwd="/repo", + user=1001, group=1001, check=False, + ) + out = (proc.stdout + proc.stderr).lower() + assert proc.returncode != 0, \ + f"Landlock must block /etc read even without root; rc=0 out={out}" + assert "denied" in out, f"expected a permission-denied error; out={out}" + finally: + shutil.rmtree(workdir, ignore_errors=True) + + +def test_local_host_files_protected(sandbox_server, full_server): + # Validates THE LOCAL ISSUE (dual-run, vuln -> fixed): the server bind-mounts + # the user's real host directories (here /repo). Unconfined, a measure an LLM + # authored writes straight into the user's checked-out repo on their machine; + # the sandbox blocks it. Benign — one canary file at the repo root, asserted + # PRESENT under off (the vulnerability) and ABSENT under auto (the fix), + # cleaned up either way. /repo is read-only in the sandbox allowlist, so the + # write is denied without breaking measures that legitimately read the repo. + tag = uuid.uuid4().hex[:8] + host_file = Path(f"/repo/_sandbox_local_canary_{tag}.txt") # /repo = bind-mounted host source + + async def _attempt_write(url): + async with http_session(url) as s: + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, "Ruby") + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": {"victim_path": "", "escape_path": str(host_file), "net_target": ""}, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + try: + # --- Vulnerability: unconfined (off) writes into the user's host repo --- + off_info = asyncio.run(_attempt_write(sandbox_server[0])) + assert "PROBE wrote=" in off_info, f"off: expected a successful write; {off_info}" + assert host_file.is_file(), \ + "VULN: an unconfined measure wrote into the user's bind-mounted host dir (/repo)" + host_file.unlink(missing_ok=True) + + # --- Fixed: confined (auto) denies the write --- + auto_info = asyncio.run(_attempt_write(full_server[0])) + assert "PROBE write_err=" in auto_info, f"auto: expected the write to be denied; {auto_info}" + assert not host_file.is_file(), \ + "FIXED: the sandbox (Landlock, /repo read-only) blocks writing to the user's host dir" + finally: + host_file.unlink(missing_ok=True) + + +def test_sandbox_net_allow_permits_network(net_allow_server): + # Validates: OSMCP_SANDBOX_NET=allow leaves outbound networking open under the + # full tier (the knob is actually wired into wrap_cmd; default deny is tested + # by test_full_tier_blocks_read_escape_and_network). + listener = _CanaryListener() + listener.start() + try: + async def _run(): + async with http_session(net_allow_server) as s: + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, "Ruby") + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": "", "escape_path": "", + "net_target": f"127.0.0.1:{listener.port}", + }, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + assert "PROBE net=sent" in info, f"net should be allowed; info=\n{info}" + listener._thread.join(timeout=5) + assert listener.received == b"CANARY_EXFIL", \ + f"OSMCP_SANDBOX_NET=allow should permit the connection; got {listener.received!r}" + finally: + listener.close() + + +def _trivial_body(language: str) -> str: + indent = " " if language == "Ruby" else " " + return f'{indent}runner.registerInfo("measure ok")' + + +async def _create_trivial_measure(session, language: str) -> str: + res = unwrap(await session.call_tool("create_measure", { + "name": _unique("tm_" + ("rb" if language == "Ruby" else "py")), + "description": "Trivial measure for test_measure confinement check.", + "run_body": _trivial_body(language), + "language": language, + "arguments": [], + })) + assert res.get("ok") is True, f"create_measure failed: {res}" + return res["measure_dir"] + + +_EVIL_TEST = ( + "import socket\n" + "def test_zzz_exfil():\n" + " try:\n" + " s = socket.create_connection(('127.0.0.1', {port}), timeout=5)\n" + " s.sendall(b'CANARY_EXFIL')\n" + " s.close()\n" + " except Exception:\n" + " pass\n" +) + + +def test_measure_test_code_confined(sandbox_server, full_server): + # Validates THE COPILOT FINDING fix (dual-run): test_measure now runs untrusted + # test code under the sandbox. Inject a test that exfils to a localhost + # listener; under off it connects (vuln — test code was unconfined), under auto + # seccomp blocks it (fixed). Asserts on the listener (ground truth). The off + # leg is the falsifiability control: it proves the evil test actually runs. + async def _run_test_measure(url, port): + async with http_session(url) as s: + measure_dir = await _create_trivial_measure(s, "Python") + evil = Path(measure_dir) / "tests" / "test_zzz_evil.py" + evil.write_text(_EVIL_TEST.format(port=port), encoding="utf-8") + await s.call_tool("test_measure", {"measure_dir": measure_dir}) + + # --- Vulnerability: off lets the (untrusted) test code reach the network --- + off_listener = _CanaryListener() + off_listener.start() + try: + asyncio.run(_run_test_measure(sandbox_server[0], off_listener.port)) + off_listener._thread.join(timeout=5) + assert off_listener.received == b"CANARY_EXFIL", \ + "VULN: test_measure ran test code that reached the network under off" + finally: + off_listener.close() + + # --- Fixed: auto confines test_measure (seccomp blocks the exfil) --- + auto_listener = _CanaryListener() + auto_listener.start() + try: + asyncio.run(_run_test_measure(full_server[0], auto_listener.port)) + auto_listener._thread.join(timeout=5) + assert auto_listener.received == b"", \ + f"FIXED: test_measure code must be net-confined under auto; got {auto_listener.received!r}" + finally: + auto_listener.close() + + +# --------------------------------------------------------------------------- +# Codex review reproductions (confirm-now): each PASSES today by demonstrating +# the open hole, run under the FULL tier (OSMCP_SANDBOX=auto) so it proves the +# sandbox does NOT yet stop it. Tagged with the Codex finding id + how to INVERT +# the assertion once the fix lands. All use canaries/decoys only. +# Not yet reproduced here (need heavier setup): C1 reporting-measure test path +# (needs a completed sim/SQL), C2 `openstudio measure -u` Ruby injection, +# H1 seccomp arch fail-open (needs an i386/x32 binary), H2 fail-open on a kernel +# without Landlock. +# --------------------------------------------------------------------------- + +def test_c3_escaping_symlink_rejected(full_server): + # Codex C3 (FIXED): a measure dir containing a symlink that escapes the measure + # root is rejected before any copy — no host-file content is inlined into the + # readable run dir. + url, _ = full_server + tag = uuid.uuid4().hex[:8] + secret_file = Path(f"/opt/_sbx_c3_{tag}.txt") # outside the measure dir + secret_file.write_text(f"C3_SECRET_{tag}", encoding="utf-8") + try: + async def _run(): + async with http_session(url) as s: + await setup_example(s, _unique()) + mdir = Path(await _create_trivial_measure(s, "Ruby")) + (mdir / "evil_link.txt").symlink_to(secret_file) # escaping symlink + return unwrap(await s.call_tool("apply_measure", {"measure_dir": str(mdir)})) + + res = asyncio.run(_run()) + assert res.get("ok") is False and "symlink" in str(res.get("error", "")).lower(), \ + f"C3: an escaping symlink in the measure dir must be rejected; got {res}" + finally: + secret_file.unlink(missing_ok=True) + + +def test_c4_run_simulation_rejects_out_of_root_osm(full_server): + # Codex C4 (FIXED): run_simulation rejects an OSM outside all allowed roots + # (no staging arbitrary host files into the run dir). + url, _ = full_server + tag = uuid.uuid4().hex[:8] + outside_osm = Path(f"/opt/_sbx_c4_{tag}.osm") # outside every allowed root + try: + async def _run(): + async with http_session(url) as s: + cr = unwrap(await s.call_tool("create_example_osm", {"name": _unique()})) + assert cr["ok"] is True, cr + shutil.copy2(cr["osm_path"], outside_osm) # a valid OSM at a host path + res = unwrap(await s.call_tool("run_simulation", { + "osm_path": str(outside_osm), "epw_path": EPW_PATH})) + if res.get("ok") and res.get("run_id"): + await s.call_tool("cancel_run", {"run_id": res["run_id"]}) + return res + + res = asyncio.run(_run()) + assert res.get("ok") is False and "not allowed" in str(res.get("error", "")).lower(), \ + f"C4: out-of-root OSM must be rejected; got {res}" + finally: + outside_osm.unlink(missing_ok=True) + + +def test_c5_test_measure_rejects_out_of_root_dir(full_server): + # Codex C5 (FIXED): test_measure rejects a measure_dir outside all allowed + # roots (before it would chown / Landlock-grant an arbitrary path). + url, _ = full_server + outside = Path(tempfile.mkdtemp(prefix="osmcp_c5_")) + decoy = outside / "set_building_name" + shutil.copytree(_FIXTURE_MEASURE, decoy) + try: + async def _run(): + async with http_session(url) as s: + await setup_example(s, _unique()) + return unwrap(await s.call_tool("test_measure", {"measure_dir": str(decoy)})) + + res = asyncio.run(_run()) + assert res.get("ok") is False and "not allowed" in str(res.get("error", "")).lower(), \ + f"C5: out-of-root measure_dir must be rejected; got {res}" + finally: + shutil.rmtree(outside, ignore_errors=True) + + +@pytest.fixture(scope="module") +def uid0_server(): + """Full tier but with a misconfigured sandbox uid of 0 (OSMCP_SANDBOX_UID=0).""" + if not integration_enabled(): + pytest.skip("integration disabled") + with http_server({ + "MCP_AUTH": "none", "OSMCP_SANDBOX": "auto", + "OSMCP_SANDBOX_UID": "0", "OSMCP_SANDBOX_GID": "0", + }) as (url, _proc): + yield url + + +def test_h5_sandbox_uid_zero_clamped(uid0_server): + # Codex H5 (FIXED): OSMCP_SANDBOX_UID=0 is rejected/clamped to the safe default, + # so confined code never runs as root even with the bad override. + async def _run(): + async with http_session(uid0_server) as s: + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, "Ruby") + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": {"victim_path": "", "escape_path": "", "net_target": ""}, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + assert "PROBE uid=0" not in info, \ + f"H5: measure must NOT run as root with OSMCP_SANDBOX_UID=0; info=\n{info}" + assert "PROBE uid=1001" in info, \ + f"H5: sandbox uid must clamp to the safe default 1001; info=\n{info}" From 6efac2ef95ae85740b5de61d7e1cad9270059da6 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 18:07:51 -0400 Subject: [PATCH 16/26] ci(security): run integration suite under OSMCP_SANDBOX=auto explicitly; add sandbox user to arm64 image Both amd64 + arm64 integration test jobs now set OSMCP_SANDBOX=auto (no longer relying on the code default). Dockerfile.arm64 gains the unprivileged sandbox user for parity. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 10 ++++++++-- docker/Dockerfile.arm64 | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6ffc936..bc2f40c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -115,6 +115,9 @@ jobs: env: RUN_OPENSTUDIO_INTEGRATION: "1" MCP_SERVER_CMD: "openstudio-mcp" + # Run the integration suite confined (explicit, not via the code default). + # Measures/sims drop to the unprivileged sandbox uid + Landlock + seccomp. + OSMCP_SANDBOX: "auto" run: | mkdir -p runs case ${{ matrix.shard }} in @@ -126,7 +129,7 @@ jobs: esac docker run --rm \ -v "$PWD:/repo" -v "$PWD/runs:/runs" \ - -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ $EXTRA_ENV \ openstudio-mcp:arm64 bash -lc "cd /repo && pytest -vv -s $FILES" @@ -182,6 +185,9 @@ jobs: env: RUN_OPENSTUDIO_INTEGRATION: "1" MCP_SERVER_CMD: "openstudio-mcp" + # Run the integration suite confined (explicit, not via the code default). + # Measures/sims drop to the unprivileged sandbox uid + Landlock + seccomp. + OSMCP_SANDBOX: "auto" run: | mkdir -p runs case ${{ matrix.shard }} in @@ -215,6 +221,6 @@ jobs: esac docker run --rm \ -v "$PWD:/repo" -v "$PWD/runs:/runs" \ - -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ $EXTRA_ENV \ openstudio-mcp:dev bash -lc "cd /repo && pytest -vv -s $FILES" diff --git a/docker/Dockerfile.arm64 b/docker/Dockerfile.arm64 index 86808b8..b4fc60e 100644 --- a/docker/Dockerfile.arm64 +++ b/docker/Dockerfile.arm64 @@ -26,6 +26,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ruby-full \ && rm -rf /var/lib/apt/lists/* +# Unprivileged account that confined measure/simulation subprocesses drop to +# (parity with docker/Dockerfile; see mcp_server/_sandbox_exec.py). +RUN useradd -r -u 1001 -s /usr/sbin/nologin sandbox + # OpenStudio arm64 — official NREL .deb. Pulls in libstdc++ etc. as deps. RUN curl -fSL \ "https://github.com/NREL/OpenStudio/releases/download/v${OPENSTUDIO_VERSION}/OpenStudio-${OPENSTUDIO_VERSION}+${OPENSTUDIO_SHA}-Ubuntu-24.04-arm64.deb" \ From a38a280dbe9ca0bf4a280383243829f5a060a99b Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 18:11:58 -0400 Subject: [PATCH 17/26] ci(security): run sandbox suite on arm64 too (matrix amd64+arm64) Validates the aarch64 seccomp BPF (different syscall numbers) and Landlock confinement on the native arm64 runner, not just amd64. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/security.yml | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index c3688e1..ecb595c 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -3,9 +3,9 @@ name: security # Dedicated workflow for the security / sandbox-confinement suite # (tests/test_sandbox.py), kept separate from the main `ci` shards so the # security tests run and report on their own. Runs the full sandbox tier -# (OSMCP_SANDBOX=auto); individual tests still pin their own mode per fixture -# (off/posix/auto), so this exercises every tier and reports how the kernel -# backends (Landlock + seccomp) behave on the GitHub runner. +# (OSMCP_SANDBOX=auto) on BOTH amd64 and arm64 — the seccomp BPF and Landlock +# use arch-specific syscall numbers, so arm64 must be exercised explicitly. +# Individual tests still pin their own mode per fixture (off/posix/auto). on: push: @@ -15,14 +15,25 @@ on: jobs: security-tests: - runs-on: ubuntu-latest + name: security-tests (${{ matrix.arch }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - arch: amd64 + runner: ubuntu-latest + dockerfile: docker/Dockerfile + - arch: arm64 + runner: ubuntu-24.04-arm + dockerfile: docker/Dockerfile.arm64 steps: - uses: actions/checkout@v4 - - name: Build image - run: docker build -f docker/Dockerfile -t openstudio-mcp:dev . + - name: Build image (${{ matrix.arch }}) + run: docker build -f ${{ matrix.dockerfile }} -t openstudio-mcp:dev . - - name: Run security suite (if present) + - name: Run security suite (${{ matrix.arch }}) env: RUN_OPENSTUDIO_INTEGRATION: "1" MCP_SERVER_CMD: "openstudio-mcp" @@ -34,7 +45,7 @@ jobs: echo "No security tests present in this checkout — nothing to run." exit 0 fi - echo "Running security tests under OSMCP_SANDBOX=$OSMCP_SANDBOX: $FILES" + echo "[${{ matrix.arch }}] security tests under OSMCP_SANDBOX=$OSMCP_SANDBOX: $FILES" docker run --rm -v "$PWD:/repo" -v "$PWD/runs:/runs" \ -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ openstudio-mcp:dev bash -lc "cd /repo && pytest -vv $FILES" From e565a953afd95afb50c36ba8e1d82daa9fe688d4 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 18:15:44 -0400 Subject: [PATCH 18/26] ci: add arch-sensitive test set to arm64 (shard 2) arm64 now runs, under OSMCP_SANDBOX=auto: shard 1 (SEB4 sim/EUI + weather/loops) and shard 2 (SWIG-memleak + stdout suppression [deb-vs-wheel], measure apply/authoring [Ruby/Python exec + bundler], an HVAC EnergyPlus sim). Security PoC runs on arm64 via the security workflow. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc2f40c..fe15393 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,14 +92,15 @@ jobs: retention-days: 1 arm64-test: - # Start of arm64 test matrix: shard 1 only (real EnergyPlus sim via SEB4 OSW, - # asserts EUI ≈ 1.875 ± 2%). Expand to [1..5] once arm64 sim parity is proven. + # arm64 test matrix: shard 1 = real EnergyPlus sim via SEB4 OSW (asserts EUI + # ≈ 1.875 ± 2%) + weather/loops; shard 2 = the arch-sensitive set (SWIG/stdout + # deb-vs-wheel, measure exec/bundler, an HVAC sim). Both run confined (auto). needs: arm64-build runs-on: ubuntu-24.04-arm strategy: fail-fast: false matrix: - shard: [1] + shard: [1, 2] steps: - uses: actions/checkout@v4 @@ -126,6 +127,15 @@ jobs: FILES="tests/test_example_workflows.py tests/test_component_properties.py tests/test_comstock.py tests/test_weather.py tests/test_weather_files.py tests/test_mcp_seb4.py tests/test_create_constructions.py tests/test_loop_operations.py tests/test_plant_loop_demand.py tests/test_sizing_properties.py tests/test_skill_retrofit.py tests/test_integration.py" EXTRA_ENV="-e MCP_OSW_PATH=tests/assets/SEB_model/SEB4_baseboard/workflow.osw -e EXPECTED_EUI=1.8750760248144998 -e EXPECTED_EUI_RTOL=0.02 -e EXPECTED_EUI_ATOL=0.0" ;; + 2) + # arm64 arch-sensitive coverage (the deb-vs-wheel / native-ISA risks): + # - SWIG memleak + stdout suppression (wheel-vs-deb behaviour) + # - measure apply + authoring (Ruby/Python exec + bundler on arm64) + # - an HVAC EnergyPlus sim (arm64 E+ build correctness) + # (The sandbox/security PoC runs on arm64 via the security workflow.) + FILES="tests/test_swig_memleak_cleanup.py tests/test_stdout_logger_silence.py tests/test_measures.py tests/test_measure_authoring.py tests/test_hvac_supply_sim.py" + EXTRA_ENV="" + ;; esac docker run --rm \ -v "$PWD:/repo" -v "$PWD/runs:/runs" \ From 5da03f4658423011693ceb4b615855d620edec86 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 18:38:29 -0400 Subject: [PATCH 19/26] fix(ci): set RUBYLIB on arm64 image so measure tests can require openstudio arm64-test shard 2 failed: 5 test_measure_authoring cases hit `require': cannot load such file -- openstudio (LoadError)`. test_measure runs Ruby minitest via raw `ruby -I .` (openstudio measure -r doesn't run minitest), so system ruby needs RUBYLIB pointing at the OpenStudio Ruby bindings. The amd64 nrel/openstudio base sets this; the arm64 (ubuntu+.deb) image never did, so the path was only exposed once arm64 shard 2 started running these tests. Symlink the install's Ruby dir to a stable /usr/local path and set RUBYLIB to it (parity with amd64). Both under /usr/local, which the sandbox Landlock policy already grants; RUBYLIB is already in the sandbox env allowlist. Co-Authored-By: Claude Opus 4.8 (1M context) --- docker/Dockerfile.arm64 | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docker/Dockerfile.arm64 b/docker/Dockerfile.arm64 index b4fc60e..929eca9 100644 --- a/docker/Dockerfile.arm64 +++ b/docker/Dockerfile.arm64 @@ -44,9 +44,14 @@ RUN curl -fSL \ # The nrel/openstudio base image does this; the .deb does not. Without it, # every measure-based MCP tool fails at runtime (ComStock, common-measures, # apply_measure). Mirrors NREL/docker-openstudio's /var/oscli setup. +# A stable symlink (/usr/local/openstudio-Ruby) is created in the RUN below so +# RUBYLIB can be a fixed ENV — the install dir name carries a build-SHA suffix. +# Both link and target live under /usr/local, which the sandbox Landlock policy +# grants read+exec. RUN OPENSTUDIO_FOLDER=$(find /usr -maxdepth 2 -name "openstudio-${OPENSTUDIO_VERSION}*" -type d | head -n1) \ && test -n "$OPENSTUDIO_FOLDER" || { echo "openstudio install dir not found"; exit 1; } \ && echo "OPENSTUDIO_FOLDER=$OPENSTUDIO_FOLDER" \ + && ln -sfn "$OPENSTUDIO_FOLDER/Ruby" /usr/local/openstudio-Ruby \ && gem install bundler -v ${OS_BUNDLER_VERSION} --no-document \ && gem install zip --no-document \ && mkdir -p /var/oscli \ @@ -57,6 +62,12 @@ RUN OPENSTUDIO_FOLDER=$(find /usr -maxdepth 2 -name "openstudio-${OPENSTUDIO_VER && bundle _${OS_BUNDLER_VERSION}_ install --path=gems --without=native_ext --jobs=4 --retry=3 \ && test -d /var/oscli/gems || { echo "/var/oscli/gems missing after bundle install"; exit 1; } +# Let the system `ruby` resolve `require 'openstudio'` (parity with the amd64 +# nrel/openstudio base, which sets RUBYLIB to the install's Ruby dir). test_measure +# runs Ruby unit tests via `ruby -I .` directly — `openstudio measure -r` does not +# run minitest — so without this the measure tests fail with a LoadError. +ENV RUBYLIB=/usr/local/openstudio-Ruby + # ComStock measures (openstudio-standards-based templates for space types, # constructions, HVAC, schedules). Only the measures/ directory is kept (~50 MB). ARG COMSTOCK_TAG=2025-3 From d2860f4abd300d65b239cf9fbaaaa7b2ead01547 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 20:45:18 -0400 Subject: [PATCH 20/26] =?UTF-8?q?fix(security):=20batch=201=20=E2=80=94=20?= =?UTF-8?q?Ruby=20interpolation=20RCE,=20fail-closed=20mode,=20env/timeout?= =?UTF-8?q?=20hardening?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex gpt-5.5 + Copilot round-2 review fixes (low-risk): - #1 (critical RCE): _escape_ruby_str now escapes '#'. A measure description/ default/choice value with '#{...}' was live Ruby interpolation in a double-quoted literal, executed when `openstudio measure -u` runs (unsandboxed, as root). - #6: unknown OSMCP_SANDBOX value (typo) normalized to 'auto' (fail-closed) instead of silently downgrading enabled-but-not-full to posix. - #7: drop the bare 'LC_' env allowlist prefix; enumerate standard locale categories so an LC_-prefixed host secret (e.g. LC_API_TOKEN) can't leak to measure code. - #9: _safe_float rejects NaN/inf so a non-finite SIM_TIMEOUT can't silently disable the wall-clock cap. - Copilot: _ARG_NAME_RE requires lowercase-leading (uppercase -> Ruby dynamic constant assignment SyntaxError in generated arguments()). - Copilot: _enforce_timeouts skips a dead-but-unreaped pid (_pid_alive) so a finished run isn't force-failed; left for the reaper to classify by exit code. Tests: 6 added (test_sandbox.py x3, test_measure_authoring.py x2, test_sim_queue.py x1). Verified 6 new + full measure_authoring + sim_queue (38 passed) under OSMCP_SANDBOX=auto. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/config.py | 32 ++++++++++- mcp_server/sandbox.py | 7 ++- .../skills/measure_authoring/operations.py | 20 +++++-- mcp_server/skills/simulation/operations.py | 4 ++ tests/test_measure_authoring.py | 55 +++++++++++++++++++ tests/test_sandbox.py | 41 ++++++++++++++ tests/test_sim_queue.py | 25 +++++++++ 7 files changed, 177 insertions(+), 7 deletions(-) diff --git a/mcp_server/config.py b/mcp_server/config.py index 56bbfc3..a541f2e 100644 --- a/mcp_server/config.py +++ b/mcp_server/config.py @@ -1,6 +1,8 @@ from __future__ import annotations +import math import os +import sys from pathlib import Path RUN_ROOT = Path(os.environ.get("OPENSTUDIO_MCP_RUN_ROOT", os.environ.get("OSMCP_RUN_ROOT", "/runs"))).resolve() @@ -14,9 +16,14 @@ def _safe_int(env_val: str, default: int) -> int: def _safe_float(env_val: str, default: float) -> float: try: - return float(env_val) + v = float(env_val) except (ValueError, TypeError): return default + # Reject NaN/inf: a non-finite SIM_TIMEOUT would make both `<= 0` and the + # elapsed-time comparison false, silently disabling timeout enforcement. + if not math.isfinite(v): + return default + return v _raw_concurrency = os.environ.get("OPENSTUDIO_MCP_MAX_CONCURRENCY", os.environ.get("OSMCP_MAX_CONCURRENCY", "1")) MAX_CONCURRENCY = _safe_int(_raw_concurrency, 1) @@ -66,7 +73,28 @@ def _safe_float(env_val: str, default: float) -> float: # Secure by default. Degrades loudly: the unprivileged layers (Landlock/seccomp) # apply even for a non-root local server; on a platform without a kernel backend # (macOS/Windows bare installs) it falls back to clean-env only and logs a notice. -SANDBOX_MODE = os.environ.get("OSMCP_SANDBOX", "auto").strip().lower() +_SANDBOX_OFF_ALIASES = frozenset({"", "off", "0", "false", "no"}) +_SANDBOX_ON_MODES = frozenset({"posix", "auto", "full", "landlock"}) + + +def _normalize_sandbox_mode(raw: str) -> str: + """Map OSMCP_SANDBOX to a known mode, FAIL-CLOSED on an unrecognized value. + + A typo like ``atuo`` would otherwise leave ``enabled()`` True but ``_full()`` + False — silently downgrading from the default Landlock+seccomp tier to + posix-only. To never weaken confinement on a typo, force the most-restrictive + ``auto`` and warn loudly. Off-aliases are honored as-is (disabling is explicit). + """ + v = (raw or "").strip().lower() + if v in _SANDBOX_OFF_ALIASES or v in _SANDBOX_ON_MODES: + return v + print(f"[sandbox] unknown OSMCP_SANDBOX={raw!r}; falling back to 'auto' " + "(fail-closed). Valid values: off, posix, auto, full, landlock.", + file=sys.stderr) + return "auto" + + +SANDBOX_MODE = _normalize_sandbox_mode(os.environ.get("OSMCP_SANDBOX", "auto")) # Network policy for confined subprocesses: deny (default) blocks outbound TCP # once the seccomp backend lands; allow leaves it open (trusted/BCL deployments). SANDBOX_NET = os.environ.get("OSMCP_SANDBOX_NET", "deny").strip().lower() diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 556a1bc..335ff5e 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -69,8 +69,13 @@ # BUNDLE___ auth tokens) into untrusted measure code. "BUNDLE_WITHOUT", "BUNDLE_PATH", "BUNDLE_GEMFILE", "BUNDLE_FROZEN", "BUNDLE_DEPLOYMENT", "GEM_HOME", "GEM_PATH", + # Standard POSIX locale categories ONLY — enumerated, never prefix-matched: a + # bare LC_ prefix would also copy a host secret named e.g. LC_API_TOKEN. + "LC_CTYPE", "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY", + "LC_MESSAGES", "LC_PAPER", "LC_NAME", "LC_ADDRESS", "LC_TELEPHONE", + "LC_MEASUREMENT", "LC_IDENTIFICATION", }) -_ENV_ALLOW_PREFIXES = ("LC_",) # locale only (e.g. LC_CTYPE); never a secret prefix +_ENV_ALLOW_PREFIXES: tuple[str, ...] = () # no prefix matching — exact names only def enabled() -> bool: diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index 39bd079..3ef1067 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -33,7 +33,10 @@ def custom_measures_dir() -> Path: _MEASURE_NAME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]{0,99}$") # Argument names become bare Ruby/Python identifiers in generated code — they must # be plain identifiers, never arbitrary text (else code injection at generation). -_ARG_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$") +# Must start lowercase/underscore: an uppercase-leading name (e.g. "Foo") is a Ruby +# *constant*, so `Foo = ...` inside a method is a "dynamic constant assignment" +# SyntaxError that breaks the generated Ruby measure. +_ARG_NAME_RE = re.compile(r"^[a-z_][a-zA-Z0-9_]{0,63}$") # Argument type -> SDK factory method name suffix _ARG_MAKERS = { @@ -87,8 +90,15 @@ def _to_class_name(snake: str) -> str: def _escape_ruby_str(s: str) -> str: - """Escape a string for safe embedding in a Ruby double-quoted string.""" - return s.replace("\\", "\\\\").replace('"', '\\"') + """Escape a string for safe embedding in a Ruby double-quoted string. + + Besides backslash and the closing quote, `#` MUST be escaped: in a Ruby + double-quoted literal `#{...}` (and `#@`, `#$`) is interpolation, so an + un-escaped `#` in user-controlled text (description / default / choice value) + is arbitrary Ruby execution when the generated measure runs `openstudio + measure -u`. `\\#` yields a literal `#`. + """ + return s.replace("\\", "\\\\").replace('"', '\\"').replace("#", "\\#") def _escape_python_str(s: str) -> str: @@ -101,7 +111,9 @@ def _checked_arg_name(name: str) -> str: Ruby/Python variable in generated code, so arbitrary text would be code injection. Raises ValueError otherwise (callers wrap in ok:false).""" if not isinstance(name, str) or not _ARG_NAME_RE.match(name): - raise ValueError(f"invalid argument name (must be an identifier): {name!r}") + raise ValueError( + "invalid argument name (must be a lowercase-leading identifier, " + f"matching [a-z_][a-zA-Z0-9_]*): {name!r}") return name diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index 3b4bb24..3c25725 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -339,6 +339,10 @@ def _enforce_timeouts() -> None: if rec.status == "running" and rec.pid is not None and rec.started_at is not None and now - rec.started_at > SIM_TIMEOUT_SECONDS + # Skip a process that already exited but hasn't been reaped yet: let + # _dispatch_once()/_refresh_status() classify it by its real exit code + # instead of force-failing a run that may have completed successfully. + and _pid_alive(rec.pid) ] for rec in expired: with contextlib.suppress(psutil.NoSuchProcess, Exception): diff --git a/tests/test_measure_authoring.py b/tests/test_measure_authoring.py index b2606f7..b48ad4e 100644 --- a/tests/test_measure_authoring.py +++ b/tests/test_measure_authoring.py @@ -1148,3 +1148,58 @@ async def _run(): # Should still include measure_dir for debugging assert "measure_dir" in res asyncio.run(_run()) + + +@pytest.mark.integration +def test_ruby_description_interpolation_neutralized(): + # Regression: a measure description was emitted into a Ruby double-quoted + # literal WITHOUT escaping '#', so '#{...}' was live interpolation. When + # `openstudio measure -u` (run unsandboxed, as root) evaluated description(), + # the payload executed = RCE. _escape_ruby_str must escape '#' -> '\#'. + if not integration_enabled(): + pytest.skip("integration disabled") + from pathlib import Path + + payload = "harmless pwn#{2+2} tail" + + async def _run(): + async with stdio_client(server_params()) as (r, w): + async with ClientSession(r, w) as s: + await s.initialize() + name = _unique("rb_inject") + res = unwrap(await s.call_tool("create_measure", { + "name": name, + "description": payload, + "run_body": RUBY_BODY, + "language": "Ruby", + })) + assert res["ok"] is True, res + rb = Path(res["measure_dir"], "measure.rb").read_text(encoding="utf-8") + # The escaped form must be present and the live-interpolation form absent. + assert "pwn\\#{2+2}" in rb, f"description '#' not escaped:\n{rb}" + assert "pwn#{2+2}" not in rb, f"live Ruby interpolation survived:\n{rb}" + asyncio.run(_run()) + + +@pytest.mark.integration +def test_uppercase_arg_name_rejected(): + # Regression: an uppercase-leading arg name became a bare Ruby local `Foo = ...`, + # which Ruby parses as dynamic constant assignment (SyntaxError) inside a method, + # breaking the generated measure. _ARG_NAME_RE must reject uppercase-leading names. + if not integration_enabled(): + pytest.skip("integration disabled") + + async def _run(): + async with stdio_client(server_params()) as (r, w): + async with ClientSession(r, w) as s: + await s.initialize() + res = unwrap(await s.call_tool("create_measure", { + "name": _unique("up_arg"), + "description": "uppercase arg name", + "run_body": RUBY_BODY, + "language": "Ruby", + "arguments": [{"name": "BadName", "type": "String", "required": False}], + })) + assert res["ok"] is False, f"uppercase arg name must be rejected: {res}" + assert "argument name" in res.get("error", "").lower(), res + asyncio.run(_run()) diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index d97591c..3701923 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -732,3 +732,44 @@ async def _run(): f"H5: measure must NOT run as root with OSMCP_SANDBOX_UID=0; info=\n{info}" assert "PROBE uid=1001" in info, \ f"H5: sandbox uid must clamp to the safe default 1001; info=\n{info}" + + +def test_unknown_sandbox_mode_fails_closed(): + # Regression: a typo'd OSMCP_SANDBOX (e.g. 'atuo') left enabled()=True but + # _full()=False, silently downgrading the default Landlock+seccomp tier to + # posix-only. An unknown value must normalize to 'auto' (fail-closed). + from mcp_server import config + assert config._normalize_sandbox_mode("atuo") == "auto" + assert config._normalize_sandbox_mode("landlok") == "auto" + assert config._normalize_sandbox_mode("AUTO") == "auto" + assert config._normalize_sandbox_mode(" posix ") == "posix" + assert config._normalize_sandbox_mode("off") == "off" + assert config._normalize_sandbox_mode("") == "" # explicit off-alias, honored + + +def test_safe_float_rejects_non_finite(): + # Regression: a NaN/inf SIM_TIMEOUT made both `<= 0` and the elapsed-time + # comparison false, silently disabling wall-clock timeout enforcement. + from mcp_server import config + assert config._safe_float("nan", 7200.0) == 7200.0 + assert config._safe_float("inf", 7200.0) == 7200.0 + assert config._safe_float("-inf", 7200.0) == 7200.0 + assert config._safe_float("bad", 7200.0) == 7200.0 + assert config._safe_float("3.5", 7200.0) == pytest.approx(3.5) + + +def test_build_env_drops_lc_prefixed_secret(monkeypatch, tmp_path): + # Regression: a bare 'LC_' allowlist prefix copied any LC_*-named host var + # (e.g. LC_API_TOKEN) into untrusted measure code. Only standard locale + # categories may pass; arbitrary names — even LC_-prefixed — are dropped. + from mcp_server import sandbox + monkeypatch.setattr(sandbox, "SANDBOX_MODE", "posix") # force enabled() + filtering + monkeypatch.setenv("LC_CTYPE", "en_US.UTF-8") + monkeypatch.setenv("LC_API_TOKEN", "supersecret") + monkeypatch.setenv("MY_SECRET", "nope") + monkeypatch.setenv("PATH", "/usr/bin") + env = sandbox.build_env(tmp_path, redirect_tmp=False) + assert env.get("LC_CTYPE") == "en_US.UTF-8", "standard locale var must pass" + assert "LC_API_TOKEN" not in env, "LC_-prefixed secret must be dropped" + assert "MY_SECRET" not in env, "non-allowlisted secret must be dropped" + assert env.get("PATH") == "/usr/bin", "PATH is required and allowlisted" diff --git a/tests/test_sim_queue.py b/tests/test_sim_queue.py index 2227288..74343ef 100644 --- a/tests/test_sim_queue.py +++ b/tests/test_sim_queue.py @@ -161,3 +161,28 @@ async def _run(): await asyncio.sleep(0.3) asyncio.run(_run()) + + +@pytest.mark.integration +def test_enforce_timeouts_skips_dead_pid(monkeypatch): + # Regression: _enforce_timeouts marked a finished-but-unreaped run as 'failed' + # without checking _pid_alive, clobbering a possibly-successful result. A run + # whose process already exited must be left for the reaper to classify by exit + # code, not force-failed by the timeout enforcer. + import time + + from mcp_server.skills.simulation import operations as ops + + monkeypatch.setattr(ops, "SIM_TIMEOUT_SECONDS", 1.0) + started = time.time() - 10_000 # well past the 1s cap + rec = ops.RunRecord( + run_id="t_dead_pid", user_key="u", name="n", status="running", + created_at=started, started_at=started, ended_at=None, + pid=2_000_000_000, # bogus pid -> _pid_alive() is False (no such process) + run_dir=Path("/runs/none"), osw_path=Path("/runs/none/in.osw"), + epw_path=None, exit_code=None, error=None, + ) + monkeypatch.setitem(ops._RUNS, rec.run_id, rec) + ops._enforce_timeouts() + assert ops._RUNS[rec.run_id].status == "running", \ + "dead-but-unreaped run was force-failed instead of left for the reaper" From 3f942b233b6c09f19e58482d1ac72dd570a20e50 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 21:11:52 -0400 Subject: [PATCH 21/26] =?UTF-8?q?fix(security):=20batch=202=20=E2=80=94=20?= =?UTF-8?q?per-file=20/dev=20Landlock=20rules=20+=20process-group=20kill?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex gpt-5.5 review fixes (medium): - #4: replace the whole-/dev rw Landlock grant with per-file rules — /dev/null, /dev/zero (rw) and /dev/urandom, /dev/random (ro). A /dev dir grant also exposed writable /dev/shm + /dev/mqueue (shared storage/IPC outside the run dir) and permitted mknod. _landlock._add now masks a rule on a non-directory to file-only rights (a dir-only bit on a file makes add_rule return EINVAL -> fail-closed). - #8: sims launch with start_new_session=True (own process-group leader) and timeout/cancel now kill the whole group via _kill_process_group(), reaping forked children (EnergyPlus, helpers) a single-pid kill would orphan. Only ever signals a group we created (pgid == pid) — never the server's own group. Tests: red-green verified (both fail on unfixed source, pass with fix). 2 added (test_sandbox.py wrap_cmd device rules, test_sim_queue.py group kill). Full test_sandbox + test_sim_queue + test_measures green under OSMCP_SANDBOX=auto. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/_landlock.py | 12 +++- mcp_server/sandbox.py | 17 ++++-- mcp_server/skills/simulation/operations.py | 65 ++++++++++++++++------ tests/test_sandbox.py | 17 ++++++ tests/test_sim_queue.py | 33 +++++++++++ 5 files changed, 121 insertions(+), 23 deletions(-) diff --git a/mcp_server/_landlock.py b/mcp_server/_landlock.py index 7d0a784..ae1fcfd 100644 --- a/mcp_server/_landlock.py +++ b/mcp_server/_landlock.py @@ -12,6 +12,7 @@ import ctypes import os +import stat # Generic syscall table numbers (identical on x86_64 and aarch64). _NR_CREATE_RULESET = 444 @@ -45,6 +46,10 @@ | _A_REMOVE_DIR | _A_REMOVE_FILE | _A_MAKE_CHAR | _A_MAKE_DIR | _A_MAKE_REG | _A_MAKE_SOCK | _A_MAKE_FIFO | _A_MAKE_BLOCK | _A_MAKE_SYM ) +# Rights that are meaningful on a non-directory. Landlock's add_rule returns +# EINVAL if a rule on a FILE carries directory-only rights (READ_DIR, MAKE_*, +# REMOVE_*), so a per-file rule (e.g. /dev/null) MUST be masked to these. +_FILE_ONLY = _A_EXECUTE | _A_WRITE_FILE | _A_READ_FILE | _A_TRUNCATE | _A_IOCTL_DEV class _RulesetAttr(ctypes.Structure): @@ -97,7 +102,12 @@ def _add(path: str, access: int) -> bool: except OSError: return True # absent path — optional, not a failure try: - pba = _PathBeneathAttr(allowed_access=access & handled, parent_fd=fd) + acc = access & handled + # A rule on a non-directory (e.g. /dev/null) must carry only file + # rights — dir-only bits make add_rule fail with EINVAL. + if not stat.S_ISDIR(os.fstat(fd).st_mode): + acc &= _FILE_ONLY + pba = _PathBeneathAttr(allowed_access=acc, parent_fd=fd) return libc.syscall(_NR_ADD_RULE, rs_fd, _RULE_PATH_BENEATH, ctypes.byref(pba), 0) == 0 finally: os.close(fd) diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 335ff5e..e4b11d0 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -52,6 +52,14 @@ _RO_SYSTEM = ("/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc", "/opt/venv", "/usr/local", "/var/oscli", "/proc", "/sys") +# Specific device files the confined process needs — granted individually instead +# of the whole /dev directory. A `/dev` rw grant also exposes writable /dev/shm and +# /dev/mqueue (shared storage / IPC outside the run dir) and permits mknod. These +# are file-level Landlock rules, so only the file-access bits apply — no directory +# powers. Read-write: the bit-bucket + zero source; read-only: the entropy sources. +_DEV_RW = ("/dev/null", "/dev/zero") +_DEV_RO = ("/dev/urandom", "/dev/random") + _warned_no_backend = False # one-shot log when no kernel backend (non-Linux) # Environment the OpenStudio CLI / bundler / EnergyPlus / measure code legitimately @@ -188,11 +196,12 @@ def wrap_cmd(cmd: list[str], work_dir: Path | str, if value and value > 0: wrapped += [flag, str(value)] if _full(): - for path in _ro_paths(): + for path in (*_ro_paths(), *_DEV_RO): wrapped += ["--landlock-ro", path] - # rw: the run dir (+ caller extras), plus /dev for /dev/null & /dev/urandom - # (node creation there is still barred by DAC for the unprivileged uid). - for path in (str(work_dir), "/dev", *extra_rw): + # rw: the run dir (+ caller extras) + the writable device FILES only — NOT + # the whole /dev dir (which would also expose writable /dev/shm, /dev/mqueue + # and permit mknod). See _DEV_RW / _DEV_RO above. + for path in (str(work_dir), *_DEV_RW, *extra_rw): wrapped += ["--landlock-rw", path] # Network deny is the default; OSMCP_SANDBOX_NET=allow opts out (trusted # deployments that legitimately fetch e.g. BCL components). diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index 3c25725..fdf0b85 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -3,7 +3,9 @@ import contextlib import json +import os import shutil +import signal import subprocess import tempfile import threading @@ -253,6 +255,37 @@ def _pid_alive(pid: int) -> bool: return False +def _kill_process_group(pid: int, *, grace: float = 5.0) -> None: + """Terminate the run's whole process group (SIGTERM, then SIGKILL after grace). + + Sims launch with start_new_session=True, so the child is its own group leader + (pgid == pid); killing the group reaps forked children (EnergyPlus, ruby + helpers) that a single-pid kill would orphan. We only signal a group we created + (pgid == pid) — never the server's own group. Falls back to a single-pid kill + when the group can't be resolved or on non-POSIX (no os.killpg). + """ + pgid = None + if hasattr(os, "getpgid") and hasattr(os, "killpg"): + with contextlib.suppress(ProcessLookupError, PermissionError, OSError): + pgid = os.getpgid(pid) + if pgid is not None and pgid == pid: + with contextlib.suppress(ProcessLookupError, PermissionError, OSError): + os.killpg(pgid, signal.SIGTERM) + with contextlib.suppress(psutil.NoSuchProcess, psutil.TimeoutExpired): + psutil.Process(pid).wait(timeout=grace) + with contextlib.suppress(ProcessLookupError, PermissionError, OSError): + os.killpg(pgid, signal.SIGKILL) # ESRCH if the group already exited + return + # Fallback: pgid unavailable / not a leader / non-POSIX — single-pid kill. + with contextlib.suppress(psutil.NoSuchProcess, Exception): + p = psutil.Process(pid) + p.terminate() + try: + p.wait(timeout=grace) + except psutil.TimeoutExpired: + p.kill() + + def _build_run_cmd(osw_path: Path) -> list[str]: """openstudio CLI command to run a staged OSW (with bundle flags).""" return [ @@ -277,6 +310,11 @@ def _launch(rec: RunRecord) -> None: stdout=log.open("w", encoding="utf-8"), stderr=subprocess.STDOUT, env=run_env, + # New session => the child is its own process-group leader (pgid == pid), + # so a timeout/cancel can kill the WHOLE group (EnergyPlus + any forked + # helpers) without ever signalling the server's own group. See + # _kill_process_group(). No-op on non-POSIX. + start_new_session=True, ) rec.pid = proc.pid rec.status = "running" @@ -345,13 +383,7 @@ def _enforce_timeouts() -> None: and _pid_alive(rec.pid) ] for rec in expired: - with contextlib.suppress(psutil.NoSuchProcess, Exception): - p = psutil.Process(rec.pid) - p.terminate() - try: - p.wait(timeout=5) - except psutil.TimeoutExpired: - p.kill() + _kill_process_group(rec.pid) # whole group: EnergyPlus + any forked helpers with _sim_lock: rec.status = "failed" rec.ended_at = _now() @@ -699,13 +731,15 @@ def cancel_run(run_id: str) -> dict[str, Any]: audit("sim_cancelled", run_id=run_id, user=rec.user_key) return {"ok": True, "run_id": run_id, "cancelled": True} + if not _pid_alive(pid): + # Finished before the cancel landed — classify by exit code, don't mislabel + # a completed run as cancelled. + with _sim_lock: + rec = _refresh_status(rec) + _RUNS[run_id] = rec + return {"ok": True, "run_id": run_id, "cancelled": False, "status": rec.status} try: - p = psutil.Process(pid) - p.terminate() - try: - p.wait(timeout=5) - except psutil.TimeoutExpired: - p.kill() + _kill_process_group(pid) # whole group (EnergyPlus + helpers), not just the leader with _sim_lock: rec.status = "cancelled" rec.ended_at = rec.ended_at or _now() @@ -713,11 +747,6 @@ def cancel_run(run_id: str) -> dict[str, Any]: _persist_run_record(rec) # disk record must read 'cancelled' so GC can reclaim it audit("sim_cancelled", run_id=run_id, user=rec.user_key) return {"ok": True, "run_id": run_id, "cancelled": True} - except psutil.NoSuchProcess: - with _sim_lock: - rec = _refresh_status(rec) - _RUNS[run_id] = rec - return {"ok": True, "run_id": run_id, "cancelled": False, "status": rec.status} except Exception as e: return {"ok": False, "run_id": run_id, "error": str(e)} diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index 3701923..24ebcac 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -773,3 +773,20 @@ def test_build_env_drops_lc_prefixed_secret(monkeypatch, tmp_path): assert "LC_API_TOKEN" not in env, "LC_-prefixed secret must be dropped" assert "MY_SECRET" not in env, "non-allowlisted secret must be dropped" assert env.get("PATH") == "/usr/bin", "PATH is required and allowlisted" + + +def test_wrap_cmd_grants_device_files_not_whole_dev(monkeypatch, tmp_path): + # Regression: granting /dev rw also exposed writable /dev/shm + /dev/mqueue + # (shared storage/IPC outside the run dir) and permitted mknod. Confine to the + # specific device FILES instead (file-level Landlock rules). + from mcp_server import sandbox + monkeypatch.setattr(sandbox, "SANDBOX_MODE", "auto") # full tier + monkeypatch.setattr(sandbox, "SANDBOX_NET", "deny") + monkeypatch.setattr(sandbox, "_has_backend", lambda: True) + wrapped = sandbox.wrap_cmd(["echo", "hi"], tmp_path) + rw = [wrapped[i + 1] for i, a in enumerate(wrapped) if a == "--landlock-rw"] + ro = [wrapped[i + 1] for i, a in enumerate(wrapped) if a == "--landlock-ro"] + assert "/dev" not in rw, f"must not grant the whole /dev dir rw: {rw}" + assert "/dev/null" in rw and "/dev/zero" in rw, f"missing rw device files: {rw}" + assert "/dev/urandom" in ro and "/dev/random" in ro, f"missing ro device files: {ro}" + assert "/dev/shm" not in rw and "/dev/shm" not in ro, "/dev/shm must never be granted" # noqa: S108 diff --git a/tests/test_sim_queue.py b/tests/test_sim_queue.py index 74343ef..7cc93e2 100644 --- a/tests/test_sim_queue.py +++ b/tests/test_sim_queue.py @@ -186,3 +186,36 @@ def test_enforce_timeouts_skips_dead_pid(monkeypatch): ops._enforce_timeouts() assert ops._RUNS[rec.run_id].status == "running", \ "dead-but-unreaped run was force-failed instead of left for the reaper" + + +@pytest.mark.integration +def test_kill_process_group_terminates_session_leader(): + # Regression: timeout/cancel killed only the leader pid, orphaning forked + # children (EnergyPlus). Runs now launch in their own session (start_new_session) + # and _kill_process_group signals the whole group; the kernel delivers killpg to + # every group member, so verifying the new-session leader dies via the group path + # is sufficient (and never touches the server's own group). + import contextlib as _ctx + import os + import subprocess + import time + + import psutil + + from mcp_server.skills.simulation import operations as ops + + if not hasattr(os, "getpgid"): + pytest.skip("POSIX process groups not available") + p = subprocess.Popen(["sleep", "60"], start_new_session=True) # noqa: S607 + try: + assert os.getpgid(p.pid) == p.pid, "start_new_session should make pid the group leader" + ops._kill_process_group(p.pid) # SIGTERMs the group and reaps the leader + deadline = time.time() + 5 + while psutil.pid_exists(p.pid) and time.time() < deadline: + time.sleep(0.1) + assert not psutil.pid_exists(p.pid), "session-leader survived group kill" + finally: + with _ctx.suppress(Exception): + p.kill() + with _ctx.suppress(Exception): + p.wait(timeout=2) From ab0d4b8f3b22f01f226158f7b9162657d9e10e6e Mon Sep 17 00:00:00 2001 From: brianlball Date: Sat, 6 Jun 2026 21:49:10 -0400 Subject: [PATCH 22/26] =?UTF-8?q?fix(security):=20batch=203=20=E2=80=94=20?= =?UTF-8?q?run=5Fosw=20access=20gate,=20test=5Fmeasure=20private=20staging?= =?UTF-8?q?,=20drop=20/inputs=20RO?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex gpt-5.5 review fixes (the staging/policy refactors): - #2 (critical, cross-tenant read): the PUBLIC run_osw now is_path_allowed-gates the OSW (and EPW) BEFORE reading/copying — it copies the OSW's whole parent dir into the run dir, so an un-gated path let a client read another tenant's run or host files (e.g. run_osw(/runs///in.osw)). A keyword-only _internal=True (not exposed via MCP) is the trusted path for run_simulation, whose temp OSW is server-built and whose inputs it already validated. - #3 (high, shared-content mutation): test_measure copied test_model.osm into, chowned, and Landlock-granted the caller's measure dir. For a measure under a shared read-only root (e.g. a bundled measure) that corrupted shared content. Now: measures outside the caller's own writable run root are copied into a private run dir and tested there; own-root measures still test in place (xml update persists). Private copy is cleaned up. - #5 (high, cross-tenant read): drop INPUT_ROOT (/inputs) from the Landlock RO allowlist — it's shared/multi-tenant and inputs are staged into the run dir anyway. Documented the DAC rationale for keeping /etc,/proc,/sys (uid-1001 already blocks secret reads). Tests: red-green verified per fix (each fails on unfixed source). run_osw gate + internal-path (test_path_safety), test_measure-no-shared-mutation + /inputs-not-RO (test_sandbox). 107 passed across path_safety/sandbox/measures/measure_authoring/ sim_queue under OSMCP_SANDBOX=auto. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/sandbox.py | 14 ++++- .../skills/measure_authoring/operations.py | 14 +++++ mcp_server/skills/simulation/operations.py | 28 ++++++--- tests/test_path_safety.py | 61 ++++++++++++++++++- tests/test_sandbox.py | 47 ++++++++++++++ 5 files changed, 153 insertions(+), 11 deletions(-) diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index e4b11d0..1f8a61d 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -31,7 +31,6 @@ from mcp_server.config import ( COMMON_MEASURES_DIR, COMSTOCK_MEASURES_DIR, - INPUT_ROOT, SANDBOX_GID, SANDBOX_MODE, SANDBOX_NET, @@ -116,11 +115,22 @@ def _ro_paths() -> list[str]: /repo is deliberately NOT granted: the measure runs from the copied run dir, and the shim imports mcp_server before applying Landlock — so the run never needs to read the source tree (which could hold a host .env). + + INPUT_ROOT (/inputs) is deliberately NOT granted either: it is a SHARED, + multi-tenant dir, so granting it would let one tenant's confined measure read + another's inputs. Inputs are staged into the run dir before execution, so the + confined process reads the staged copy, never /inputs directly. + + The broad system roots (/etc, /proc, /sys) are intentional and DAC-mitigated: + OpenStudio/Ruby/EnergyPlus need bits of each (certs, nsswitch, /proc/self, + cpuinfo), and the uid-1001 drop means the kernel already blocks reads of + non-world-readable secrets (/etc/shadow) and other users' /proc//environ. + Landlock here is defense-in-depth on top of DAC, not the only barrier. """ return [ *_RO_SYSTEM, str(COMSTOCK_MEASURES_DIR), str(COMMON_MEASURES_DIR), - str(INPUT_ROOT), str(SKILLS_DIR), + str(SKILLS_DIR), ] diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index 3ef1067..a48d217 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -1007,6 +1007,7 @@ def test_measure_op( 3. SystemD_baseline.osm from tests/assets or /inputs 4. No test_model.osm → test template falls back to empty Model.new() """ + _private_copy: Path | None = None try: mdir = Path(measure_dir) if not mdir.is_dir(): @@ -1020,6 +1021,16 @@ def test_measure_op( if err: return {"ok": False, "error": err} + # If the measure isn't in the caller's OWN writable area (e.g. a bundled or + # otherwise shared read-only measure), copy it into a private run dir and test + # the copy. test_measure writes test_model.osm + chowns + Landlock-grants the + # dir, so it must never mutate shared/host content. Own-run-root measures are + # tested in place (the caller's to mutate; the measure.xml update persists). + if not is_path_allowed(mdir, write=True): + _private_copy = Path(tempfile.mkdtemp(prefix="measure_test_", dir=user_run_root())) + shutil.copytree(mdir, _private_copy, symlinks=True, dirs_exist_ok=True) + mdir = _private_copy + # Detect language if (mdir / "measure.py").is_file(): language = "Python" @@ -1130,6 +1141,9 @@ def test_measure_op( return {"ok": False, "error": "Test run timed out (60s)"} except Exception as e: return {"ok": False, "error": f"Failed to test measure: {e}"} + finally: + if _private_copy is not None: + shutil.rmtree(_private_copy, ignore_errors=True) def edit_measure_op( diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index fdf0b85..d8b537f 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -420,7 +420,8 @@ def _enqueue(run_id: str) -> None: _queue.append(run_id) -def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None) -> dict[str, Any]: +def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None, + *, _internal: bool = False) -> dict[str, Any]: """ Stage the OSW + referenced files into /runs// and execute: openstudio run -w @@ -429,14 +430,25 @@ def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None) - We always validate the OSW JSON and its seed_file reference (if any). - If no EPW override is provided, we also require the OSW's weather_file (if set) to exist. - If an EPW override is provided, we allow a missing OSW weather_file because it will be replaced. + + Access control: as a PUBLIC tool, run_osw copies the OSW's whole parent dir into + the run dir — so an un-gated path discloses arbitrary host / other-tenant files. + Public callers must pass an OSW (and EPW) under their own run root or a shared + read root (is_path_allowed). `_internal=True` is the trusted path for + run_simulation, whose server-built temp OSW lives in a private staging dir and + whose inputs it has already validated; it is NOT exposed via the MCP tool. """ src_osw = Path(osw_path).resolve() if not src_osw.exists(): return {"ok": False, "error": f"OSW not found: {osw_path}"} - # An OSW bundle may legitimately live anywhere (e.g. a temp staging dir), so we - # don't is_path_allowed the OSW path itself — but we must never FOLLOW a symlink - # in its tree that escapes the bundle (would copy a host file into the run dir). - # User-facing entry points (run_simulation) validate their inputs separately. + if not _internal: + # Gate BEFORE reading/validating/copying anything from the path. + if not is_path_allowed(src_osw): + return {"ok": False, "error": f"OSW path not allowed: {osw_path}"} + if epw_path and not is_path_allowed(Path(epw_path)): + return {"ok": False, "error": f"EPW path not allowed: {epw_path}"} + # We must never FOLLOW a symlink in the OSW's tree that escapes the bundle + # (would copy a host file into the run dir), even for the trusted internal path. sym_err = reject_escaping_symlinks(src_osw.parent) if sym_err: return {"ok": False, "error": sym_err} @@ -858,5 +870,7 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None osw_path_out = stage / "workflow.osw" osw_path_out.write_text(json.dumps(osw, indent=2), encoding="utf-8") - # Delegate to run_osw — pass epw_path so it handles staging into files/ - return run_osw(osw_path=str(osw_path_out), epw_path=epw_abs, name=name) \ No newline at end of file + # Delegate to run_osw — pass epw_path so it handles staging into files/. + # _internal=True: osm + epw are already is_path_allowed above and the temp + # OSW is server-built in a private staging dir (not a caller-supplied path). + return run_osw(osw_path=str(osw_path_out), epw_path=epw_abs, name=name, _internal=True) \ No newline at end of file diff --git a/tests/test_path_safety.py b/tests/test_path_safety.py index 32d6d2a..16aa24d 100644 --- a/tests/test_path_safety.py +++ b/tests/test_path_safety.py @@ -72,7 +72,7 @@ def kill(self): monkeypatch.setattr(_subprocess, "Popen", _FakePopen) - result = run_osw(str(osw_path)) + result = run_osw(str(osw_path), _internal=True) # exercise staging/flatten, not the access gate # Staging happens before subprocess — verify unconditionally assert "escapes" not in result.get("error", ""), f"Path traversal error: {result}" run_dirs = list(run_root.iterdir()) @@ -122,7 +122,7 @@ def kill(self): monkeypatch.setattr(_subprocess, "Popen", _FakePopen) - result = run_osw(str(osw_path)) + result = run_osw(str(osw_path), _internal=True) # exercise staging/flatten, not the access gate # Staging happens before subprocess — verify unconditionally assert "escapes" not in result.get("error", ""), "Normal seed incorrectly rejected" run_dirs = list(run_root.iterdir()) @@ -461,3 +461,60 @@ def test_bad_end_month(self): result = set_run_period(begin_month=1, begin_day=1, end_month=13, end_day=31) assert result["ok"] is False assert "end_month" in result["error"] + + +class TestRunOswAccessControl: + """#2 (Codex): the PUBLIC run_osw must reject an OSW outside the caller's + allowed roots — it copies the OSW's whole parent dir into the run dir, so an + un-gated path is a cross-tenant / host file-disclosure vector.""" + + def _stub(self, monkeypatch, run_root): + monkeypatch.setattr( + "mcp_server.skills.simulation.operations.user_run_root", lambda: run_root) + monkeypatch.setattr( + "mcp_server.skills.simulation.operations._ensure_dispatcher", lambda: None) + + def test_out_of_policy_osw_rejected(self, tmp_path, monkeypatch): + # Regression: run_osw skipped is_path_allowed and _copy_tree'd the OSW's whole + # parent dir into the run dir, disclosing arbitrary host / other-tenant files. + outside = tmp_path / "other_tenant" + outside.mkdir() + (outside / "secret.osm").write_text("another tenant's model") + (outside / "workflow.osw").write_text(json.dumps({"seed_file": "secret.osm"})) + run_root = tmp_path / "runs" + run_root.mkdir() + self._stub(monkeypatch, run_root) + + from mcp_server.skills.simulation.operations import run_osw + result = run_osw(str(outside / "workflow.osw")) # public call (no _internal) + assert result["ok"] is False, f"out-of-policy OSW must be rejected: {result}" + assert "not allowed" in result.get("error", "").lower(), result + assert list(run_root.iterdir()) == [], "nothing may be staged from a rejected path" + + def test_internal_trusted_path_still_stages(self, tmp_path, monkeypatch): + # Validates: the internal staging path (run_simulation's server-built temp OSW) + # still works via _internal=True, even from a dir outside the allowed roots. + stage = tmp_path / "stage" + stage.mkdir() + (stage / "model.osm").write_text("seed") + (stage / "workflow.osw").write_text(json.dumps({"seed_file": "model.osm"})) + run_root = tmp_path / "runs" + run_root.mkdir() + self._stub(monkeypatch, run_root) + + class _FakePopen: + def __init__(self, *a, **kw): + self.returncode = 1 + self.pid = 999 + def poll(self): + return self.returncode + def wait(self, timeout=None): + return self.returncode + def kill(self): + pass + + monkeypatch.setattr(_subprocess, "Popen", _FakePopen) + from mcp_server.skills.simulation.operations import run_osw + result = run_osw(str(stage / "workflow.osw"), _internal=True) + assert "not allowed" not in result.get("error", "").lower(), result + assert len(list(run_root.iterdir())) == 1, f"internal staging should create a run dir: {result}" diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index 24ebcac..efcc932 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -790,3 +790,50 @@ def test_wrap_cmd_grants_device_files_not_whole_dev(monkeypatch, tmp_path): assert "/dev/null" in rw and "/dev/zero" in rw, f"missing rw device files: {rw}" assert "/dev/urandom" in ro and "/dev/random" in ro, f"missing ro device files: {ro}" assert "/dev/shm" not in rw and "/dev/shm" not in ro, "/dev/shm must never be granted" # noqa: S108 + + +def test_test_measure_does_not_mutate_shared_source(tmp_path, monkeypatch): + # Regression (Codex #3): test_measure executed in the caller's measure dir, + # writing test_model.osm + chowning + Landlock-granting it. For a measure under a + # SHARED read-only root (e.g. a bundled measure) that mutates shared/host content. + # The fix copies a non-writable-root measure into a private run dir and tests the + # copy, leaving the source untouched. (Monkeypatches config ROOTS — environment + # setup, like RUN_ROOT — not the unit under test.) + from mcp_server import config + from mcp_server.skills.measure_authoring import operations as mops + + shared_root = tmp_path / "shared" + measure = shared_root / "mymeasure" + (measure / "tests").mkdir(parents=True) + (measure / "measure.rb").write_text("# fake ModelMeasure\nclass M\nend\n") + (measure / "tests" / "mymeasure_test.rb").write_text("require 'openstudio'\n") + model = shared_root / "seed.osm" + model.write_text("OS:Version,;\n") # a readable file to copy as the test model + + # Treat shared_root as a shared READ-ONLY root; private writable area elsewhere. + monkeypatch.setattr(config, "_SHARED_READ_ROOTS", + [*config._SHARED_READ_ROOTS, shared_root.resolve()]) + run_root = tmp_path / "runs" + run_root.mkdir() + monkeypatch.setattr(mops, "user_run_root", lambda: run_root) + + mops.test_measure_op(str(measure), model_path=str(model)) + + assert not (measure / "tests" / "test_model.osm").exists(), \ + "test_model.osm was written into the SHARED measure source — must use a private copy" + assert sorted(p.name for p in (measure / "tests").iterdir()) == ["mymeasure_test.rb"], \ + "shared measure source/tests was mutated" + + +def test_input_root_not_in_landlock_ro_allowlist(): + # Regression (Codex #5): /inputs (INPUT_ROOT) is a SHARED multi-tenant dir; + # granting it read in the Landlock policy let one tenant's confined measure read + # another tenant's inputs. Inputs are staged into the run dir, so /inputs must + # NOT be a read-only root. + from mcp_server import sandbox + from mcp_server.config import COMMON_MEASURES_DIR, COMSTOCK_MEASURES_DIR, INPUT_ROOT + ro = sandbox._ro_paths() + assert str(INPUT_ROOT) not in ro, f"INPUT_ROOT must not be a Landlock RO root: {ro}" + # The legitimately-needed shared measure roots are still granted. + assert str(COMSTOCK_MEASURES_DIR) in ro and str(COMMON_MEASURES_DIR) in ro, \ + f"bundled-measure roots must stay readable: {ro}" From eb82af8802ec5df083576b66254e46509947b05f Mon Sep 17 00:00:00 2001 From: brianlball Date: Sun, 7 Jun 2026 10:25:38 -0400 Subject: [PATCH 23/26] Harden post-test measure metadata refresh --- codex-security-review-develop.MD | 159 +++++++++++++ ...curity-review-feat-measure-exec-sandbox.MD | 214 ++++++++++++++++++ .../skills/measure_authoring/operations.py | 73 +++++- tests/test_sandbox.py | 60 +++++ 4 files changed, 495 insertions(+), 11 deletions(-) create mode 100644 codex-security-review-develop.MD create mode 100644 codex-security-review-feat-measure-exec-sandbox.MD diff --git a/codex-security-review-develop.MD b/codex-security-review-develop.MD new file mode 100644 index 0000000..2622440 --- /dev/null +++ b/codex-security-review-develop.MD @@ -0,0 +1,159 @@ +# Codex Security Review: `develop` + +Reviewed June 7, 2026. + +## Findings + +### Critical: Arbitrary measure code runs with the MCP container's authority + +Measures and tests execute arbitrary Ruby/Python as the container user, with +the full environment and all container mounts. `apply_measure` invokes +`openstudio` directly with `os.environ.copy()`. + +`test_measure` directly runs `pytest` or Ruby. Since the image has no `USER`, +this is probably root inside the container. + +### Critical: `run_osw` is also arbitrary-code execution + +An OSW can contain measure steps. `run_osw` accepts an unrestricted path, +copies the OSW's entire parent directory, and executes it. It must be sandboxed +exactly like `apply_measure`. + +### Critical: Simulation paths bypass the established path policy + +`run_simulation` accepts unrestricted OSM and EPW paths. A caller can cause an +arbitrary readable file to be copied into their run directory. This bypasses +the otherwise established `is_path_allowed()` policy. + +### Critical: `test_measure` modifies and executes in the source directory + +`test_measure` writes `tests/test_model.osm`, runs tests in the caller-provided +measure directory, and updates `measure.xml`. If the directory is under +`/repo`, `/inputs`, or another host mount, untrusted code can modify host files. + +### High: Writable bind mounts expose the host filesystem + +The documented launch configuration mounts `/inputs` and `/runs` read-write. +Docker bind mounts are host filesystem access; container processes can modify +or delete host files unless the mounts are read-only. + +### High: The container lacks basic runtime hardening + +The container has no non-root `USER`, read-only root filesystem, capability +drop, `no-new-privileges`, network restriction, PID limit, or memory/CPU limit. + +### High: Documentation describes sandbox controls absent from `develop` + +The sandbox plan claims several protections are complete, but the referenced +`sandbox.py`, Landlock, seccomp, and execution shim are absent from the +`develop` checkout. This creates dangerous false assurance. + +### Medium: Process lifecycle controls are incomplete + +Simulations have no enforced wall-clock timeout and cancellation kills only +the recorded parent PID, not the complete process group or cgroup. + +### Medium: Generated Ruby strings permit interpolation + +Ruby string generation does not escape `#{...}` interpolation. Metadata fields +could execute expressions when `openstudio measure -u` evaluates the generated +measure. + +## Recommended Architecture + +Do not run untrusted code as a subprocess inside the long-lived MCP container. +Use two components: + +```text +MCP control plane + | + | job manifest + staged files + v +Job runner/orchestrator + | + v +Ephemeral sandbox worker + - one job only + - input mount read-only + - private output mount read-write + - no /repo, shared /runs, /inputs, home, or Docker socket + - no network + - destroyed after completion +``` + +The MCP server should: + +1. Validate requested paths. +2. Copy only explicitly referenced OSM, EPW, measure, and OSW files into a + private job input directory. +3. Submit an immutable job manifest containing hashes and limits. +4. Receive only approved result artifacts. +5. Never receive the Docker socket. A separate trusted runner should create + worker containers. + +The worker should use controls equivalent to: + +```text +--runtime=runsc +--network=none +--read-only +--user=10001:10001 +--cap-drop=ALL +--security-opt=no-new-privileges=true +--pids-limit=256 +--memory= +--cpus= +--tmpfs=/tmp:rw,noexec,nosuid,nodev,size=... +input:/job/input:ro +output:/job/output:rw +``` + +Also use the default or tighter seccomp profile, AppArmor/SELinux policy, a +hard wall timeout, storage quotas, and kill the entire worker container on +cancellation. + +## Tool Policy + +- `run_simulation`: sandboxed worker, with an internally generated OSW + containing no measure steps. +- `run_osw`: sandboxed worker; disabled by default for remote users or reject + measure steps unless explicitly authorized. +- `apply_measure`: sandboxed worker. +- `test_measure`: sandboxed worker operating on a staged copy. +- `openstudio measure -u`: sandboxed worker because it loads measure code. +- Syntax checks: sandboxed too. Tests and imports can execute code through + plugins and startup behavior. + +Measure authoring itself can remain in the control plane if it only writes text +into a private user directory. Any evaluation of that text must leave the +control plane. + +## Deployment Priority + +1. Immediately mount `/inputs` and `/repo` read-only; mount only a dedicated + output directory read-write. +2. Run the MCP image as non-root and add Docker resource/security flags. +3. Disable `run_osw`, `apply_measure`, and `test_measure` in remote deployments + until isolated execution exists. +4. Add `is_path_allowed()` checks to every simulation and measure path. +5. Stage inputs into private job directories and reject symlinks and escaping + OSW references. +6. Implement ephemeral worker containers. +7. Use gVisor `runsc` for untrusted multi-user workloads; use microVMs if the + threat model includes determined hostile tenants. +8. Add attack tests proving filesystem escape, environment leakage, network + access, fork exhaustion, and cross-user access succeed without the sandbox + and fail with it. + +Rootless Docker or user-namespace remapping reduces host impact, but neither +replaces per-job isolation. Static measure scanning and human approval are +useful secondary controls, not containment. + +## Sources + +- [Docker bind mounts](https://docs.docker.com/engine/storage/bind-mounts/) +- [Docker container security options](https://docs.docker.com/reference/cli/docker/container/run) +- [Docker seccomp](https://docs.docker.com/engine/security/seccomp/) +- [Docker rootless mode](https://docs.docker.com/engine/security/rootless/) +- [gVisor](https://gvisor.dev/docs/) +- [gVisor with Docker](https://gvisor.dev/docs/user_guide/quick_start/docker/) diff --git a/codex-security-review-feat-measure-exec-sandbox.MD b/codex-security-review-feat-measure-exec-sandbox.MD new file mode 100644 index 0000000..f8ae884 --- /dev/null +++ b/codex-security-review-feat-measure-exec-sandbox.MD @@ -0,0 +1,214 @@ +# Codex Security Review: `feat/measure-exec-sandbox` + +Reviewed June 7, 2026. + +## Verdict + +The branch makes substantial, technically sound improvements over `develop`. +Landlock, seccomp, environment filtering, UID dropping, path checks, symlink +checks, rlimits, and simulation process-group termination materially reduce the +blast radius of authored measures. + +It is a good defense-in-depth sandbox for a local or trusted single-container +deployment. It is not yet a sufficient hostile multi-tenant boundary. One +critical post-test sandbox escape remains, and process/resource isolation is +still weaker than a per-job container or microVM. + +## Findings + +### Critical: sandboxed tests are followed by unsandboxed Ruby execution + +After `test_measure_op` runs untrusted tests in the sandbox, it calls +`_update_measure_xml(mdir, language)`. That helper executes: + +```text +openstudio measure -u +``` + +without `sandbox.build_env()`, `sandbox.prepare_workdir()`, or +`sandbox.wrap_cmd()`. + +A sandboxed test has write access to its measure directory. It can replace +`measure.rb` with top-level Ruby code, finish successfully, and rely on the +root MCP process to load that modified file during the XML update. This crosses +the sandbox boundary and restores arbitrary code execution with the server's +authority. + +Required fix: + +- Never evaluate a file after it has been writable by untrusted code using an + unsandboxed process. +- Run `openstudio measure -u` through the sandbox. +- Prefer running it on a fresh private copy and copying only validated + `measure.xml` metadata back. +- Alternatively, update checksums/metadata with a non-executing XML writer. + +### High: measure and test timeout handling can leave descendants alive + +`apply_measure`, reporting-measure tests, and normal measure tests use +`subprocess.run(..., timeout=...)` without a new session or cgroup. On timeout, +Python terminates the immediate wrapper process, but forked or daemonized +children can survive. + +Those descendants retain their inherited Landlock/seccomp policy, but they can +continue consuming CPU, PIDs, memory, and storage. They can also interfere with +other sandbox workers that share UID 1001. + +Required fix: + +- Start every untrusted execution in a new session/process group. +- On timeout, send TERM and then KILL to the complete process group. +- Prefer a per-job cgroup or container and destroy that unit on completion. + +### High: shared UID and PID namespace weaken cross-user isolation + +Every worker uses UID 1001 in the same container and PID namespace. `/proc` is +also granted read access. Concurrent jobs can observe process metadata and may +signal same-UID processes. Depending on kernel ptrace policy, they may also +inspect or manipulate peer processes. + +The default simulation concurrency of one does not cover concurrent +`apply_measure` or `test_measure` calls. + +Required fix: + +- Use a separate PID namespace or container for each job. +- At minimum, deny `ptrace`, `process_vm_readv`, `process_vm_writev`, and + cross-worker signaling where practical. +- Use per-job UIDs if multiple jobs remain in one PID namespace. +- Do not grant the host/container-wide `/proc`; mount a private proc filesystem. + +### High: broad `/etc`, `/proc`, and `/sys` read grants expose unnecessary data + +The Landlock policy grants all of `/etc`, `/proc`, and `/sys` read access. +Dropped UID permissions block some sensitive files, but world-readable +configuration, process metadata, mounted configuration secrets, kernel details, +and service information remain visible. + +Required fix: + +- Determine the exact runtime files OpenStudio requires and grant only those. +- Stage required certificates, locale data, timezone data, and configuration + into a minimal runtime root. +- Use mount/PID namespaces where broad compatibility paths are unavoidable. + +### Medium: runtime hardening is not part of the documented default launch + +The image adds a sandbox UID, but the long-lived MCP server still runs as root. +The README launch example does not apply `--read-only`, `--cap-drop=ALL`, +`no-new-privileges`, PID/memory/CPU limits, or read-only input mounts. + +The subprocess sandbox reduces measure risk, but ordinary MCP server bugs still +execute with root authority inside a container with writable host mounts. + +Recommended fix: + +- Run the control-plane server as non-root where OpenStudio permits it. +- Make `/inputs`, `/repo`, and `/skills` read-only. +- Apply container resource limits and capability restrictions. +- Keep only a dedicated output/job directory writable. + +### Medium: sandbox status is not actually surfaced + +`active_tier()` exists but is not included in tool responses or audit events. +It infers the tier from platform/configuration rather than a successful backend +probe. Full mode does fail closed at execution time, which is good, but +operators cannot inspect the effective state before submitting work. + +Recommended fix: + +- Probe Landlock/seccomp at startup. +- Expose the result in `get_server_status`, audit logs, and every execution + result. +- Distinguish requested mode from verified active controls. + +### Medium: public `run_osw` still copies the entire parent directory + +Path and escaping-symlink checks close the arbitrary-host-file issue, but +copying the whole parent directory is still unnecessarily broad. An OSW in a +large allowed directory can copy unrelated data and consume substantial disk. + +Recommended fix: + +- Parse the OSW and stage only explicitly referenced seed, weather, measure, + file, and companion resources. +- Reject absolute and parent-traversing references unless separately authorized. +- Apply file-count and byte limits during staging. + +### Low: rlimit installation failures do not fail closed + +The shim logs an rlimit failure and continues. Landlock and seccomp correctly +fail closed, but resource backstops can silently be absent. + +Recommended fix: + +- Treat configured mandatory limits as fatal. +- Record applied limits in the execution result. + +## Changes That Correctly Address `develop` + +- `apply_measure`, reporting-measure tests, normal tests, and simulations now + pass through a common sandbox wrapper. +- The subprocess environment is explicitly allowlisted instead of inheriting + server secrets. +- Full mode fails closed if Landlock or seccomp cannot engage. +- Landlock is read-deny-by-default and limits writes to the job directory. +- `/repo` and shared `/inputs` are no longer granted to the confined process. +- `/dev` is reduced to specific device files. +- Seccomp denies IPv4/IPv6 sockets and handles unexpected architectures and x32 + fail-closed. +- `io_uring_setup` is denied to prevent a network-filter bypass. +- The child drops to a validated non-root UID/GID and sets `no_new_privs`. +- Public simulation/OSW paths now use `is_path_allowed()`. +- Escaping symlinks are rejected before staging. +- Shared measures are copied before tests mutate them. +- Simulation wall-clock timeout and process-group cancellation are implemented. +- Ruby interpolation and generated argument identifiers are validated. +- Unknown sandbox modes and non-finite timeout values no longer silently weaken + policy. +- Security tests use dual-run canaries to prove both the original vulnerability + and the confined behavior. + +## How I Would Design It Differently + +For untrusted or remote multi-user use, I would make the process sandbox a +secondary control and use a per-job worker as the primary boundary: + +```text +non-root MCP control plane + -> validated immutable job manifest + -> trusted runner service + -> ephemeral worker container using gVisor/runsc +``` + +Each worker would receive: + +- One read-only staged input directory containing only referenced files. +- One private writable output directory. +- No `/repo`, shared `/runs`, `/inputs`, user home, credentials, or Docker + socket. +- `--network=none`. +- A read-only root filesystem and bounded tmpfs. +- Non-root UID, all capabilities dropped, `no-new-privileges`. +- Dedicated PID, mount, IPC, user, and network namespaces. +- Cgroup limits for memory, CPU, PIDs, I/O, and wall time. +- Automatic destruction on success, failure, cancellation, or timeout. + +The control plane would validate and copy back only expected artifacts. It +would never execute generated Ruby or Python after a worker has modified it. + +This costs more operationally, but it gives a much clearer security claim: +arbitrary measure code can compromise only an ephemeral worker and its private +job data, not the long-lived MCP process or another user's execution. + +## Verification + +Focused Windows-compatible tests: + +```text +46 passed, 16 skipped, 1 deselected +``` + +The excluded test directly exercises Linux non-root Landlock behavior and +cannot run on Windows. Docker integration could not be rerun during this review +because access to the local Docker Desktop engine was denied. diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index a48d217..30bb848 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -500,17 +500,68 @@ def _update_measure_xml(measure_dir: Path, language: str): """ if language != "Ruby": return - # NOTE: `measure -u` only executes the GENERATED arguments() method (built from - # validated/escaped arg specs — see _checked_arg_name / _escape_*), NOT the - # LLM's run_body. So it is not untrusted code and is not sandbox-wrapped here - # (wrapping it as the sandbox uid breaks the in-place measure.xml rewrite). - try: - subprocess.run( - ["openstudio", "measure", "-u", str(measure_dir)], - capture_output=True, text=True, timeout=30, check=False, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - pass + # The directory may have been writable by untrusted test code. Never load a + # post-test measure script with the long-lived server's privileges. + symlink_error = reject_escaping_symlinks(measure_dir) + if symlink_error: + raise RuntimeError(symlink_error) + + with tempfile.TemporaryDirectory(prefix="osmcp_measure_xml_") as tmp: + staged_measure = Path(tmp) / measure_dir.name + shutil.copytree(measure_dir, staged_measure, symlinks=True) + run_env = sandbox.build_env(staged_measure) + sandbox.prepare_workdir(Path(tmp)) + try: + proc = subprocess.run( # noqa: S603 - argv is fixed and sandbox-wrapped + sandbox.wrap_cmd( + ["openstudio", "measure", "-u", str(staged_measure)], + staged_measure, + ), + cwd=str(staged_measure), + env=run_env, + capture_output=True, + text=True, + timeout=30, + check=False, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return + + detail = "\n".join(part.strip() for part in (proc.stdout, proc.stderr) if part.strip()) + staged_xml = staged_measure / "measure.xml" + if proc.returncode != 0 or not staged_xml.is_file() or staged_xml.is_symlink(): + raise RuntimeError( + f"Failed to update measure.xml (exit {proc.returncode}): {detail[-1000:]}", + ) + + def _copy_generated(name: str, max_bytes: int) -> None: + source = staged_measure / name + if not source.exists(): + return + if not source.is_file() or source.is_symlink(): + raise RuntimeError(f"Failed to update measure metadata: invalid generated {name}") + data = source.read_bytes() + if len(data) > max_bytes: + raise RuntimeError( + f"Failed to update measure metadata: generated {name} exceeds " + f"{max_bytes} bytes", + ) + temp_path: Path | None = None + with tempfile.NamedTemporaryFile( + dir=measure_dir, + prefix=f".{name}_", + delete=False, + ) as fh: + fh.write(data) + temp_path = Path(fh.name) + try: + temp_path.chmod(0o644) + temp_path.replace(measure_dir / name) + finally: + temp_path.unlink(missing_ok=True) + + _copy_generated("measure.xml", 5_000_000) + _copy_generated("README.md", 5_000_000) def _add_intended_software_tools(measure_dir: Path): diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index efcc932..00e62e6 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -622,6 +622,66 @@ async def _run_test_measure(url, port): auto_listener.close() +def test_measure_xml_refresh_does_not_execute_rewritten_measure_unconfined(full_server): + # Regression: sandboxed test code could rewrite measure.rb, then test_measure + # ran `openstudio measure -u` unsandboxed as root and evaluated the rewritten + # file. A disposable /repo canary proves no post-test evaluation escapes. + url, _ = full_server + canary = Path(f"/repo/_sandbox_xml_refresh_{uuid.uuid4().hex[:10]}.txt") + + async def _run(): + async with http_session(url) as s: + await setup_example(s, _unique()) + created = unwrap(await s.call_tool("create_measure", { + "name": _unique("xml_refresh_probe"), + "description": "Benign post-test XML refresh confinement probe.", + "run_body": " runner.registerInfo('normal measure body')", + "language": "Ruby", + "arguments": [], + })) + assert created["ok"] is True, f"create_measure failed: {created}" + + measure_dir = Path(created["measure_dir"]) + test_files = sorted((measure_dir / "tests").glob("*_test.rb")) + assert len(test_files) == 1, ( + f"generated Ruby measure should have exactly one test file, got {test_files}" + ) + test_files[0].write_text( + textwrap.dedent( + f""" + require 'openstudio' + require 'minitest/autorun' + require_relative '../measure' + + class XmlRefreshRewriteTest < Minitest::Test + def test_rewrite_measure_after_load + measure_path = File.expand_path('../measure.rb', __dir__) + original = File.read(measure_path) + payload = "File.write('{canary}', 'UNSANDBOXED_XML_REFRESH')\\n" + File.write(measure_path, payload + original) + assert_equal(false, File.exist?('{canary}')) + end + end + """, + ).lstrip(), + encoding="utf-8", + ) + + return unwrap(await s.call_tool("test_measure", { + "measure_dir": str(measure_dir), + })) + + try: + result = asyncio.run(_run()) + assert result["ok"] is True, f"sandboxed measure test should pass: {result}" + assert not canary.exists(), ( + "post-test `openstudio measure -u` evaluated rewritten measure.rb " + "outside the sandbox and created the host-mounted canary" + ) + finally: + canary.unlink(missing_ok=True) + + # --------------------------------------------------------------------------- # Codex review reproductions (confirm-now): each PASSES today by demonstrating # the open hole, run under the FULL tier (OSMCP_SANDBOX=auto) so it proves the From 9633194e587594880d06934cc22ae169adf5ea7b Mon Sep 17 00:00:00 2001 From: brianlball Date: Sun, 7 Jun 2026 11:01:56 -0400 Subject: [PATCH 24/26] Document simulation sandbox security --- README.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 178b727..9ed965b 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_ "command": "docker", "args": [ "run", "--rm", "-i", - "-v", "./tests/assets:/inputs", + "-v", "./tests/assets:/inputs:ro", "-v", "./runs:/runs", "-v", "./.claude/skills:/skills:ro", "-e", "OPENSTUDIO_MCP_MODE=prod", @@ -95,6 +95,76 @@ See **[docs/remote-multi-user.md](docs/remote-multi-user.md)** for setup, auth, --- +## Security and simulation sandbox + +OpenStudio measures are Ruby or Python programs and must be treated as untrusted +code. EnergyPlus workflows can also invoke measure code. Docker isolates the +container from the host, while openstudio-mcp adds a second sandbox around the +child processes used by `apply_measure`, `test_measure`, measure metadata +refresh, and simulations. + +The default `OSMCP_SANDBOX=auto` mode provides the following controls on Linux, +including Docker Desktop's Linux VM: + +- **Privilege drop:** child processes run as the image's unprivileged + `sandbox` user (UID/GID 1001), while the MCP server retains only the + privileges needed to prepare run directories. +- **Filesystem policy:** Landlock denies filesystem access by default. The + current run or staged measure directory is writable; required system and + OpenStudio directories are read-only. `/repo`, `/inputs`, and other users' + run directories are not exposed to measure code. +- **Network policy:** seccomp denies outbound IP networking by default. +- **Secret isolation:** child processes receive an allowlisted environment + instead of inheriting API keys, tokens, and other server environment + variables. +- **Process hardening:** `no_new_privs` prevents privilege recovery through + setuid executables. Resource limits constrain generated file size and process + count, and simulations have a wall-clock timeout. +- **Staging:** input models, weather files, measures, and OSWs are copied into a + private run directory before execution. Escaping symlinks are rejected. + +On Linux, `auto` fails closed if Landlock or the seccomp network filter cannot +be installed: the untrusted child process does not run. On native macOS or +Windows without Docker, kernel confinement is unavailable and the server warns +that only environment filtering is active. Use the Docker image when running +untrusted measures. + +### Sandbox options + +| Variable | Default | Behavior | +|----------|---------|----------| +| `OSMCP_SANDBOX` | `auto` | `auto`, `full`, and `landlock` request environment filtering, UID/GID drop, resource limits, Landlock filesystem confinement, and seccomp network denial. `posix` omits Landlock and seccomp. `off` disables child-process confinement and is only appropriate for trusted local code. Unknown values fall back to `auto`. | +| `OSMCP_SANDBOX_NET` | `deny` | `deny` blocks outbound IP networking. `allow` permits it for trusted measures that must fetch external resources. | +| `OSMCP_SANDBOX_UID` / `OSMCP_SANDBOX_GID` | `1001` | Account used for confined child processes. Values less than 1 are rejected. The default matches the `sandbox` user built into the image. | +| `OSMCP_SANDBOX_RLIMIT_FSIZE` | `10737418240` | Maximum size in bytes of one file created by a child process. `0` disables this limit. | +| `OSMCP_SANDBOX_RLIMIT_NPROC` | `1024` | Maximum processes/threads for the sandbox UID. `0` disables this limit. | +| `OSMCP_SANDBOX_RLIMIT_NOFILE` | `0` | Optional open-file descriptor limit. | +| `OSMCP_SANDBOX_RLIMIT_CPU` | `0` | Optional CPU-seconds limit. Disabled by default because annual simulations can be long-running. | +| `OSMCP_SANDBOX_RLIMIT_AS` | `0` | Optional virtual-memory limit. Prefer Docker memory limits because restrictive address-space limits can break EnergyPlus. | +| `OSMCP_SIM_TIMEOUT_SECONDS` | `7200` | Wall-clock timeout for a simulation. `0` disables the timeout. | + +### Secure deployment guidance + +- Mount `/inputs` read-only: `-v /host/inputs:/inputs:ro`. +- Mount only the output directory at `/runs`; any process allowed to write + `/runs` can modify that host directory by design. +- Mount workflow guides read-only at `/skills`, or use the copy already baked + into the image: `-v /host/.claude/skills:/skills:ro`. +- Do not mount the repository, home directory, Docker socket, credentials, or + broad host paths into production containers. The `/repo` source mount in the + testing commands is for development only. +- Keep `OSMCP_SANDBOX=auto` and `OSMCP_SANDBOX_NET=deny` for untrusted measure + authoring. Docker's `--network none` can provide an additional + container-wide network boundary when remote access is not required. +- Use Docker CPU, memory, PID, and disk quotas as outer limits. The in-process + resource limits are defense in depth, not replacements for container limits. + +The sandbox protects the host and other run directories, but it intentionally +allows measure code to modify its own staged run directory. Treat resulting +OSM, SQL, report, and log files as untrusted outputs. + +--- + ## Workflow skills In Claude Code, 12 bundled skills add workflow automation and domain knowledge: From 6d803f1f5021c28a7d73fe21b2043481c4209421 Mon Sep 17 00:00:00 2001 From: brianlball Date: Sun, 7 Jun 2026 11:37:27 -0400 Subject: [PATCH 25/26] fix(security): address PR #64 review (Copilot + manual) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixup on the post-test measure-metadata-refresh hardening: 1. Drop two raw codex-review dumps committed to repo root (codex-security-review-*.MD) — docs/review/ is already gitignored for exactly these artifacts. 2. Bounded, no-follow copy-back (Copilot C1 + TOCTOU): new util.read_file_bounded reads measure.xml/README.md with O_NOFOLLOW and caps the read at max_bytes+1, so a confined-but-untrusted 'measure -u' cannot slurp a giant file into the unconfined server nor swap the output for a symlink to a host secret. 3. chmod best-effort (Copilot C2): replace() is the critical step; suppress OSError so a restrictive mount/non-POSIX host does not fail the whole refresh. 4. test_measure: post-test refresh is now best-effort — a failure is surfaced as metadata_warning instead of masking a passing (confined) test. Confinement, not the refresh exit code, is the security boundary. create/edit keep the raise (there the xml IS the product). Tests (red-green verified): test_path_safety read_file_bounded (oversize/symlink/non-regular); test_measure_authoring passing-test-not-masked (RED on revert: ok:False 'Symlink escapes...'). Co-Authored-By: Claude Opus 4.8 (1M context) --- codex-security-review-develop.MD | 159 ------------- ...curity-review-feat-measure-exec-sandbox.MD | 214 ------------------ .../skills/measure_authoring/operations.py | 40 +++- mcp_server/util.py | 45 ++++ tests/test_measure_authoring.py | 54 +++++ tests/test_path_safety.py | 44 ++++ 6 files changed, 171 insertions(+), 385 deletions(-) delete mode 100644 codex-security-review-develop.MD delete mode 100644 codex-security-review-feat-measure-exec-sandbox.MD diff --git a/codex-security-review-develop.MD b/codex-security-review-develop.MD deleted file mode 100644 index 2622440..0000000 --- a/codex-security-review-develop.MD +++ /dev/null @@ -1,159 +0,0 @@ -# Codex Security Review: `develop` - -Reviewed June 7, 2026. - -## Findings - -### Critical: Arbitrary measure code runs with the MCP container's authority - -Measures and tests execute arbitrary Ruby/Python as the container user, with -the full environment and all container mounts. `apply_measure` invokes -`openstudio` directly with `os.environ.copy()`. - -`test_measure` directly runs `pytest` or Ruby. Since the image has no `USER`, -this is probably root inside the container. - -### Critical: `run_osw` is also arbitrary-code execution - -An OSW can contain measure steps. `run_osw` accepts an unrestricted path, -copies the OSW's entire parent directory, and executes it. It must be sandboxed -exactly like `apply_measure`. - -### Critical: Simulation paths bypass the established path policy - -`run_simulation` accepts unrestricted OSM and EPW paths. A caller can cause an -arbitrary readable file to be copied into their run directory. This bypasses -the otherwise established `is_path_allowed()` policy. - -### Critical: `test_measure` modifies and executes in the source directory - -`test_measure` writes `tests/test_model.osm`, runs tests in the caller-provided -measure directory, and updates `measure.xml`. If the directory is under -`/repo`, `/inputs`, or another host mount, untrusted code can modify host files. - -### High: Writable bind mounts expose the host filesystem - -The documented launch configuration mounts `/inputs` and `/runs` read-write. -Docker bind mounts are host filesystem access; container processes can modify -or delete host files unless the mounts are read-only. - -### High: The container lacks basic runtime hardening - -The container has no non-root `USER`, read-only root filesystem, capability -drop, `no-new-privileges`, network restriction, PID limit, or memory/CPU limit. - -### High: Documentation describes sandbox controls absent from `develop` - -The sandbox plan claims several protections are complete, but the referenced -`sandbox.py`, Landlock, seccomp, and execution shim are absent from the -`develop` checkout. This creates dangerous false assurance. - -### Medium: Process lifecycle controls are incomplete - -Simulations have no enforced wall-clock timeout and cancellation kills only -the recorded parent PID, not the complete process group or cgroup. - -### Medium: Generated Ruby strings permit interpolation - -Ruby string generation does not escape `#{...}` interpolation. Metadata fields -could execute expressions when `openstudio measure -u` evaluates the generated -measure. - -## Recommended Architecture - -Do not run untrusted code as a subprocess inside the long-lived MCP container. -Use two components: - -```text -MCP control plane - | - | job manifest + staged files - v -Job runner/orchestrator - | - v -Ephemeral sandbox worker - - one job only - - input mount read-only - - private output mount read-write - - no /repo, shared /runs, /inputs, home, or Docker socket - - no network - - destroyed after completion -``` - -The MCP server should: - -1. Validate requested paths. -2. Copy only explicitly referenced OSM, EPW, measure, and OSW files into a - private job input directory. -3. Submit an immutable job manifest containing hashes and limits. -4. Receive only approved result artifacts. -5. Never receive the Docker socket. A separate trusted runner should create - worker containers. - -The worker should use controls equivalent to: - -```text ---runtime=runsc ---network=none ---read-only ---user=10001:10001 ---cap-drop=ALL ---security-opt=no-new-privileges=true ---pids-limit=256 ---memory= ---cpus= ---tmpfs=/tmp:rw,noexec,nosuid,nodev,size=... -input:/job/input:ro -output:/job/output:rw -``` - -Also use the default or tighter seccomp profile, AppArmor/SELinux policy, a -hard wall timeout, storage quotas, and kill the entire worker container on -cancellation. - -## Tool Policy - -- `run_simulation`: sandboxed worker, with an internally generated OSW - containing no measure steps. -- `run_osw`: sandboxed worker; disabled by default for remote users or reject - measure steps unless explicitly authorized. -- `apply_measure`: sandboxed worker. -- `test_measure`: sandboxed worker operating on a staged copy. -- `openstudio measure -u`: sandboxed worker because it loads measure code. -- Syntax checks: sandboxed too. Tests and imports can execute code through - plugins and startup behavior. - -Measure authoring itself can remain in the control plane if it only writes text -into a private user directory. Any evaluation of that text must leave the -control plane. - -## Deployment Priority - -1. Immediately mount `/inputs` and `/repo` read-only; mount only a dedicated - output directory read-write. -2. Run the MCP image as non-root and add Docker resource/security flags. -3. Disable `run_osw`, `apply_measure`, and `test_measure` in remote deployments - until isolated execution exists. -4. Add `is_path_allowed()` checks to every simulation and measure path. -5. Stage inputs into private job directories and reject symlinks and escaping - OSW references. -6. Implement ephemeral worker containers. -7. Use gVisor `runsc` for untrusted multi-user workloads; use microVMs if the - threat model includes determined hostile tenants. -8. Add attack tests proving filesystem escape, environment leakage, network - access, fork exhaustion, and cross-user access succeed without the sandbox - and fail with it. - -Rootless Docker or user-namespace remapping reduces host impact, but neither -replaces per-job isolation. Static measure scanning and human approval are -useful secondary controls, not containment. - -## Sources - -- [Docker bind mounts](https://docs.docker.com/engine/storage/bind-mounts/) -- [Docker container security options](https://docs.docker.com/reference/cli/docker/container/run) -- [Docker seccomp](https://docs.docker.com/engine/security/seccomp/) -- [Docker rootless mode](https://docs.docker.com/engine/security/rootless/) -- [gVisor](https://gvisor.dev/docs/) -- [gVisor with Docker](https://gvisor.dev/docs/user_guide/quick_start/docker/) diff --git a/codex-security-review-feat-measure-exec-sandbox.MD b/codex-security-review-feat-measure-exec-sandbox.MD deleted file mode 100644 index f8ae884..0000000 --- a/codex-security-review-feat-measure-exec-sandbox.MD +++ /dev/null @@ -1,214 +0,0 @@ -# Codex Security Review: `feat/measure-exec-sandbox` - -Reviewed June 7, 2026. - -## Verdict - -The branch makes substantial, technically sound improvements over `develop`. -Landlock, seccomp, environment filtering, UID dropping, path checks, symlink -checks, rlimits, and simulation process-group termination materially reduce the -blast radius of authored measures. - -It is a good defense-in-depth sandbox for a local or trusted single-container -deployment. It is not yet a sufficient hostile multi-tenant boundary. One -critical post-test sandbox escape remains, and process/resource isolation is -still weaker than a per-job container or microVM. - -## Findings - -### Critical: sandboxed tests are followed by unsandboxed Ruby execution - -After `test_measure_op` runs untrusted tests in the sandbox, it calls -`_update_measure_xml(mdir, language)`. That helper executes: - -```text -openstudio measure -u -``` - -without `sandbox.build_env()`, `sandbox.prepare_workdir()`, or -`sandbox.wrap_cmd()`. - -A sandboxed test has write access to its measure directory. It can replace -`measure.rb` with top-level Ruby code, finish successfully, and rely on the -root MCP process to load that modified file during the XML update. This crosses -the sandbox boundary and restores arbitrary code execution with the server's -authority. - -Required fix: - -- Never evaluate a file after it has been writable by untrusted code using an - unsandboxed process. -- Run `openstudio measure -u` through the sandbox. -- Prefer running it on a fresh private copy and copying only validated - `measure.xml` metadata back. -- Alternatively, update checksums/metadata with a non-executing XML writer. - -### High: measure and test timeout handling can leave descendants alive - -`apply_measure`, reporting-measure tests, and normal measure tests use -`subprocess.run(..., timeout=...)` without a new session or cgroup. On timeout, -Python terminates the immediate wrapper process, but forked or daemonized -children can survive. - -Those descendants retain their inherited Landlock/seccomp policy, but they can -continue consuming CPU, PIDs, memory, and storage. They can also interfere with -other sandbox workers that share UID 1001. - -Required fix: - -- Start every untrusted execution in a new session/process group. -- On timeout, send TERM and then KILL to the complete process group. -- Prefer a per-job cgroup or container and destroy that unit on completion. - -### High: shared UID and PID namespace weaken cross-user isolation - -Every worker uses UID 1001 in the same container and PID namespace. `/proc` is -also granted read access. Concurrent jobs can observe process metadata and may -signal same-UID processes. Depending on kernel ptrace policy, they may also -inspect or manipulate peer processes. - -The default simulation concurrency of one does not cover concurrent -`apply_measure` or `test_measure` calls. - -Required fix: - -- Use a separate PID namespace or container for each job. -- At minimum, deny `ptrace`, `process_vm_readv`, `process_vm_writev`, and - cross-worker signaling where practical. -- Use per-job UIDs if multiple jobs remain in one PID namespace. -- Do not grant the host/container-wide `/proc`; mount a private proc filesystem. - -### High: broad `/etc`, `/proc`, and `/sys` read grants expose unnecessary data - -The Landlock policy grants all of `/etc`, `/proc`, and `/sys` read access. -Dropped UID permissions block some sensitive files, but world-readable -configuration, process metadata, mounted configuration secrets, kernel details, -and service information remain visible. - -Required fix: - -- Determine the exact runtime files OpenStudio requires and grant only those. -- Stage required certificates, locale data, timezone data, and configuration - into a minimal runtime root. -- Use mount/PID namespaces where broad compatibility paths are unavoidable. - -### Medium: runtime hardening is not part of the documented default launch - -The image adds a sandbox UID, but the long-lived MCP server still runs as root. -The README launch example does not apply `--read-only`, `--cap-drop=ALL`, -`no-new-privileges`, PID/memory/CPU limits, or read-only input mounts. - -The subprocess sandbox reduces measure risk, but ordinary MCP server bugs still -execute with root authority inside a container with writable host mounts. - -Recommended fix: - -- Run the control-plane server as non-root where OpenStudio permits it. -- Make `/inputs`, `/repo`, and `/skills` read-only. -- Apply container resource limits and capability restrictions. -- Keep only a dedicated output/job directory writable. - -### Medium: sandbox status is not actually surfaced - -`active_tier()` exists but is not included in tool responses or audit events. -It infers the tier from platform/configuration rather than a successful backend -probe. Full mode does fail closed at execution time, which is good, but -operators cannot inspect the effective state before submitting work. - -Recommended fix: - -- Probe Landlock/seccomp at startup. -- Expose the result in `get_server_status`, audit logs, and every execution - result. -- Distinguish requested mode from verified active controls. - -### Medium: public `run_osw` still copies the entire parent directory - -Path and escaping-symlink checks close the arbitrary-host-file issue, but -copying the whole parent directory is still unnecessarily broad. An OSW in a -large allowed directory can copy unrelated data and consume substantial disk. - -Recommended fix: - -- Parse the OSW and stage only explicitly referenced seed, weather, measure, - file, and companion resources. -- Reject absolute and parent-traversing references unless separately authorized. -- Apply file-count and byte limits during staging. - -### Low: rlimit installation failures do not fail closed - -The shim logs an rlimit failure and continues. Landlock and seccomp correctly -fail closed, but resource backstops can silently be absent. - -Recommended fix: - -- Treat configured mandatory limits as fatal. -- Record applied limits in the execution result. - -## Changes That Correctly Address `develop` - -- `apply_measure`, reporting-measure tests, normal tests, and simulations now - pass through a common sandbox wrapper. -- The subprocess environment is explicitly allowlisted instead of inheriting - server secrets. -- Full mode fails closed if Landlock or seccomp cannot engage. -- Landlock is read-deny-by-default and limits writes to the job directory. -- `/repo` and shared `/inputs` are no longer granted to the confined process. -- `/dev` is reduced to specific device files. -- Seccomp denies IPv4/IPv6 sockets and handles unexpected architectures and x32 - fail-closed. -- `io_uring_setup` is denied to prevent a network-filter bypass. -- The child drops to a validated non-root UID/GID and sets `no_new_privs`. -- Public simulation/OSW paths now use `is_path_allowed()`. -- Escaping symlinks are rejected before staging. -- Shared measures are copied before tests mutate them. -- Simulation wall-clock timeout and process-group cancellation are implemented. -- Ruby interpolation and generated argument identifiers are validated. -- Unknown sandbox modes and non-finite timeout values no longer silently weaken - policy. -- Security tests use dual-run canaries to prove both the original vulnerability - and the confined behavior. - -## How I Would Design It Differently - -For untrusted or remote multi-user use, I would make the process sandbox a -secondary control and use a per-job worker as the primary boundary: - -```text -non-root MCP control plane - -> validated immutable job manifest - -> trusted runner service - -> ephemeral worker container using gVisor/runsc -``` - -Each worker would receive: - -- One read-only staged input directory containing only referenced files. -- One private writable output directory. -- No `/repo`, shared `/runs`, `/inputs`, user home, credentials, or Docker - socket. -- `--network=none`. -- A read-only root filesystem and bounded tmpfs. -- Non-root UID, all capabilities dropped, `no-new-privileges`. -- Dedicated PID, mount, IPC, user, and network namespaces. -- Cgroup limits for memory, CPU, PIDs, I/O, and wall time. -- Automatic destruction on success, failure, cancellation, or timeout. - -The control plane would validate and copy back only expected artifacts. It -would never execute generated Ruby or Python after a worker has modified it. - -This costs more operationally, but it gives a much clearer security claim: -arbitrary measure code can compromise only an ephemeral worker and its private -job data, not the long-lived MCP process or another user's execution. - -## Verification - -Focused Windows-compatible tests: - -```text -46 passed, 16 skipped, 1 deselected -``` - -The excluded test directly exercises Linux non-root Landlock behavior and -cannot run on Windows. Docker integration could not be rerun during this review -because access to the local Docker Desktop engine was denied. diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index 30bb848..5536df9 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -5,6 +5,7 @@ """ from __future__ import annotations +import contextlib import re import shutil import subprocess @@ -16,7 +17,7 @@ from mcp_server import sandbox from mcp_server.config import INPUT_ROOT, is_path_allowed, user_run_root -from mcp_server.util import reject_escaping_symlinks +from mcp_server.util import read_file_bounded, reject_escaping_symlinks def custom_measures_dir() -> Path: @@ -538,14 +539,17 @@ def _copy_generated(name: str, max_bytes: int) -> None: source = staged_measure / name if not source.exists(): return - if not source.is_file() or source.is_symlink(): - raise RuntimeError(f"Failed to update measure metadata: invalid generated {name}") - data = source.read_bytes() - if len(data) > max_bytes: + # Bounded, no-follow read: `measure -u` ran UNTRUSTED code (confined) + # that could have swapped the output for a symlink (-> a host secret) + # or a giant file. read_file_bounded uses O_NOFOLLOW and caps the read + # at max_bytes+1, so neither a TOCTOU symlink swap nor an oversized + # file reaches the unconfined server. + try: + data = read_file_bounded(source, max_bytes) + except ValueError as e: raise RuntimeError( - f"Failed to update measure metadata: generated {name} exceeds " - f"{max_bytes} bytes", - ) + f"Failed to update measure metadata: invalid generated {name}: {e}", + ) from e temp_path: Path | None = None with tempfile.NamedTemporaryFile( dir=measure_dir, @@ -555,7 +559,10 @@ def _copy_generated(name: str, max_bytes: int) -> None: fh.write(data) temp_path = Path(fh.name) try: - temp_path.chmod(0o644) + # chmod is best-effort: replace() is the critical step, and some + # filesystems/mounts (or non-POSIX hosts) reject chmod. + with contextlib.suppress(OSError): + temp_path.chmod(0o644) temp_path.replace(measure_dir / name) finally: temp_path.unlink(missing_ok=True) @@ -1178,15 +1185,24 @@ def test_measure_op( # Update measure.xml checksums — test_model.osm was added to tests/, # and any new files must be registered for OS App Measure Manager. - _update_measure_xml(mdir, language) - - return { + # Best-effort here: the (confined) test already ran and its pass/fail is + # the contract. A metadata-refresh failure (measure -u nonzero, or a + # rejected oversize/symlink output) is surfaced as a warning, NOT a test + # failure — confinement, not this call's exit code, is the security + # boundary, so a passing test must not be masked. (create/edit keep the + # raise: there the regenerated measure.xml IS the product.) + result = { "ok": proc.returncode == 0, "passed": passed, "failed": failed, "errors": errors, "test_output": display, } + try: + _update_measure_xml(mdir, language) + except RuntimeError as e: + result["metadata_warning"] = str(e) + return result except subprocess.TimeoutExpired: return {"ok": False, "error": "Test run timed out (60s)"} diff --git a/mcp_server/util.py b/mcp_server/util.py index d3d2926..bbe9308 100644 --- a/mcp_server/util.py +++ b/mcp_server/util.py @@ -3,6 +3,7 @@ import json import os import shutil +import stat from pathlib import Path @@ -32,6 +33,50 @@ def reject_escaping_symlinks(root: Path) -> str | None: return None +def read_file_bounded(path: Path, max_bytes: int) -> bytes: + """Read a regular file WITHOUT following a final-component symlink, capping + the read at ``max_bytes``. + + Hardening for copying back files produced by confined-but-untrusted code + (e.g. ``openstudio measure -u`` regenerating ``measure.xml`` in a staged + dir): + * O_NOFOLLOW (where available) closes the symlink-swap TOCTOU — a measure + that replaces the output with a link to e.g. /etc/shadow cannot redirect + the read to the link target. + * the read stops at ``max_bytes + 1``, so an oversized output cannot be + slurped wholesale into memory before being rejected. + + Raises ``ValueError`` on a symlink, a non-regular file (dir/fifo/device), an + oversize file, or an unreadable path. On platforms without O_NOFOLLOW the + symlink guard degrades to the caller's own is_symlink() check (the sandbox + targets Linux). + """ + flags = (os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0) + | getattr(os, "O_NONBLOCK", 0) | getattr(os, "O_BINARY", 0)) + try: + fd = os.open(path, flags) + except OSError as e: + # ELOOP => final component is a symlink (O_NOFOLLOW); surface clearly. + raise ValueError(f"cannot read {path} (symlink or unreadable): {e}") from e + try: + if not stat.S_ISREG(os.fstat(fd).st_mode): + raise ValueError(f"not a regular file: {path}") + chunks: list[bytes] = [] + remaining = max_bytes + 1 + while remaining > 0: + chunk = os.read(fd, remaining) + if not chunk: + break + chunks.append(chunk) + remaining -= len(chunk) + data = b"".join(chunks) + if len(data) > max_bytes: + raise ValueError(f"file exceeds {max_bytes} bytes: {path}") + return data + finally: + os.close(fd) + + def safe_read_text(path: Path, max_bytes: int = 200_000) -> str: data = safe_read_bytes(path, max_bytes=max_bytes) return data.decode("utf-8", errors="replace") diff --git a/tests/test_measure_authoring.py b/tests/test_measure_authoring.py index b48ad4e..70580bf 100644 --- a/tests/test_measure_authoring.py +++ b/tests/test_measure_authoring.py @@ -4,6 +4,7 @@ """ import asyncio import uuid +from pathlib import Path import pytest from conftest import integration_enabled, server_params, setup_example, unwrap @@ -203,6 +204,59 @@ async def _run(): asyncio.run(_run()) +@pytest.mark.integration +def test_xml_refresh_failure_preserves_passing_test_results(): + # Regression: a post-test metadata-refresh failure must NOT mask a passing + # (confined) test. The test drops an escaping symlink while confined; the + # post-test reject_escaping_symlinks guard then makes _update_measure_xml + # raise. Before this fix that raise propagated and test_measure returned + # ok:False, dropping passed/failed. Confinement — not the refresh's success — + # is the security boundary. + if not integration_enabled(): + pytest.skip("integration disabled") + + async def _run(): + async with stdio_client(server_params()) as (r, w): + async with ClientSession(r, w) as s: + await s.initialize() + create = unwrap(await s.call_tool("create_measure", { + "name": _unique("xml_refresh_warn"), + "description": "metadata refresh warning probe", + "run_body": ' runner.registerInfo("ok")', + "language": "Ruby", + })) + assert create["ok"] is True, create + measure_dir = Path(create["measure_dir"]) + # Replace the generated test with a PASSING one that, while running + # confined, plants an escaping symlink in the measure dir. The + # post-test reject_escaping_symlinks guard trips _update_measure_xml + # — deterministic, unlike `measure -u`'s (very tolerant) exit code. + test_files = sorted((measure_dir / "tests").glob("*_test.rb")) + assert len(test_files) == 1, test_files + test_files[0].write_text( + "require 'minitest/autorun'\n" + "class EscapeLinkTest < Minitest::Test\n" + " def test_drop_escaping_symlink\n" + " File.symlink('/etc/shadow', 'evil_escape_link') " + "unless File.symlink?('evil_escape_link')\n" + " assert(File.symlink?('evil_escape_link'))\n" + " end\n" + "end\n", + encoding="utf-8", + ) + return unwrap(await s.call_tool("test_measure", { + "measure_dir": str(measure_dir), + })) + + res = asyncio.run(_run()) + assert res["ok"] is True, f"passing test masked by xml-refresh failure: {res}" + assert res["passed"] > 0, res + assert "metadata_warning" in res, ( + f"expected non-fatal metadata_warning when refresh fails, got: {res}" + ) + assert "escape" in res["metadata_warning"].lower(), res["metadata_warning"] + + @pytest.mark.integration def test_test_measure_python_passes(): """Create a simple Python measure, run its tests.""" diff --git a/tests/test_path_safety.py b/tests/test_path_safety.py index 16aa24d..48b954f 100644 --- a/tests/test_path_safety.py +++ b/tests/test_path_safety.py @@ -5,12 +5,56 @@ from __future__ import annotations import json +import os import subprocess as _subprocess import pytest +from mcp_server.util import read_file_bounded + pytestmark = pytest.mark.unit + +class TestReadFileBounded: + """read_file_bounded — bounded, symlink-refusing copy-back read used after + confined `openstudio measure -u` regenerates measure.xml/README.md.""" + + def test_reads_within_bound(self, tmp_path): + # Validates: a normal generated file is returned verbatim. + f = tmp_path / "measure.xml" + f.write_bytes(b"" * 5) + assert read_file_bounded(f, 1000) == b"" * 5 + + def test_rejects_oversize(self, tmp_path): + # Regression: untrusted measure -u could write a giant measure.xml; the + # read is bounded (max_bytes+1) so it can't be slurped wholesale into + # the unconfined server's memory (Copilot C1). + f = tmp_path / "big.xml" + f.write_bytes(b"A" * 200) + with pytest.raises(ValueError, match="exceeds"): + read_file_bounded(f, 100) + + @pytest.mark.skipif(not hasattr(os, "O_NOFOLLOW"), reason="O_NOFOLLOW is POSIX-only") + def test_refuses_to_follow_symlink(self, tmp_path): + # Regression: measure -u (untrusted) could swap the output for a symlink + # to a host secret; O_NOFOLLOW means copy-back never reads the link + # target (TOCTOU). A naive read_bytes() would return the secret here. + secret = tmp_path / "secret" + secret.write_bytes(b"TOPSECRET") + link = tmp_path / "measure.xml" + link.symlink_to(secret) + with pytest.raises(ValueError, match="symlink"): + read_file_bounded(link, 1000) + + @pytest.mark.skipif(not hasattr(os, "O_NONBLOCK"), reason="POSIX fifo test") + def test_refuses_non_regular_file(self, tmp_path): + # Validates: a fifo/dir masquerading as a generated file is rejected + # (not a regular file) instead of blocking or reading junk. + d = tmp_path / "README.md" + d.mkdir() + with pytest.raises(ValueError, match="not a regular file"): + read_file_bounded(d, 1000) + # --------------------------------------------------------------------------- # C-1: seed_file path traversal guard in run_osw # --------------------------------------------------------------------------- From 3afb1e0b2f3e994730d8b947761df7a234fc114b Mon Sep 17 00:00:00 2001 From: brianlball Date: Sun, 7 Jun 2026 12:47:20 -0400 Subject: [PATCH 26/26] fix(security): per-tenant sandbox uids + drop /proc from Landlock (H1/M1/M2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Post-merge hardening of the measure/sim sandbox for multi-tenant HTTP. H1 (secret leak): /proc was a Landlock RO root, so a confined measure could read /proc//environ — on a non-root server (measure uid == server uid) that recovers the server secrets clean-env stripped, incl. HTTP auth tokens. Drop /proc from the RO allowlist; Landlock now denies it for everyone. Verified EnergyPlus/OpenStudio still run (test_mcp_seb4 + full sandbox suite green). M1 (fork-bomb DoS) + M2 (posix no FS isolation): every tenant shared uid 1001, so RLIMIT_NPROC (per-uid) was one shared budget (one tenant starves all) and posix-tier run dirs were mutually readable by DAC. Add config.sandbox_ids(): each remote tenant (HTTP session / auth principal) derives its own stable uid via hashlib (2000..61999, gid==uid); LOCAL keeps the baked 1001. Wired into wrap_cmd + prepare_workdir so chown and setuid agree. Per-uid NPROC budgets + DAC isolation even without Landlock. Tests (red-green verified, all RED on unfixed code): test_path_safety TestPerTenantSandboxUid (distinct/stable uids, LOCAL base, uid<=0 clamp); test_sandbox proc-not-in-RO-allowlist, full-tier-denies-/proc/self/environ, per-tenant-uid DAC isolation. Updated test_posix/test_h5 uid assertions to the per-tenant contract (HTTP fixtures now derive a uid); base-uid clamp moved to a unit test. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp_server/config.py | 39 ++++++++++++++++ mcp_server/sandbox.py | 23 +++++++--- tests/test_path_safety.py | 43 ++++++++++++++++++ tests/test_sandbox.py | 94 +++++++++++++++++++++++++++++++++++---- 4 files changed, 184 insertions(+), 15 deletions(-) diff --git a/mcp_server/config.py b/mcp_server/config.py index a541f2e..5eff5d4 100644 --- a/mcp_server/config.py +++ b/mcp_server/config.py @@ -118,6 +118,45 @@ def _safe_sandbox_id(env_val: str, default: int) -> int: SANDBOX_UID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_UID", "1001"), 1001) SANDBOX_GID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_GID", "1001"), 1001) +# Per-tenant uid derivation (multi-user hardening). Every distinct remote caller +# (HTTP session / auth principal) gets its OWN sandbox uid so that: +# - RLIMIT_NPROC — which the kernel counts PER REAL UID — is a private budget, +# so one tenant's fork bomb can't starve every other tenant's runs; and +# - DAC isolates tenants' run dirs from each other even in the posix tier +# (no Landlock), since the dirs are chowned to, and processes run as, +# different uids. +# The local single user (stdio / off-request) keeps the baked-in SANDBOX_UID, +# preserving today's behavior. Derived uids sit above the system/sandbox range +# and below `nobody`, and are never root. +_PER_TENANT_UID_BASE = 2000 +_PER_TENANT_UID_SPAN = 60000 # uids 2000..61999 + + +def _derive_tenant_uid(key: str) -> int: + """Stable, process-independent uid for a tenant key. + + Uses hashlib (NOT the salted builtin hash()) so chown (server) and setuid + (shim, a separate process) derive the SAME uid for a request across restarts. + """ + import hashlib + digest = hashlib.sha256(key.encode("utf-8")).digest() + return _PER_TENANT_UID_BASE + (int.from_bytes(digest[:4], "big") % _PER_TENANT_UID_SPAN) + + +def sandbox_ids() -> tuple[int, int]: + """(uid, gid) the CURRENT caller's confined subprocess drops to. + + Resolved per-call from caller identity (same key in → same uid out, so a + request's prepare_workdir chown and the shim's setuid agree). LOCAL → the + baked SANDBOX_UID/GID; each remote tenant → its own derived uid (gid==uid). + """ + from mcp_server.identity import LOCAL, user_key + key = user_key() + if key == LOCAL: + return (SANDBOX_UID, SANDBOX_GID) + uid = _derive_tenant_uid(key) + return (uid, uid) + # rlimit backstops for confined subprocesses (0 = off). Generous by design — they # catch runaway bombs, not tune normal use. CPU/AS default off: a CPU-second cap # would kill long annual sims, and RLIMIT_AS (virtual address space) breaks diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py index 1f8a61d..80bc85b 100644 --- a/mcp_server/sandbox.py +++ b/mcp_server/sandbox.py @@ -31,7 +31,6 @@ from mcp_server.config import ( COMMON_MEASURES_DIR, COMSTOCK_MEASURES_DIR, - SANDBOX_GID, SANDBOX_MODE, SANDBOX_NET, SANDBOX_RLIMIT_AS, @@ -39,8 +38,8 @@ SANDBOX_RLIMIT_FSIZE, SANDBOX_RLIMIT_NOFILE, SANDBOX_RLIMIT_NPROC, - SANDBOX_UID, SKILLS_DIR, + sandbox_ids, ) # Read-only roots the confined subprocess legitimately needs (Landlock grants @@ -48,8 +47,13 @@ # by default: anything not listed — another user's run, /tmp, arbitrary host # files — is unreadable. Missing paths are skipped by the Landlock layer. _FULL_MODES = ("auto", "full", "landlock") +# /proc is deliberately NOT granted (H1): with /proc readable a confined measure +# could read /proc//environ and, on a non-root server (measure uid == server +# uid), recover the server's secrets that clean-env stripped — including HTTP auth +# tokens — and inspect peer tenants' processes. OpenStudio/EnergyPlus run without +# it. /sys/<...> hardware info stays (no secrets; some libs read CPU topology). _RO_SYSTEM = ("/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc", - "/opt/venv", "/usr/local", "/var/oscli", "/proc", "/sys") + "/opt/venv", "/usr/local", "/var/oscli", "/sys") # Specific device files the confined process needs — granted individually instead # of the whole /dev directory. A `/dev` rw grant also exposes writable /dev/shm and @@ -192,9 +196,13 @@ def wrap_cmd(cmd: list[str], work_dir: Path | str, "full FS/network confinement", file=sys.stderr) _warned_no_backend = True return list(cmd) + # Per-tenant uid/gid: each remote caller drops to its own uid (private NPROC + # budget + DAC isolation); the local single user keeps the base uid. Resolved + # here so it matches the chown prepare_workdir does for the same request. + uid, gid = sandbox_ids() wrapped = [ sys.executable, "-m", "mcp_server._sandbox_exec", - "--uid", str(SANDBOX_UID), "--gid", str(SANDBOX_GID), + "--uid", str(uid), "--gid", str(gid), ] for flag, value in ( ("--rlimit-fsize", SANDBOX_RLIMIT_FSIZE), @@ -233,10 +241,13 @@ def prepare_workdir(work_dir: Path | str) -> None: if not enabled() or not (hasattr(os, "geteuid") and os.geteuid() == 0): return work = Path(work_dir) + # Per-tenant uid/gid (matches the shim's setuid for this same request) so the + # dropped process owns its run dir and tenants are DAC-isolated from each other. + uid, gid = sandbox_ids() # follow_symlinks=False (lchown): never let a symlink inside the dir redirect # the chown to a target outside it (symlink-traversal privilege issue). with contextlib.suppress(OSError): - os.chown(work, SANDBOX_UID, SANDBOX_GID, follow_symlinks=False) + os.chown(work, uid, gid, follow_symlinks=False) for path in work.rglob("*"): with contextlib.suppress(OSError): - os.chown(path, SANDBOX_UID, SANDBOX_GID, follow_symlinks=False) + os.chown(path, uid, gid, follow_symlinks=False) diff --git a/tests/test_path_safety.py b/tests/test_path_safety.py index 48b954f..71cd926 100644 --- a/tests/test_path_safety.py +++ b/tests/test_path_safety.py @@ -55,6 +55,49 @@ def test_refuses_non_regular_file(self, tmp_path): with pytest.raises(ValueError, match="not a regular file"): read_file_bounded(d, 1000) + +class TestPerTenantSandboxUid: + """Per-tenant sandbox uid derivation (M1/M2). Each remote tenant gets its own + uid so RLIMIT_NPROC (per-uid) is a private budget and DAC isolates run dirs + even without Landlock; the local single user keeps the baked-in base uid.""" + + def test_local_keeps_base_uid(self, monkeypatch): + # Validates: stdio/off-request single user keeps the baked sandbox uid — + # preserves today's behavior, no per-tenant derivation. + import mcp_server.config as cfg + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "local") + assert cfg.sandbox_ids() == (cfg.SANDBOX_UID, cfg.SANDBOX_GID) + + def test_distinct_uid_per_tenant(self, monkeypatch): + # Regression (M1/M2): all tenants shared uid 1001 -> one shared NPROC + # budget (cross-tenant fork-bomb DoS) and zero DAC isolation in the posix + # tier. Each tenant key must map to its OWN non-root uid. + import mcp_server.config as cfg + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "alice") + a = cfg.sandbox_ids() + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "bob") + b = cfg.sandbox_ids() + assert a != b, "distinct tenants must get distinct sandbox uids" + assert a[0] >= 2000 and b[0] >= 2000, "derived uids must be above the system/sandbox range" + assert a[0] != cfg.SANDBOX_UID, "a tenant uid must differ from the local base uid" + assert a[1] == a[0] and b[1] == b[0], "gid tracks uid" + + def test_uid_stable_for_same_key(self, monkeypatch): + # Validates: derivation is stable across calls (hashlib, not randomized + # hash()) so a request's chown and setuid agree on the same uid. + import mcp_server.config as cfg + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "carol") + assert cfg.sandbox_ids() == cfg.sandbox_ids() + + def test_sandbox_uid_zero_clamped(self): + # Regression (Codex H5): OSMCP_SANDBOX_UID<=0 must clamp to a non-root uid + # so the local base uid can never be root (kept as a unit test now that the + # HTTP h5 path exercises per-tenant derivation instead of the base uid). + import mcp_server.config as cfg + assert cfg._safe_sandbox_id("0", 1001) == 1001 + assert cfg._safe_sandbox_id("-5", 1001) == 1001 + assert cfg._safe_sandbox_id("4242", 1001) == 4242 + # --------------------------------------------------------------------------- # C-1: seed_file path traversal guard in run_osw # --------------------------------------------------------------------------- diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index 00e62e6..ca24297 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -23,8 +23,8 @@ are rejected (check is unconditional, not gated by OSMCP_SANDBOX). Fix increment 2 (UID drop + rlimits) and increment 3 (Landlock FS + seccomp net-deny) have landed: - - test_posix_drops_root_and_applies_rlimits — uid 1001, NPROC rlimit, DAC blocks - a root-owned write (OSMCP_SANDBOX=posix). + - test_posix_drops_root_and_applies_rlimits — per-tenant non-root uid, NPROC + rlimit, DAC blocks a root-owned write (OSMCP_SANDBOX=posix). - test_full_tier_blocks_read_escape_and_network — Landlock blocks reading a world-readable file outside the run dir, blocks writing outside it, and seccomp blocks outbound IP networking (OSMCP_SANDBOX=auto), while the run @@ -39,6 +39,7 @@ """ import asyncio import os +import re import shutil import socket import subprocess @@ -402,9 +403,15 @@ async def _run(): info = asyncio.run(_run()) - # --- Assert: privilege dropped, rlimit applied, DAC blocks the escape write --- - assert "PROBE uid=1001" in info, f"measure should drop to sandbox uid 1001; info=\n{info}" - assert "PROBE uid=0" not in info, "measure must not run as root under posix" + # --- Assert: privilege dropped to a per-tenant uid, rlimit applied, DAC blocks + # the escape write. The HTTP fixture is session-keyed, so the measure drops to a + # derived per-tenant uid (M1/M2), not the shared base 1001. --- + m = re.search(r"PROBE uid=(\d+)", info) + assert m, f"probe must report uid; info=\n{info}" + uid = int(m.group(1)) + assert uid != 0, "measure must not run as root under posix" + assert 2000 <= uid < 62000, \ + f"HTTP tenant must drop to a per-tenant sandbox uid (2000..61999), got {uid}; info=\n{info}" assert "PROBE nproc=1024" in info, f"NPROC rlimit (1024) should be applied; info=\n{info}" assert "PROBE write_err=" in info, \ f"write to root-owned /opt should be denied under posix; info=\n{info}" @@ -788,10 +795,16 @@ async def _run(): return "\n".join(res.get("runner_messages", {}).get("info", [])) info = asyncio.run(_run()) - assert "PROBE uid=0" not in info, \ - f"H5: measure must NOT run as root with OSMCP_SANDBOX_UID=0; info=\n{info}" - assert "PROBE uid=1001" in info, \ - f"H5: sandbox uid must clamp to the safe default 1001; info=\n{info}" + # OSMCP_SANDBOX_UID=0 must never yield a root-running measure. The base-uid + # clamp is unit-tested (test_sandbox_uid_zero_clamped); this HTTP fixture is + # session-keyed, so the measure drops to a per-tenant derived uid (also never + # root) regardless of the bad base override. + m = re.search(r"PROBE uid=(\d+)", info) + assert m, f"H5: probe must report uid; info=\n{info}" + uid = int(m.group(1)) + assert uid != 0, f"H5: measure must NOT run as root with OSMCP_SANDBOX_UID=0; info=\n{info}" + assert 2000 <= uid < 62000, \ + f"H5: confined code must run as a non-root per-tenant uid, got {uid}; info=\n{info}" def test_unknown_sandbox_mode_fails_closed(): @@ -885,6 +898,69 @@ def test_test_measure_does_not_mutate_shared_source(tmp_path, monkeypatch): "shared measure source/tests was mutated" +def test_proc_not_in_landlock_ro_allowlist(): + # Regression (H1): /proc was a Landlock RO root, so a confined measure could + # read /proc//environ. On a non-root server (measure uid == server uid) + # that recovers the server secrets clean-env stripped — incl. HTTP auth + # tokens. /proc must NOT be granted; Landlock then denies it for everyone. + from mcp_server import sandbox + ro = sandbox._ro_paths() + assert "/proc" not in ro, f"/proc must not be a Landlock RO root (H1): {ro}" + + +def test_full_tier_denies_proc_environ_read(): + # Regression (H1, dual-proof): with /proc granted, a confined process could + # read /proc/self/environ (DAC always allows self) — the same surface that + # leaks /proc//environ on a non-root server. Built via the REAL + # wrap_cmd so it tracks the actual allowlist; the read must now be DENIED. + from mcp_server import sandbox + if sandbox.active_tier() != "landlock": + pytest.skip("needs full tier (OSMCP_SANDBOX=auto)") + workdir = tempfile.mkdtemp() + cmd = sandbox.wrap_cmd(["cat", "/proc/self/environ"], workdir) + try: + proc = subprocess.run( # noqa: S603 - fixed argv, no shell + cmd, capture_output=True, text=True, cwd="/repo", + user=1001, group=1001, check=False, + ) + out = (proc.stdout + proc.stderr).lower() + assert proc.returncode != 0, f"/proc read must be denied (H1); rc=0 out={out[:200]}" + assert ("denied" in out or "no such" in out + or "operation not permitted" in out), f"expected denial; out={out[:300]}" + finally: + shutil.rmtree(workdir, ignore_errors=True) + + +def test_per_tenant_uid_gives_dac_isolation(): + # Regression (M2): every tenant shared uid 1001, so in the posix tier (no + # Landlock) tenant B could read tenant A's run files by plain DAC. Per-tenant + # uids restore DAC isolation — a 0600 file owned by tenant A's uid is + # unreadable by tenant B's uid, with no Landlock involved. + from mcp_server import config + uid_a = config._derive_tenant_uid("tenantA") + uid_b = config._derive_tenant_uid("tenantB") + assert uid_a != uid_b, "distinct tenants must derive distinct uids" + d = Path(tempfile.mkdtemp()) + try: + secret = d / "modelA.osm" + secret.write_text("TENANT_A_PRIVATE") + os.chown(secret, uid_a, uid_a) + secret.chmod(0o600) + # dir must be traversable so the denial proven is the file's, not the dir's + d.chmod(0o755) + # absolute path (no PATH lookup), fixed argv, no shell + proc = subprocess.run( # noqa: S603 + ["/bin/cat", str(secret)], + capture_output=True, text=True, + user=uid_b, group=uid_b, check=False, + ) + out = (proc.stdout + proc.stderr).lower() + assert proc.returncode != 0, f"tenant B must not read tenant A's 0600 file; rc=0 out={out}" + assert "permission denied" in out, f"expected DAC denial; out={out}" + finally: + shutil.rmtree(d, ignore_errors=True) + + def test_input_root_not_in_landlock_ro_allowlist(): # Regression (Codex #5): /inputs (INPUT_ROOT) is a SHARED multi-tenant dir; # granting it read in the Landlock policy let one tenant's confined measure read