voshch · catmub · Jun 9, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/.gitignore b/.gitignore
@@ -71,3 +71,4 @@ __pycache__/
 
 # agents
 /arena_training/agents/
+_meta/docker/features/vla_server/model/*.pth
diff --git a/_meta/docker/features/vla_server/AUTHORING.md b/_meta/docker/features/vla_server/AUTHORING.md
@@ -0,0 +1,90 @@
+# Authoring a new VLA/VLN model
+
+End-to-end guide for serving a new vision-language navigation model through the
+`vla_server` feature. Integration means writing a small HTTP **runner** that the
+daemon launches on demand and that emits the actions contract; the Arena side
+(the `vla` mobile adapter) needs no changes beyond pointing at your model.
+
+The reference runner is [server_omnivla_edge.py](server_omnivla_edge.py); copy it
+as a starting point.
+
+## Prerequisites
+
+- The feature installed once: `arena feature vla_server install`.
+- Your weights published somewhere `curl` can fetch (HuggingFace today).
+
+## 1. Add a `models.yaml` row
+
+[models.yaml](models.yaml) is the model registry. Add one row keyed by the model
+name you will select:
+
+```yaml
+my-vln:
+  repo: Org/my-vln        # HuggingFace repo id
+  weights: my-vln.pth     # file in the repo; also the file your runner loads
+  server: server_my_vln.py  # runner script, next to this file
+```
+
+## 2. Write the runner (`server_my_vln.py`)
+
+The daemon launches it as `python3 server_my_vln.py --port <p> --weights <path>`,
+so it must accept `--port` and `--weights` and serve these endpoints:
+
+| Method | Path | Body | Returns |
+|---|---|---|---|
+| GET | `/health` | (none) | `200` once the model is loaded and ready |
+| POST | `/reset` | form: `session` | drops that session's history |
+| POST | `/act` | multipart: file `image` (JPEG) + form `instruction`, `session` | the actions JSON below |
+
+`session` is a per-robot id (the robot's namespace); keep any rolling observation
+history keyed by it, and clear it on `/reset`. The daemon loads each model once
+and caches it, so a single runner serves all robots/envs requesting that model;
+concurrent robots serialize on one GPU.
+
+The `/act` response is the wire contract; its typed schema in
+[contract.py](../../../../task_generator/task_generator/tasks/robots/vla/contract.py)
+is the SSOT:
+
+```json
+{
+  "actions": { "mobile": { "waypoints": [
+    { "x": 0.5, "y": 0.0, "yaw": 0.1 },
+    { "x": 1.0, "y": 0.2, "yaw": 0.2 }
+  ] } },
+  "meta": { "intent": "heading for the doorway" }
+}
+```
+
+- `actions.mobile` is a key-tagged union; emit **exactly one** form. `waypoints`
+  is the only form consumed today (base-relative, meters and radians).
+- `meta` is optional; omit it (or `meta.intent`) if your model emits no language.
+  When present, `intent` is shown as a text marker above the robot.
+
+## 3. Download weights and build
+
+```bash
+arena feature vla_server update
+```
+
+`update` fetches every `models.yaml` row's weights into
+`$ARENA_DATA_DIR/vla/<model>/<weights>` (idempotent, skips existing) and rebuilds
+the image; your runner script is bind-mounted from the source tree, not baked in.
+
+## 4. Select your model
+
+Model selection is a constant today: set `_MODEL` in
+[adapters/mobile/vla.py](../../../../task_generator/task_generator/tasks/robots/adapters/mobile/vla.py)
+to your `models.yaml` key (`my-vln`). It becomes a registry-backed param once
+multi-model selection lands.
+
+Then run a VLA episode (`tm_robots:=vla`); the adapter ensures the daemon, calls
+`POST /ensure {model}` to load your runner, and connects for `/act`.
+
+## Adding a new action form
+
+To return something other than `waypoints` (for example direct `cmd_vel`):
+
+1. Add the typed variant to the `mobile` union in [contract.py](../../../../task_generator/task_generator/tasks/robots/vla/contract.py) and its `_parse_mobile` branch.
+2. Register a handler for it in the `_mobile_handlers` map in [adapters/mobile/vla.py](../../../../task_generator/task_generator/tasks/robots/adapters/mobile/vla.py).
+
+The adapter dispatches per form, so existing models keep working unchanged.
diff --git a/_meta/docker/features/vla_server/Dockerfile b/_meta/docker/features/vla_server/Dockerfile
@@ -0,0 +1,18 @@
+ARG TORCH_TAG=2.5.1-cuda12.4-cudnn9-runtime
+FROM pytorch/pytorch:${TORCH_TAG} AS base
+
+USER root
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential git curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir --upgrade pip
+
+# install, patch and pre-fetch CLIP on its own layer so later dependency changes do not re-download it
+RUN pip install openai-clip \
+    && sed -i 's/from pkg_resources import packaging/import packaging, packaging.version/' \
+       "$(python -c 'import clip, os; print(os.path.dirname(clip.__file__))')/clip.py" \
+    && python -c "import clip; clip.load('ViT-B/32', device='cpu')"
+
+RUN pip install fastapi "uvicorn[standard]" Pillow numpy requests packaging matplotlib efficientnet_pytorch python-multipart pyyaml
diff --git a/_meta/docker/features/vla_server/daemon.py b/_meta/docker/features/vla_server/daemon.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""Thin model-manager daemon for vla_server.
+
+Holds no model itself. On `POST /ensure {model}` it spawns that model's runner (the server script and
+weights from models.yaml) on a free port if one is not already running, and returns the port. Runners
+share the daemon's host network, so the caller connects to `127.0.0.1:<port>`.
+"""
+
+import os
+import socket
+import subprocess
+import threading
+import time
+import urllib.request
+
+import uvicorn
+import yaml
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+_DIR = os.path.dirname(os.path.abspath(__file__))
+_DATA_DIR = "/opt/arena_ws/data/vla"
+
+with open(os.path.join(_DIR, "models.yaml")) as f:
+    _MODELS = yaml.safe_load(f)
+
+app = FastAPI()
+_lock = threading.Lock()
+_runners: dict[str, tuple[subprocess.Popen, int]] = {}
+
+
+class EnsureRequest(BaseModel):
+    # JSON body of `POST /ensure`
+    model: str  # models.yaml key of the model whose runner is requested
+
+
+def _free_port() -> int:
+    with socket.socket() as s:
+        s.bind(("127.0.0.1", 0))
+        return s.getsockname()[1]
+
+
+def _healthy(port: int) -> bool:
+    try:
+        with urllib.request.urlopen(f"http://127.0.0.1:{port}/health", timeout=1) as r:
+            return r.status == 200
+    except OSError:
+        return False
+
+
+@app.get("/health")
+def health() -> dict:
+    return {"status": "ok"}
+
+
+@app.post("/ensure")
+def ensure(req: EnsureRequest) -> dict:
+    if req.model not in _MODELS:
+        raise HTTPException(status_code=404, detail=f"unknown model {req.model!r}")
+    with _lock:
+        existing = _runners.get(req.model)
+        if existing is not None and existing[0].poll() is None:
+            return {"port": existing[1]}
+        row = _MODELS[req.model]
+        port = _free_port()
+        weights = os.path.join(_DATA_DIR, req.model, row["weights"])
+        proc = subprocess.Popen(["python3", os.path.join(_DIR, row["server"]), "--port", str(port), "--weights", weights])
+        for _ in range(180):
+            if _healthy(port):
+                _runners[req.model] = (proc, port)
+                return {"port": port}
+            if proc.poll() is not None:
+                raise HTTPException(status_code=500, detail=f"runner for {req.model!r} exited with {proc.returncode}")
+            time.sleep(1)
+        proc.terminate()
+        raise HTTPException(status_code=504, detail=f"runner for {req.model!r} did not become healthy")
+
+
+if __name__ == "__main__":
+    # port 8000 is the one the compose healthcheck probes and the feature's `launch` command prints
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/_meta/docker/features/vla_server/docker-compose.yml b/_meta/docker/features/vla_server/docker-compose.yml
@@ -0,0 +1,28 @@
+services:
+  vla_server:
+    image: vla_server:omnivla-edge
+    pull_policy: "never"
+    build:
+      context: .
+      dockerfile: src/Arena/_meta/docker/features/vla_server/Dockerfile
+      # args:
+    volumes:
+      - "$HOST_ARENA_WS_DIR/src:/opt/arena_ws/src"
+      - "$HOST_ARENA_DATA_DIR:/opt/arena_ws/data:ro"
+    environment:
+      - HOST_ARENA_WS_DIR
+      - VLA_MODEL_DIR=/opt/arena_ws/src/Arena/_meta/docker/features/vla_server/model
+    command: ["python3", "/opt/arena_ws/src/Arena/_meta/docker/features/vla_server/daemon.py"]
+    healthcheck:
+      test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/health || exit 1"]
+      interval: 5s
+      timeout: 3s
+      retries: 30
+      start_period: 5s
+    network_mode: host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
diff --git a/_meta/docker/features/vla_server/main b/_meta/docker/features/vla_server/main
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+
+name="vla_server"
+
+_feature_dir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
+
+update(){
+    python3 "$_feature_dir/manifest.py" manifest | while IFS=$'\t' read -r model repo weights; do
+        dest="$ARENA_DATA_DIR/vla/${model}/${weights}"
+        if [ ! -s "$dest" ]; then
+            mkdir -p "$(dirname "$dest")"
+            curl -fSL "https://huggingface.co/${repo}/resolve/main/${weights}" -o "$dest"
+        fi
+    done
+    source "$ARENA_DIR/_meta/docker/lib"
+    arena_docker_compose build vla_server
+}
+
+uninstall(){
+    # Remove the vla_server container (best effort) and drop the feature from the registry.
+    # Source the compose helpers like update/launch do; otherwise `|| true` would hide a missing function.
+    source "$ARENA_DIR/_meta/docker/lib"
+    arena_docker_compose rm -fs vla_server || true
+    arena registry remove "$name"
+}
+
+source_fn(){
+    # this feature contributes nothing to the calling shell; just report success
+    true
+}
+
+launch(){
+    # Bring the vla_server container up and print the daemon port on stdout; all diagnostics go to stderr.
+    source "$ARENA_DIR/_meta/docker/lib"
+    if arena_docker_compose up -d --wait vla_server >&2; then
+        echo 8000
+    else
+        echo "vla_server: runner failed to become healthy, recent logs:" >&2
+        arena_docker_compose logs --tail 40 vla_server >&2
+        return 1
+    fi
+}
+
+help(){
+    # one-line usage summary listing the supported subcommands
+    printf 'Usage: %s <install|update|uninstall|launch|source>\n' "$name"
+}
+
+# Subcommand dispatch. `uninstall` and `source` may run either sourced or executed, so they try
+# `return` first and fall back to `exit`; the status is saved beforehand because a failed `return`
+# would otherwise overwrite `$?` before `exit` reads it.
+if [ $# -lt 1 ]; then
+    help
+    exit 1
+fi
+case "$1" in
+    install)
+        _feature_default_install "$name"
+        exit $?
+    ;;
+    update)
+        update
+        exit $?
+    ;;
+    uninstall)
+        uninstall
+        _vla_server_status=$?
+        # shellcheck disable=SC2317
+        return "$_vla_server_status" 2>/dev/null || exit "$_vla_server_status"
+    ;;
+    launch)
+        launch
+        exit $?
+    ;;
+    source)
+        source_fn
+        _vla_server_status=$?
+        # shellcheck disable=SC2317
+        return "$_vla_server_status" 2>/dev/null || exit "$_vla_server_status"
+    ;;
+    *)
+        help
+        exit 1
+    ;;
+esac
diff --git a/_meta/docker/features/vla_server/manifest.py b/_meta/docker/features/vla_server/manifest.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Emit the vla_server weight manifest as `<model>\t<repo>\t<weights>` lines for the download loop."""
+
+import os
+import sys
+
+import yaml
+
+_FEATURE_DIR = os.path.dirname(os.path.abspath(__file__))
+_MODELS_YAML = os.path.join(_FEATURE_DIR, "models.yaml")
+
+
+def main() -> None:
+    if sys.argv[1:2] != ["manifest"]:
+        print(f"usage: {sys.argv[0]} manifest", file=sys.stderr)
+        sys.exit(2)
+    with open(_MODELS_YAML) as f:
+        models = yaml.safe_load(f)
+    for key, row in models.items():
+        print(f"{key}\t{row['repo']}\t{row['weights']}")
+
+
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -71,3 +71,4 @@ __pycache__/

		# agents
		/arena_training/agents/
		_meta/docker/features/vla_server/model/*.pth