diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index 49f2ef005..371c6cefe 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -166,9 +166,12 @@ The VM guest's serial console is appended to `//console.l - Matching rustup target: `rustup target add aarch64-unknown-linux-gnu` (or `x86_64-unknown-linux-gnu` for an amd64 guest) - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `vm:supervisor` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary. - [mise](https://mise.jdx.dev/) task runner -- Docker-compatible socket on the local CLI/gateway host when using +- Docker or Podman socket on the local CLI/gateway host when using `openshell sandbox create --from ./Dockerfile` or `--from ./dir`; the CLI - builds the image and the VM driver exports it via the local Docker daemon + builds the image and the VM driver exports it via the local container engine. + Docker is tried first; if unavailable, the driver falls back to the Podman + socket. On Linux, enable the Podman API socket with + `systemctl --user start podman.socket` - `gh` CLI (used by `mise run vm:setup` to download pre-built runtime artifacts) ## Releases diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index b797f4835..a08c9fc23 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -762,12 +762,14 @@ impl VmDriver { sandbox_id: &str, image_ref: &str, ) -> Result { - if let Some((docker, image_identity)) = self.resolve_local_docker_image(image_ref).await? { + if let Some((engine, image_identity)) = + self.resolve_local_container_image(image_ref).await? 
+ { return self .ensure_cached_local_image_rootfs_archive( sandbox_id, image_ref, - &docker, + &engine, &image_identity, ) .await; @@ -860,31 +862,30 @@ impl VmDriver { Ok(image_identity) } - async fn resolve_local_docker_image( + async fn resolve_local_container_image( &self, image_ref: &str, ) -> Result, Status> { let required_local_image = is_openshell_local_build_image_ref(image_ref); - let docker = match Docker::connect_with_local_defaults() { - Ok(docker) => docker, - Err(err) if required_local_image => { + let engine = match connect_local_container_engine().await { + Some(engine) => engine, + None if required_local_image => { return Err(Status::failed_precondition(format!( - "failed to connect to local Docker daemon for locally built sandbox image '{image_ref}': {err}" + "no container engine (Docker/Podman) available for locally built sandbox image '{image_ref}'" ))); } - Err(err) => { + None => { warn!( image_ref = %image_ref, - error = %err, - "vm driver: local Docker daemon unavailable, falling back to registry" + "vm driver: no local container engine available, falling back to registry" ); return Ok(None); } }; - match docker.inspect_image(image_ref).await { + match engine.inspect_image(image_ref).await { Ok(inspect) => { - if let Some(message) = local_docker_image_platform_mismatch( + if let Some(message) = local_image_platform_mismatch( image_ref, inspect.os.as_deref(), inspect.architecture.as_deref(), @@ -895,30 +896,28 @@ impl VmDriver { warn!( image_ref = %image_ref, %message, - "vm driver: local Docker image platform mismatch, falling back to registry" + "vm driver: local container image platform mismatch, falling back to registry" ); return Ok(None); } - let image_identity = - inspect - .id - .filter(|id| !id.trim().is_empty()) - .ok_or_else(|| { - Status::failed_precondition(format!( - "local Docker image '{image_ref}' inspect response has no image ID" - )) - })?; + let image_identity = inspect.id.filter(|id| !id.trim().is_empty()).ok_or_else( + 
+                    || {
+                        Status::failed_precondition(format!(
+                            "local container image '{image_ref}' inspect response has no image ID"
+                        ))
+                    },
+                )?;
                 info!(
                     image_ref = %image_ref,
                     image_identity = %image_identity,
-                    "vm driver: resolved image from local Docker daemon"
+                    "vm driver: resolved image from local container engine"
                 );
-                Ok(Some((docker, image_identity)))
+                Ok(Some((engine, image_identity)))
             }
             Err(err) if is_docker_not_found_error(&err) && required_local_image => {
                 Err(Status::failed_precondition(format!(
-                    "locally built sandbox image '{image_ref}' is not present in the local Docker daemon"
+                    "locally built sandbox image '{image_ref}' is not present in the local container engine"
                 )))
             }
             Err(err) if is_docker_not_found_error(&err) => Ok(None),
@@ -929,7 +928,7 @@ impl VmDriver {
                 warn!(
                     image_ref = %image_ref,
                     error = %err,
-                    "vm driver: local Docker image inspection failed, falling back to registry"
+                    "vm driver: local container image inspection failed, falling back to registry"
                 );
                 Ok(None)
             }
@@ -1526,11 +1525,60 @@ fn parse_registry_reference(image_ref: &str) -> Result {
     })
 }

+/// Connect to a local container engine, returning `None` when none answers.
+///
+/// Docker is tried first via `connect_with_local_defaults` (which respects
+/// `DOCKER_HOST`), then the Podman socket, which exposes a Docker-compatible
+/// API. A client is only accepted once `ping()` succeeds against it.
+async fn connect_local_container_engine() -> Option<Docker> {
+    if let Ok(docker) = Docker::connect_with_local_defaults() {
+        if docker.ping().await.is_ok() {
+            return Some(docker);
+        }
+    }
+
+    let podman_socket = podman_socket_path();
+    if podman_socket.exists() {
+        if let Ok(docker) =
+            Docker::connect_with_unix(podman_socket.to_str()?, 120, bollard::API_DEFAULT_VERSION)
+        {
+            if docker.ping().await.is_ok() {
+                info!(
+                    socket = %podman_socket.display(),
+                    "vm driver: connected to Podman (Docker-compatible API)"
+                );
+                return Some(docker);
+            }
+        }
+    }
+
+    None
+}
+
+/// Podman user socket path for the current platform.
+fn podman_socket_path() -> PathBuf {
+    // Treat empty values as unset: joining onto an empty path would yield a relative path.
+    let var = |name: &str| std::env::var(name).ok().filter(|value| !value.is_empty());
+    #[cfg(target_os = "macos")]
+    let path = var("HOME").map_or_else(PathBuf::new, |home| {
+        PathBuf::from(home).join(".local/share/containers/podman/machine/podman.sock")
+    });
+    #[cfg(target_os = "linux")]
+    let path = var("XDG_RUNTIME_DIR").map_or_else(
+        || PathBuf::from(format!("/run/user/{}/podman/podman.sock", nix::unistd::getuid())),
+        |xdg| PathBuf::from(xdg).join("podman/podman.sock"),
+    );
+    // No known socket location on other targets; an empty path never exists on disk.
+    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
+    let path = PathBuf::new();
+    path
+}
+
 fn is_openshell_local_build_image_ref(image_ref: &str) -> bool {
     image_ref.starts_with("openshell/sandbox-from:")
 }

-fn local_docker_image_platform_mismatch(
+fn local_image_platform_mismatch(
     image_ref: &str,
     actual_os: Option<&str>,
     actual_arch: Option<&str>,
@@ -2923,9 +2971,9 @@ mod tests {
     }

     #[test]
-    fn local_docker_image_platform_mismatch_checks_guest_platform() {
+    fn local_image_platform_mismatch_checks_guest_platform() {
         assert!(
-            local_docker_image_platform_mismatch(
+            local_image_platform_mismatch(
                 "openshell/sandbox-from:123",
                 Some("linux"),
                 Some(linux_oci_arch()),
@@ -2933,7 +2981,7 @@ mod tests {
             .is_none()
         );

-        let err = local_docker_image_platform_mismatch(
+        let err = local_image_platform_mismatch(
             "openshell/sandbox-from:123",
             Some("linux"),
             Some("wrong-arch"),
@@ -2942,7 +2990,7 @@ mod tests {
         assert!(err.contains("wrong-arch"));
         assert!(err.contains(linux_oci_arch()));

-        let err = local_docker_image_platform_mismatch("openshell/sandbox-from:123", None, None)
+        let err = local_image_platform_mismatch("openshell/sandbox-from:123", None, None)
             .expect("unknown platform should be reported");
         assert!(err.contains("unknown/unknown"));
     }
diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx
index da1951c4b..b97357fa6 100644
--- a/docs/reference/sandbox-compute-drivers.mdx
+++ b/docs/reference/sandbox-compute-drivers.mdx
@@ -87,6 +87,14 @@ For maintainer-level implementation details, refer to the [VM driver README](htt

 The gateway starts
`openshell-driver-vm` over a private Unix socket and passes its process ID so the driver can reject unexpected local clients. The driver's standalone TCP listener is disabled unless `--allow-unauthenticated-tcp` is set for local development. +### Local image resolution + +The VM driver resolves sandbox images from a local container engine before falling back to registry pulls. It tries Docker first, then falls back to the Podman socket (Docker-compatible API). On Linux with Podman, enable the API socket so the driver can find local images: + +```shell +systemctl --user start podman.socket +``` + ## Kubernetes Driver Kubernetes-backed sandboxes run as pods in the configured sandbox namespace. Use Kubernetes for shared clusters, remote compute, GPU scheduling, and operator-managed environments. diff --git a/e2e/rust/tests/bypass_detection.rs b/e2e/rust/tests/bypass_detection.rs new file mode 100644 index 000000000..698fd346d --- /dev/null +++ b/e2e/rust/tests/bypass_detection.rs @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Verify that sandbox bypass detection provides fast-fail UX: direct TCP +//! connections that skip the HTTP CONNECT proxy are rejected with +//! ECONNREFUSED (immediate) rather than hanging until a network timeout. +//! +//! This test is implementation-agnostic — it validates the observable +//! behavior (fast rejection) regardless of whether the kernel rules are +//! installed via iptables or nftables. + +#![cfg(feature = "e2e")] + +use openshell_e2e::harness::sandbox::SandboxGuard; + +/// Python script that attempts a raw TCP connect bypassing the proxy. +/// +/// `socket.connect()` does not honor HTTP_PROXY — it goes directly through +/// the kernel, hitting the OUTPUT chain REJECT rule. The script reports the +/// outcome and wall-clock time so the test can assert on both. 
+///
+/// Target 198.51.100.1 is RFC 5737 TEST-NET-2 — documentation-only address
+/// space that will never route. This doesn't matter because the REJECT rule
+/// fires in the OUTPUT chain before the packet reaches the network.
+fn bypass_attempt_script() -> &'static str {
+    r#"
+import json, socket, time
+
+start = time.monotonic()
+result = "unknown"
+try:
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.settimeout(10)
+    s.connect(("198.51.100.1", 80))
+    result = "connected"
+    s.close()
+except ConnectionRefusedError:
+    result = "refused"
+except socket.timeout:
+    result = "timeout"
+except OSError as e:
+    result = f"error:{e}"
+
+elapsed_ms = int((time.monotonic() - start) * 1000)
+print(json.dumps({"bypass_result": result, "elapsed_ms": elapsed_ms}), flush=True)
+"#
+}
+
+/// A direct TCP connection bypassing the proxy must be refused (ECONNREFUSED)
+/// in under 3000 ms as measured by the script, not hang until a timeout.
+#[tokio::test]
+async fn bypass_attempt_is_rejected_fast() {
+    let guard = SandboxGuard::create(&["--", "python3", "-c", bypass_attempt_script()])
+        .await
+        .expect("sandbox create");
+
+    let json_line = guard
+        .create_output
+        .lines()
+        .find(|l| l.contains("bypass_result"))
+        .unwrap_or_else(|| {
+            panic!(
+                "no bypass_result JSON in output:\n{}",
+                guard.create_output
+            )
+        });
+
+    let parsed: serde_json::Value = serde_json::from_str(json_line.trim()).unwrap_or_else(|e| {
+        panic!("failed to parse JSON '{json_line}': {e}")
+    });
+
+    let result = parsed["bypass_result"].as_str().expect("bypass_result must be a string");
+    let elapsed_ms = parsed["elapsed_ms"].as_u64().expect("elapsed_ms must be a u64");
+
+    assert_eq!(
+        result, "refused",
+        "expected connection refused (REJECT rule), got '{result}' after {elapsed_ms}ms.\n\
+         If 'timeout': REJECT rules may not be installed in the sandbox netns.\n\
+         Full output:\n{}",
+        guard.create_output
+    );
+
+    assert!(
+        elapsed_ms < 3000,
+        "bypass rejection took {elapsed_ms}ms — expected < 3000ms.\n\
+         Fast rejection requires REJECT rules in the sandbox OUTPUT chain."
+    );
+}