Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions crates/openshell-driver-vm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,12 @@ The VM guest's serial console is appended to `<state-dir>/<sandbox-id>/console.l
- Matching rustup target: `rustup target add aarch64-unknown-linux-gnu` (or `x86_64-unknown-linux-gnu` for an amd64 guest)
- `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `vm:supervisor` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary.
- [mise](https://mise.jdx.dev/) task runner
- Docker-compatible socket on the local CLI/gateway host when using
- Docker or Podman socket on the local CLI/gateway host when using
`openshell sandbox create --from ./Dockerfile` or `--from ./dir`; the CLI
builds the image and the VM driver exports it via the local Docker daemon
builds the image and the VM driver exports it via the local container engine.
Docker is tried first; if unavailable, the driver falls back to the Podman
socket. On Linux, enable the Podman API socket with
`systemctl --user start podman.socket`
- `gh` CLI (used by `mise run vm:setup` to download pre-built runtime artifacts)

## Releases
Expand Down
110 changes: 79 additions & 31 deletions crates/openshell-driver-vm/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -762,12 +762,14 @@ impl VmDriver {
sandbox_id: &str,
image_ref: &str,
) -> Result<String, Status> {
if let Some((docker, image_identity)) = self.resolve_local_docker_image(image_ref).await? {
if let Some((engine, image_identity)) =
self.resolve_local_container_image(image_ref).await?
{
return self
.ensure_cached_local_image_rootfs_archive(
sandbox_id,
image_ref,
&docker,
&engine,
&image_identity,
)
.await;
Expand Down Expand Up @@ -860,31 +862,30 @@ impl VmDriver {
Ok(image_identity)
}

async fn resolve_local_docker_image(
async fn resolve_local_container_image(
&self,
image_ref: &str,
) -> Result<Option<(Docker, String)>, Status> {
let required_local_image = is_openshell_local_build_image_ref(image_ref);
let docker = match Docker::connect_with_local_defaults() {
Ok(docker) => docker,
Err(err) if required_local_image => {
let engine = match connect_local_container_engine().await {
Some(engine) => engine,
None if required_local_image => {
return Err(Status::failed_precondition(format!(
"failed to connect to local Docker daemon for locally built sandbox image '{image_ref}': {err}"
"no container engine (Docker/Podman) available for locally built sandbox image '{image_ref}'"
)));
}
Err(err) => {
None => {
warn!(
image_ref = %image_ref,
error = %err,
"vm driver: local Docker daemon unavailable, falling back to registry"
"vm driver: no local container engine available, falling back to registry"
);
return Ok(None);
}
};

match docker.inspect_image(image_ref).await {
match engine.inspect_image(image_ref).await {
Ok(inspect) => {
if let Some(message) = local_docker_image_platform_mismatch(
if let Some(message) = local_image_platform_mismatch(
image_ref,
inspect.os.as_deref(),
inspect.architecture.as_deref(),
Expand All @@ -895,30 +896,28 @@ impl VmDriver {
warn!(
image_ref = %image_ref,
%message,
"vm driver: local Docker image platform mismatch, falling back to registry"
"vm driver: local container image platform mismatch, falling back to registry"
);
return Ok(None);
}

let image_identity =
inspect
.id
.filter(|id| !id.trim().is_empty())
.ok_or_else(|| {
Status::failed_precondition(format!(
"local Docker image '{image_ref}' inspect response has no image ID"
))
})?;
let image_identity = inspect.id.filter(|id| !id.trim().is_empty()).ok_or_else(
|| {
Status::failed_precondition(format!(
"local container image '{image_ref}' inspect response has no image ID"
))
},
)?;
info!(
image_ref = %image_ref,
image_identity = %image_identity,
"vm driver: resolved image from local Docker daemon"
"vm driver: resolved image from local container engine"
);
Ok(Some((docker, image_identity)))
Ok(Some((engine, image_identity)))
}
Err(err) if is_docker_not_found_error(&err) && required_local_image => {
Err(Status::failed_precondition(format!(
"locally built sandbox image '{image_ref}' is not present in the local Docker daemon"
"locally built sandbox image '{image_ref}' is not present in the local container engine"
)))
}
Err(err) if is_docker_not_found_error(&err) => Ok(None),
Expand All @@ -929,7 +928,7 @@ impl VmDriver {
warn!(
image_ref = %image_ref,
error = %err,
"vm driver: local Docker image inspection failed, falling back to registry"
"vm driver: local container image inspection failed, falling back to registry"
);
Ok(None)
}
Expand Down Expand Up @@ -1526,11 +1525,60 @@ fn parse_registry_reference(image_ref: &str) -> Result<Reference, Status> {
})
}

/// Try to connect to a local container engine (Docker or Podman).
///
/// Tries Docker first (`connect_with_local_defaults`, which respects
/// `DOCKER_HOST`). If Docker is unavailable, falls back to the Podman
/// socket, which exposes a Docker-compatible API.
async fn connect_local_container_engine() -> Option<Docker> {
if let Ok(docker) = Docker::connect_with_local_defaults() {
if docker.ping().await.is_ok() {
return Some(docker);
}
}

let podman_socket = podman_socket_path();
if podman_socket.exists() {
if let Ok(docker) =
Docker::connect_with_unix(podman_socket.to_str()?, 120, bollard::API_DEFAULT_VERSION)
{
if docker.ping().await.is_ok() {
info!(
socket = %podman_socket.display(),
"vm driver: connected to Podman (Docker-compatible API)"
);
return Some(docker);
}
}
}

None
}

/// Podman user socket path for the current platform.
///
/// - macOS: `$HOME/.local/share/containers/podman/machine/podman.sock`.
///   If `HOME` is unset the result is a relative path.
/// - Linux: `$XDG_RUNTIME_DIR/podman/podman.sock` when `XDG_RUNTIME_DIR` is
///   an absolute path, otherwise `/run/user/<uid>/podman/podman.sock`.
///
/// The path is only computed here; whether the socket exists is left to the
/// caller.
fn podman_socket_path() -> PathBuf {
    #[cfg(target_os = "macos")]
    {
        // `var_os` keeps a non-UTF-8 home directory intact instead of
        // treating it as missing.
        let home = std::env::var_os("HOME").unwrap_or_default();
        PathBuf::from(home).join(".local/share/containers/podman/machine/podman.sock")
    }
    #[cfg(target_os = "linux")]
    {
        std::env::var_os("XDG_RUNTIME_DIR")
            .map(PathBuf::from)
            // An empty or relative value would make the socket path depend on
            // the process working directory; treat it as unset.
            .filter(|dir| dir.is_absolute())
            .map_or_else(
                || {
                    let uid = nix::unistd::getuid();
                    PathBuf::from(format!("/run/user/{uid}/podman/podman.sock"))
                },
                |dir| dir.join("podman/podman.sock"),
            )
    }
}

/// Reports whether `image_ref` begins with the `openshell/sandbox-from:`
/// prefix, i.e. whether it refers to a locally built sandbox image.
fn is_openshell_local_build_image_ref(image_ref: &str) -> bool {
    const LOCAL_BUILD_PREFIX: &str = "openshell/sandbox-from:";
    image_ref.strip_prefix(LOCAL_BUILD_PREFIX).is_some()
}

fn local_docker_image_platform_mismatch(
fn local_image_platform_mismatch(
image_ref: &str,
actual_os: Option<&str>,
actual_arch: Option<&str>,
Expand Down Expand Up @@ -2923,17 +2971,17 @@ mod tests {
}

#[test]
fn local_docker_image_platform_mismatch_checks_guest_platform() {
fn local_image_platform_mismatch_checks_guest_platform() {
assert!(
local_docker_image_platform_mismatch(
local_image_platform_mismatch(
"openshell/sandbox-from:123",
Some("linux"),
Some(linux_oci_arch()),
)
.is_none()
);

let err = local_docker_image_platform_mismatch(
let err = local_image_platform_mismatch(
"openshell/sandbox-from:123",
Some("linux"),
Some("wrong-arch"),
Expand All @@ -2942,7 +2990,7 @@ mod tests {
assert!(err.contains("wrong-arch"));
assert!(err.contains(linux_oci_arch()));

let err = local_docker_image_platform_mismatch("openshell/sandbox-from:123", None, None)
let err = local_image_platform_mismatch("openshell/sandbox-from:123", None, None)
.expect("unknown platform should be reported");
assert!(err.contains("unknown/unknown"));
}
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/sandbox-compute-drivers.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ For maintainer-level implementation details, refer to the [VM driver README](htt

The gateway starts `openshell-driver-vm` over a private Unix socket and passes its process ID so the driver can reject unexpected local clients. The driver's standalone TCP listener is disabled unless `--allow-unauthenticated-tcp` is set for local development.

### Local image resolution

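The VM driver looks for sandbox images in a local container engine before falling back to a registry pull. It tries Docker first; if Docker is unreachable, it connects to the Podman user socket, which exposes a Docker-compatible API. On Linux the driver looks for that socket at `$XDG_RUNTIME_DIR/podman/podman.sock` (or `/run/user/<uid>/podman/podman.sock` when `XDG_RUNTIME_DIR` is unset), so start the Podman API socket for the driver to find local images: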

```shell
systemctl --user start podman.socket
```

## Kubernetes Driver

Kubernetes-backed sandboxes run as pods in the configured sandbox namespace. Use Kubernetes for shared clusters, remote compute, GPU scheduling, and operator-managed environments.
Expand Down
88 changes: 88 additions & 0 deletions e2e/rust/tests/bypass_detection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Verify that sandbox bypass detection provides fast-fail UX: direct TCP
//! connections that skip the HTTP CONNECT proxy are rejected with
//! ECONNREFUSED (immediate) rather than hanging until a network timeout.
//!
//! This test is implementation-agnostic — it validates the observable
//! behavior (fast rejection) regardless of whether the kernel rules are
//! installed via iptables or nftables.

#![cfg(feature = "e2e")]

use openshell_e2e::harness::sandbox::SandboxGuard;

/// Python script that attempts a raw TCP connect bypassing the proxy.
///
/// `socket.connect()` does not honor HTTP_PROXY — it goes directly through
/// the kernel, hitting the OUTPUT chain REJECT rule. The script reports the
/// outcome and wall-clock time so the test can assert on both.
///
/// Target 198.51.100.1 is RFC 5737 TEST-NET-2 — documentation-only address
/// space that will never route. This doesn't matter because the REJECT rule
/// fires in the OUTPUT chain before the packet reaches the network.
///
/// The script prints one JSON object with `bypass_result` (`"connected"`,
/// `"refused"`, `"timeout"` or `"error:<message>"`) and `elapsed_ms`.
/// The Python source must start at column 0 and keep its block indentation:
/// it is passed verbatim to `python3 -c`.
fn bypass_attempt_script() -> &'static str {
    r#"
import json, socket, time

start = time.monotonic()
result = "unknown"
try:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(10)
    s.connect(("198.51.100.1", 80))
    result = "connected"
    s.close()
except ConnectionRefusedError:
    result = "refused"
except socket.timeout:
    result = "timeout"
except OSError as e:
    result = f"error:{e}"

elapsed_ms = int((time.monotonic() - start) * 1000)
print(json.dumps({"bypass_result": result, "elapsed_ms": elapsed_ms}), flush=True)
"#
}

/// Runs the bypass script inside a sandbox and checks that the direct TCP
/// connection is refused (ECONNREFUSED) quickly instead of hanging until a
/// timeout.
#[tokio::test]
async fn bypass_attempt_is_rejected_fast() {
    let sandbox_args = ["--", "python3", "-c", bypass_attempt_script()];
    let guard = SandboxGuard::create(&sandbox_args)
        .await
        .expect("sandbox create");

    // The script prints a single JSON line; pick it out of the sandbox output.
    let report_line = guard
        .create_output
        .lines()
        .find(|line| line.contains("bypass_result"));
    let json_line = match report_line {
        Some(line) => line,
        None => panic!(
            "no bypass_result JSON in output:\n{}",
            guard.create_output
        ),
    };

    let parsed: serde_json::Value = match serde_json::from_str(json_line.trim()) {
        Ok(value) => value,
        Err(e) => panic!("failed to parse JSON '{json_line}': {e}"),
    };

    let result = parsed["bypass_result"].as_str().unwrap();
    let elapsed_ms = parsed["elapsed_ms"].as_u64().unwrap();

    // The outcome must be an explicit refusal.
    assert_eq!(
        result, "refused",
        "expected connection refused (REJECT rule), got '{result}' after {elapsed_ms}ms.\n\
         If 'timeout': REJECT rules may not be installed in the sandbox netns.\n\
         Full output:\n{}",
        guard.create_output
    );

    // The refusal must also arrive within the 3-second budget.
    assert!(
        elapsed_ms < 3000,
        "bypass rejection took {elapsed_ms}ms — expected < 3000ms.\n\
         Fast rejection requires REJECT rules in the sandbox OUTPUT chain."
    );
}
Loading