diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index cbf55423d..e9baf0e5c 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -1,18 +1,58 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +/// How the supervisor binary is delivered into sandbox pods. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SupervisorSideloadMethod { + /// Mount the supervisor OCI image directly as a read-only volume + /// (requires Kubernetes >= v1.33 with the `ImageVolume` feature gate, + /// or >= v1.36 where it is GA). + #[default] + ImageVolume, + /// Copy the binary via an init container and emptyDir volume. + /// Works on all Kubernetes versions. + InitContainer, +} + +impl std::fmt::Display for SupervisorSideloadMethod { + /// Writes the kebab-case name that `from_str` accepts. + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::ImageVolume => f.write_str("image-volume"), + Self::InitContainer => f.write_str("init-container"), + } + } +} + +impl std::str::FromStr for SupervisorSideloadMethod { + type Err = String; + + /// Parses the exact kebab-case names emitted by `Display`; anything else is an error. + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "image-volume" => Ok(Self::ImageVolume), + "init-container" => Ok(Self::InitContainer), + other => Err(format!( + "unknown supervisor sideload method '{other}'; expected 'image-volume' or 'init-container'" + )), + } + } +} + #[derive(Debug, Clone)] pub struct KubernetesComputeConfig { pub namespace: String, pub default_image: String, pub image_pull_policy: String, /// Image that provides the `openshell-sandbox` supervisor binary. - /// An init container copies the binary from this image into a shared - /// emptyDir volume before the sandbox container starts. + /// Mounted directly as an image volume, or copied via an init container, + /// depending on `supervisor_sideload_method`. 
pub supervisor_image: String, - /// Kubernetes `imagePullPolicy` for the supervisor init container. + /// Kubernetes `imagePullPolicy` for the supervisor image. /// Empty string delegates to the Kubernetes default. pub supervisor_image_pull_policy: String, + /// How the supervisor binary is delivered into sandbox pods. + pub supervisor_sideload_method: SupervisorSideloadMethod, pub grpc_endpoint: String, pub ssh_socket_path: String, pub ssh_handshake_secret: String, diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 668a18d8c..b49a068ab 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -3,7 +3,7 @@ //! Kubernetes compute driver. -use crate::config::KubernetesComputeConfig; +use crate::config::{KubernetesComputeConfig, SupervisorSideloadMethod}; use futures::{Stream, StreamExt, TryStreamExt}; use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node}; use kube::api::{Api, ApiResource, DeleteParams, ListParams, PostParams}; @@ -313,6 +313,7 @@ impl KubernetesComputeDriver { image_pull_policy: &self.config.image_pull_policy, supervisor_image: &self.config.supervisor_image, supervisor_image_pull_policy: &self.config.supervisor_image_pull_policy, + supervisor_sideload_method: self.config.supervisor_sideload_method, sandbox_id: &sandbox.id, sandbox_name: &sandbox.name, grpc_endpoint: &self.config.grpc_endpoint, @@ -694,6 +695,27 @@ fn supervisor_volume_mount() -> serde_json::Value { /// resolution inside the image. const SUPERVISOR_IMAGE_BINARY_PATH: &str = "/openshell-sandbox"; +/// Build an image volume that mounts the supervisor OCI image directly. +/// +/// Requires Kubernetes >= v1.33 (`ImageVolume` beta) or >= v1.36 (GA). +/// The entire image filesystem is mounted read-only, making the binary +/// available at `{SUPERVISOR_MOUNT_PATH}/openshell-sandbox`. 
+fn supervisor_image_volume( + supervisor_image: &str, + supervisor_image_pull_policy: &str, +) -> serde_json::Value { + let mut image_spec = serde_json::json!({ + "reference": supervisor_image, + }); + if !supervisor_image_pull_policy.is_empty() { + image_spec["pullPolicy"] = serde_json::json!(supervisor_image_pull_policy); + } + serde_json::json!({ + "name": SUPERVISOR_VOLUME_NAME, + "image": image_spec + }) +} + /// Build the init container that copies the supervisor binary into the emptyDir. /// /// The supervisor image contains the supervisor binary at `/openshell-sandbox`. @@ -730,43 +752,56 @@ fn supervisor_init_container( /// Apply supervisor side-load transforms to an already-built pod template JSON. /// -/// Injects an emptyDir volume, an init container that copies the supervisor -/// binary from the supervisor image into that volume, and a read-only volume -/// mount + command override on the agent container. +/// Depending on the sideload method: +/// - **`ImageVolume`**: mounts the supervisor OCI image directly as a read-only +/// volume (no init container needed, requires K8s >= v1.33). +/// - **`InitContainer`**: injects an emptyDir volume and an init container that +/// copies the supervisor binary from the supervisor image into that volume. /// -/// The `runAsUser: 0` override ensures the supervisor binary runs as root -/// regardless of the image's `USER` directive. The supervisor needs root for -/// network namespace creation, proxy setup, and Landlock/seccomp configuration. -/// It drops to the appropriate non-root user for child processes via the -/// policy's `run_as_user`/`run_as_group`. +/// In both cases, the agent container gets a command override to run the +/// side-loaded binary and `runAsUser: 0` so it can create network namespaces, +/// set up the proxy, and configure Landlock/seccomp. 
fn apply_supervisor_sideload( pod_template: &mut serde_json::Value, supervisor_image: &str, supervisor_image_pull_policy: &str, + method: SupervisorSideloadMethod, ) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; }; - // 1. Add the emptyDir volume to spec.volumes + // 1. Add the volume (image source or emptyDir depending on method) let volumes = spec .entry("volumes") .or_insert_with(|| serde_json::json!([])) .as_array_mut(); if let Some(volumes) = volumes { - volumes.push(supervisor_volume()); + match method { + SupervisorSideloadMethod::ImageVolume => { + volumes.push(supervisor_image_volume( + supervisor_image, + supervisor_image_pull_policy, + )); + } + SupervisorSideloadMethod::InitContainer => { + volumes.push(supervisor_volume()); + } + } } - // 2. Add the init container that copies the binary into the emptyDir - let init_containers = spec - .entry("initContainers") - .or_insert_with(|| serde_json::json!([])) - .as_array_mut(); - if let Some(init_containers) = init_containers { - init_containers.push(supervisor_init_container( - supervisor_image, - supervisor_image_pull_policy, - )); + // 2. Add the init container only for the init-container method + if method == SupervisorSideloadMethod::InitContainer { + let init_containers = spec + .entry("initContainers") + .or_insert_with(|| serde_json::json!([])) + .as_array_mut(); + if let Some(init_containers) = init_containers { + init_containers.push(supervisor_init_container( + supervisor_image, + supervisor_image_pull_policy, + )); + } } // 3. 
Find the agent container and add volume mount + command override @@ -934,6 +969,7 @@ struct SandboxPodParams<'a> { image_pull_policy: &'a str, supervisor_image: &'a str, supervisor_image_pull_policy: &'a str, + supervisor_sideload_method: SupervisorSideloadMethod, sandbox_id: &'a str, sandbox_name: &'a str, grpc_endpoint: &'a str, @@ -1166,12 +1202,11 @@ fn sandbox_template_to_k8s( let mut result = serde_json::Value::Object(template_value); - // Side-load the supervisor binary via an init container that copies it - // from the supervisor image into a shared emptyDir volume. apply_supervisor_sideload( &mut result, params.supervisor_image, params.supervisor_image_pull_policy, + params.supervisor_sideload_method, ); // Inject workspace persistence (init container + PVC volume mount) so @@ -1495,7 +1530,12 @@ mod tests { } }); - apply_supervisor_sideload(&mut pod_template, "custom-image:latest", "IfNotPresent"); + apply_supervisor_sideload( + &mut pod_template, + "custom-image:latest", + "IfNotPresent", + SupervisorSideloadMethod::InitContainer, + ); let sc = &pod_template["spec"]["containers"][0]["securityContext"]; assert_eq!(sc["runAsUser"], 0, "runAsUser must be 0 for supervisor"); @@ -1519,7 +1559,12 @@ mod tests { } }); - apply_supervisor_sideload(&mut pod_template, "supervisor-image:latest", "IfNotPresent"); + apply_supervisor_sideload( + &mut pod_template, + "supervisor-image:latest", + "IfNotPresent", + SupervisorSideloadMethod::InitContainer, + ); let sc = &pod_template["spec"]["containers"][0]["securityContext"]; assert_eq!( @@ -1539,7 +1584,12 @@ mod tests { } }); - apply_supervisor_sideload(&mut pod_template, "supervisor-image:latest", "IfNotPresent"); + apply_supervisor_sideload( + &mut pod_template, + "supervisor-image:latest", + "IfNotPresent", + SupervisorSideloadMethod::InitContainer, + ); // Volume should be an emptyDir let volumes = pod_template["spec"]["volumes"] @@ -1597,6 +1647,86 @@ mod tests { assert_eq!(mounts[0]["readOnly"], true); } + 
#[test] + fn supervisor_sideload_image_volume_injects_image_source_without_init_container() { + let mut pod_template = serde_json::json!({ + "spec": { + "containers": [{ + "name": "agent", + "image": "custom-image:latest" + }] + } + }); + + apply_supervisor_sideload( + &mut pod_template, + "supervisor-image:latest", + "IfNotPresent", + SupervisorSideloadMethod::ImageVolume, + ); + + let volumes = pod_template["spec"]["volumes"] + .as_array() + .expect("volumes should exist"); + assert_eq!(volumes.len(), 1); + assert_eq!(volumes[0]["name"], SUPERVISOR_VOLUME_NAME); + assert_eq!(volumes[0]["image"]["reference"], "supervisor-image:latest"); + assert_eq!(volumes[0]["image"]["pullPolicy"], "IfNotPresent"); + assert!( + volumes[0]["emptyDir"].is_null(), + "image volume method must not use emptyDir" + ); + + assert!( + pod_template["spec"]["initContainers"].is_null(), + "image volume method must not inject init containers" + ); + + let command = pod_template["spec"]["containers"][0]["command"] + .as_array() + .expect("command should be set"); + assert_eq!( + command[0].as_str().unwrap(), + format!("{SUPERVISOR_MOUNT_PATH}/openshell-sandbox") + ); + + let sc = &pod_template["spec"]["containers"][0]["securityContext"]; + assert_eq!(sc["runAsUser"], 0); + + let mounts = pod_template["spec"]["containers"][0]["volumeMounts"] + .as_array() + .expect("volumeMounts should exist"); + assert_eq!(mounts[0]["name"], SUPERVISOR_VOLUME_NAME); + assert_eq!(mounts[0]["mountPath"], SUPERVISOR_MOUNT_PATH); + assert_eq!(mounts[0]["readOnly"], true); + } + + #[test] + fn supervisor_image_volume_omits_pull_policy_when_empty() { + let mut pod_template = serde_json::json!({ + "spec": { + "containers": [{ + "name": "agent", + "image": "custom-image:latest" + }] + } + }); + + apply_supervisor_sideload( + &mut pod_template, + "supervisor-image:latest", + "", + SupervisorSideloadMethod::ImageVolume, + ); + + let volume = &pod_template["spec"]["volumes"][0]; + 
assert_eq!(volume["image"]["reference"], "supervisor-image:latest"); + assert!( + volume["image"].get("pullPolicy").is_none(), + "pullPolicy should be omitted when empty" + ); + } + /// Regression test: TLS mount path must match env var paths. /// The volume is mounted at a specific path and the env vars must point to /// files within that same path, otherwise the sandbox will fail to start @@ -1952,7 +2082,10 @@ mod tests { #[test] fn workspace_persistence_skipped_when_inject_workspace_false() { - let params = SandboxPodParams::default(); + let params = SandboxPodParams { + supervisor_sideload_method: SupervisorSideloadMethod::InitContainer, + ..SandboxPodParams::default() + }; let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), false, diff --git a/crates/openshell-driver-kubernetes/src/lib.rs b/crates/openshell-driver-kubernetes/src/lib.rs index 54149fe83..7975ca788 100644 --- a/crates/openshell-driver-kubernetes/src/lib.rs +++ b/crates/openshell-driver-kubernetes/src/lib.rs @@ -5,6 +5,6 @@ pub mod config; pub mod driver; pub mod grpc; -pub use config::KubernetesComputeConfig; +pub use config::{KubernetesComputeConfig, SupervisorSideloadMethod}; pub use driver::{KubernetesComputeDriver, KubernetesDriverError}; pub use grpc::ComputeDriverService; diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index 9e39e9d28..55ad78dcb 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -11,6 +11,7 @@ use openshell_core::VERSION; use openshell_core::proto::compute::v1::compute_driver_server::ComputeDriverServer; use openshell_driver_kubernetes::{ ComputeDriverService, KubernetesComputeConfig, KubernetesComputeDriver, + SupervisorSideloadMethod, }; #[derive(Parser, Debug)] @@ -64,6 +65,13 @@ struct Args { #[arg(long, env = "OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY")] supervisor_image_pull_policy: Option<String>, + #[arg( + long, + env = 
"OPENSHELL_SUPERVISOR_SIDELOAD_METHOD", + default_value = "image-volume" + )] + supervisor_sideload_method: SupervisorSideloadMethod, + #[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")] enable_user_namespaces: bool, } @@ -85,6 +93,7 @@ async fn main() -> Result<()> { .supervisor_image .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()), supervisor_image_pull_policy: args.supervisor_image_pull_policy.unwrap_or_default(), + supervisor_sideload_method: args.supervisor_sideload_method, grpc_endpoint: args.grpc_endpoint.unwrap_or_default(), ssh_socket_path: args.sandbox_ssh_socket_path, ssh_handshake_secret: args.ssh_handshake_secret, diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index eaca911e4..07c3cef5c 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -487,6 +487,22 @@ async fn build_compute_runtime( image_pull_policy: config.sandbox_image_pull_policy.clone(), supervisor_image, supervisor_image_pull_policy, + // An unset or empty variable selects the default method. An unrecognised + // value also falls back to the default, but is reported on stderr so a + // typo does not go unnoticed. + supervisor_sideload_method: std::env::var( + "OPENSHELL_SUPERVISOR_SIDELOAD_METHOD", + ) + .ok() + .filter(|s| !s.is_empty()) + .and_then(|s| { + s.parse() + .map_err(|e: String| { + eprintln!("warning: ignoring OPENSHELL_SUPERVISOR_SIDELOAD_METHOD: {e}"); + }) + .ok() + }) + .unwrap_or_default(), grpc_endpoint: config.grpc_endpoint.clone(), ssh_socket_path: config.sandbox_ssh_socket_path.clone(), ssh_handshake_secret: config.ssh_handshake_secret.clone(), diff --git a/deploy/helm/openshell/templates/_helpers.tpl b/deploy/helm/openshell/templates/_helpers.tpl index c0c8562c1..b52a224b2 100644 --- a/deploy/helm/openshell/templates/_helpers.tpl +++ b/deploy/helm/openshell/templates/_helpers.tpl @@ -98,6 +98,24 @@ the in-cluster Service DNS, release namespace, service port, and disableTls flag — so the default value works for any release name or namespace without override. */}} +{{/* +Supervisor sideload method. When supervisor.sideloadMethod is set, use it +verbatim. 
Otherwise auto-detect from the cluster version: K8s >= v1.33 +supports ImageVolumeSource (beta; GA in v1.36), older clusters fall back to +the init-container pattern. +*/}} +{{- define "openshell.supervisorSideloadMethod" -}} +{{- if .Values.supervisor.sideloadMethod -}} +{{- .Values.supervisor.sideloadMethod -}} +{{- else -}} +{{- if semverCompare ">=1.33-0" .Capabilities.KubeVersion.Version -}} +image-volume +{{- else -}} +init-container +{{- end -}} +{{- end -}} +{{- end }} + {{- define "openshell.grpcEndpoint" -}} {{- if .Values.server.grpcEndpoint -}} {{- .Values.server.grpcEndpoint -}} diff --git a/deploy/helm/openshell/templates/statefulset.yaml b/deploy/helm/openshell/templates/statefulset.yaml index b28e99c66..4a0b70621 100644 --- a/deploy/helm/openshell/templates/statefulset.yaml +++ b/deploy/helm/openshell/templates/statefulset.yaml @@ -76,6 +76,8 @@ spec: - name: OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY value: {{ .Values.supervisor.image.pullPolicy | quote }} {{- end }} + - name: OPENSHELL_SUPERVISOR_SIDELOAD_METHOD + value: {{ include "openshell.supervisorSideloadMethod" . | quote }} - name: OPENSHELL_GRPC_ENDPOINT value: {{ include "openshell.grpcEndpoint" . | quote }} {{- if .Values.server.sshGatewayHost }} diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 389efc132..4adee92fa 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -10,14 +10,20 @@ image: pullPolicy: IfNotPresent tag: "" -# Supervisor image — provides the openshell-sandbox binary that is copied into -# sandbox pods via an init container. tag defaults to appVersion (same as the -# gateway image) so both stay in sync when the chart is released. +# Supervisor image — provides the openshell-sandbox binary injected into sandbox +# pods. tag defaults to appVersion (same as the gateway image) so both stay in +# sync when the chart is released. 
supervisor: image: repository: ghcr.io/nvidia/openshell/supervisor pullPolicy: "" tag: "" + # How the supervisor binary is delivered into sandbox pods. + # Empty (default) = auto-detect from cluster version: + # K8s >= v1.33 → "image-volume" (mounts OCI image directly; needs the ImageVolume feature gate until GA in v1.36) + # K8s < v1.33 → "init-container" (copies via init container + emptyDir) + # Set explicitly to override auto-detection. + sideloadMethod: "" imagePullSecrets: [] nameOverride: "openshell"