From b17435a2cbfde7bd1f055535a3c929678b870ed8 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 20 Mar 2026 05:56:56 +0000 Subject: [PATCH 1/2] feat(vmm): add OCI registry image discovery and pull support Add ability for VMM to discover available guest images from an OCI registry and pull them on-demand through the web UI. Images are pulled in the background with status tracked server-side, surviving page refreshes. The UI auto-refreshes every 3s while the registry panel is open. - New `image_registry` config field (e.g., "cr.kvin.wang/dstack/guest-image") - New RPC: ListRegistryImages, PullRegistryImage - Registry module: list tags via Docker Registry HTTP API v2, pull and extract via `docker export` - Background pull with pulling state in App memory - UI: Image Registry button + dialog with pull/status per tag --- Cargo.lock | 3 + vmm/Cargo.toml | 3 + vmm/rpc/proto/vmm_rpc.proto | 30 ++ vmm/src/app.rs | 20 +- vmm/src/app/registry.rs | 473 +++++++++++++++++++++++++ vmm/src/config.rs | 43 ++- vmm/src/main.rs | 2 +- vmm/src/main_service.rs | 127 ++++++- vmm/src/one_shot.rs | 2 +- vmm/ui/src/composables/useVmManager.ts | 83 +++++ vmm/ui/src/templates/app.html | 93 +++++ vmm/vmm.toml | 6 + 12 files changed, 868 insertions(+), 17 deletions(-) create mode 100644 vmm/src/app/registry.rs diff --git a/Cargo.lock b/Cargo.lock index 7b49e9fe7..e7485c1e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2663,6 +2663,7 @@ dependencies = [ "dstack-types", "dstack-vmm-rpc", "fatfs", + "flate2", "fs-err", "fscommon", "git-version", @@ -2679,6 +2680,7 @@ dependencies = [ "or-panic", "path-absolutize", "ra-rpc", + "reqwest", "rocket", "rocket-apitoken", "rocket-vsock-listener", @@ -2693,6 +2695,7 @@ dependencies = [ "strip-ansi-escapes", "supervisor-client", "tailf", + "tar", "tokio", "tracing", "tracing-subscriber", diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 573d167d8..0376b30e6 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -56,6 +56,9 @@ fatfs.workspace = true fscommon.workspace = true or-panic.workspace = true url.workspace = true +reqwest.workspace = true +flate2.workspace = true +tar.workspace = true [dev-dependencies] insta.workspace = true diff --git a/vmm/rpc/proto/vmm_rpc.proto b/vmm/rpc/proto/vmm_rpc.proto index fb22bdb9d..8d79fea66 100644 --- a/vmm/rpc/proto/vmm_rpc.proto +++ b/vmm/rpc/proto/vmm_rpc.proto @@ -350,6 +350,13 @@ service Vmm { rpc SvStop(Id) returns (google.protobuf.Empty); // Remove a stopped supervisor process by ID. rpc SvRemove(Id) returns (google.protobuf.Empty); + + // List images available in the configured OCI registry. + rpc ListRegistryImages(google.protobuf.Empty) returns (RegistryImageListResponse); + // Pull an image from the OCI registry to local storage. + rpc PullRegistryImage(PullRegistryImageRequest) returns (google.protobuf.Empty); + // Delete a local guest image by name. + rpc DeleteImage(Id) returns (google.protobuf.Empty); } // DHCP lease event reported by the host DHCP server. @@ -365,6 +372,29 @@ message SvListResponse { repeated SvProcessInfo processes = 1; } +// Available images discovered from the OCI registry. +message RegistryImageListResponse { + repeated RegistryImageInfo images = 1; +} + +// Metadata for an image tag in the OCI registry. +message RegistryImageInfo { + // Tag name (e.g., "0.5.8", "nvidia-0.5.8") + string tag = 1; + // Whether this image is already downloaded locally + bool local = 2; + // Whether this image is currently being pulled + bool pulling = 3; + // Error message from the last failed pull attempt (empty if no error) + string error = 4; +} + +// Request to pull an image from the OCI registry. +message PullRegistryImageRequest { + // Tag to pull (e.g., "0.5.8") + string tag = 1; +} + // Information about a single supervisor process. message SvProcessInfo { string id = 1; diff --git a/vmm/src/app.rs b/vmm/src/app.rs index 71ab54485..b32116427 100644 --- a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -35,6 +35,7 @@ pub use qemu::{VmConfig, VmWorkDir}; mod id_pool; mod image; mod qemu; +pub(crate) mod registry; #[derive(Deserialize, Serialize, Debug, Clone)] pub struct PortMapping { @@ -118,16 +119,24 @@ pub struct GpuSpec { pub slot: String, } +#[derive(Clone, Debug)] +pub(crate) enum PullStatus { + Pulling, + Failed(String), +} + #[derive(Clone)] pub struct App { pub config: Arc, pub supervisor: SupervisorClient, state: Arc>, forward_service: Arc>, + /// Pull status for registry images: tag → status. + pub(crate) pull_status: Arc>>, } impl App { - fn lock(&self) -> MutexGuard<'_, AppState> { + pub(crate) fn lock(&self) -> MutexGuard<'_, AppState> { self.state.lock().or_panic("mutex poisoned") } @@ -152,6 +161,7 @@ impl App { })), config: Arc::new(config), forward_service: Arc::new(tokio::sync::Mutex::new(ForwardService::new())), + pull_status: Arc::new(Mutex::new(std::collections::HashMap::new())), } } @@ -172,7 +182,7 @@ impl App { { bail!("Invalid image name"); } - let image_path = self.config.image_path.join(&manifest.image); + let image_path = self.config.image.path.join(&manifest.image); let image = Image::load(&image_path).context("Failed to load image")?; let vm_id = manifest.id.clone(); let app_compose = vm_work_dir @@ -739,7 +749,7 @@ impl App { { bail!("Invalid image name"); } - let image_path = self.config.image_path.join(&manifest.image); + let image_path = self.config.image.path.join(&manifest.image); let image = Image::load(&image_path).context("Failed to load image")?; let vm_id = manifest.id.clone(); let already_running = cids_assigned.contains_key(&vm_id); @@ -854,7 +864,7 @@ impl App { } pub fn list_images(&self) -> Result> { - let image_path = self.config.image_path.clone(); + let image_path = self.config.image.path.clone(); let images = fs::read_dir(image_path).context("Failed to read image directory")?; Ok(images .flat_map(|entry| { @@ -1115,7 +1125,7 @@ fn rotate_serial_log(work_dir: &VmWorkDir, max_bytes: u64) { } pub(crate) fn make_sys_config(cfg: &Config, manifest: &Manifest) -> Result { - let image_path = cfg.image_path.join(&manifest.image); + let image_path = cfg.image.path.join(&manifest.image); let image = Image::load(image_path).context("Failed to load image info")?; let img_ver = image.info.version_tuple().unwrap_or((0, 0, 0)); let kms_urls = if manifest.kms_urls.is_empty() { diff --git a/vmm/src/app/registry.rs b/vmm/src/app/registry.rs new file mode 100644 index 000000000..d329af7c5 --- /dev/null +++ b/vmm/src/app/registry.rs @@ -0,0 +1,473 @@ +// SPDX-FileCopyrightText: © 2025 Phala Network +// +// SPDX-License-Identifier: Apache-2.0 + +//! OCI Distribution API client for pulling dstack guest images directly from +//! a container registry without requiring a local Docker daemon. + +use std::path::Path; + +use anyhow::{bail, Context, Result}; +use flate2::read::GzDecoder; +use reqwest::Client; +use serde::Deserialize; +use tracing::info; + +fn build_client() -> Result { + Ok(Client::builder() + .timeout(std::time::Duration::from_secs(600)) + .build()?) +} + +// ─── Tag listing ──────────────────────────────────────────────────────────── + +/// List tags from a Docker Registry HTTP API v2 endpoint. +/// +/// `image_ref` is in the form `registry.example.com/repo/name`. +pub async fn list_registry_tags(image_ref: &str) -> Result> { + let (registry, repo) = parse_image_ref(image_ref)?; + let client = build_client()?; + + let url = format!("https://{registry}/v2/{repo}/tags/list"); + info!("fetching registry tags from {url}"); + + let response = client + .get(&url) + .send() + .await + .context("failed to fetch registry tags")?; + + if response.status() == reqwest::StatusCode::UNAUTHORIZED { + return list_tags_with_token(&client, ®istry, &repo).await; + } + + if !response.status().is_success() { + bail!( + "registry returned HTTP {}: {}", + response.status(), + response.text().await.unwrap_or_default() + ); + } + + let tag_list: TagList = response + .json() + .await + .context("failed to parse registry tag list")?; + + Ok(tag_list.tags.unwrap_or_default()) +} + +/// Handle token-based auth (Docker Hub / registries requiring Bearer token). +async fn list_tags_with_token(client: &Client, registry: &str, repo: &str) -> Result> { + let token = fetch_token(client, registry, repo).await?; + let url = format!("https://{registry}/v2/{repo}/tags/list"); + let response = client + .get(&url) + .bearer_auth(&token) + .send() + .await + .context("failed to fetch registry tags with token")?; + + if !response.status().is_success() { + bail!( + "registry returned HTTP {} after auth: {}", + response.status(), + response.text().await.unwrap_or_default() + ); + } + + let tag_list: TagList = response + .json() + .await + .context("failed to parse registry tag list")?; + + Ok(tag_list.tags.unwrap_or_default()) +} + +// ─── Image pulling ────────────────────────────────────────────────────────── + +/// Pull an image from registry and extract to the local image directory. +/// +/// Fetches the OCI manifest, downloads each layer blob, and extracts +/// the tar (gzipped) contents into a flat directory. +pub async fn pull_and_extract(image_ref: &str, tag: &str, image_path: &Path) -> Result<()> { + let (registry, repo) = parse_image_ref(image_ref)?; + let client = build_client()?; + + info!("pulling image {image_ref}:{tag}"); + + // Resolve authentication + let token = try_fetch_token(&client, ®istry, &repo).await; + + // Fetch manifest + let manifest = fetch_manifest(&client, ®istry, &repo, tag, token.as_deref()).await?; + + // Determine output directory + let output_dir = determine_output_dir(tag, image_path); + if output_dir.exists() { + bail!("image directory already exists: {}", output_dir.display()); + } + + // Extract into temp dir first, then rename atomically + let tmp_dir = image_path.join(format!(".tmp-pull-{tag}")); + if tmp_dir.exists() { + fs_err::remove_dir_all(&tmp_dir).context("failed to clean up stale temp dir")?; + } + fs_err::create_dir_all(&tmp_dir)?; + + let result = download_and_extract_layers( + &client, + ®istry, + &repo, + &manifest, + token.as_deref(), + &tmp_dir, + ) + .await; + + if let Err(e) = &result { + tracing::error!("pull failed, cleaning up temp dir: {e:#}"); + let _ = fs_err::remove_dir_all(&tmp_dir); + return result; + } + + // Verify metadata.json exists + if !tmp_dir.join("metadata.json").exists() { + let _ = fs_err::remove_dir_all(&tmp_dir); + bail!("pulled image does not contain metadata.json - not a valid dstack guest image"); + } + + // Move to final location + fs_err::rename(&tmp_dir, &output_dir).with_context(|| { + format!( + "failed to rename {} to {}", + tmp_dir.display(), + output_dir.display() + ) + })?; + + info!("image extracted to {}", output_dir.display()); + Ok(()) +} + +/// Fetch OCI image manifest. +fn fetch_manifest<'a>( + client: &'a Client, + registry: &'a str, + repo: &'a str, + tag: &'a str, + token: Option<&'a str>, +) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { fetch_manifest_inner(client, registry, repo, tag, token).await }) +} + +async fn fetch_manifest_inner( + client: &Client, + registry: &str, + repo: &str, + tag: &str, + token: Option<&str>, +) -> Result { + let url = format!("https://{registry}/v2/{repo}/manifests/{tag}"); + + let mut req = client.get(&url).header( + "Accept", + "application/vnd.oci.image.manifest.v1+json, application/vnd.oci.image.index.v1+json, application/vnd.docker.distribution.manifest.v2+json, application/vnd.docker.distribution.manifest.list.v2+json", + ); + if let Some(t) = token { + req = req.bearer_auth(t); + } + + let response = req.send().await.context("failed to fetch manifest")?; + + if !response.status().is_success() { + bail!( + "failed to fetch manifest: HTTP {} {}", + response.status(), + response.text().await.unwrap_or_default() + ); + } + + // Try to parse as a single manifest first + let body = response.text().await?; + if let Ok(manifest) = serde_json::from_str::(&body) { + if !manifest.layers.is_empty() { + return Ok(manifest); + } + } + + // Might be an index/manifest list — pick the first manifest + if let Ok(index) = serde_json::from_str::(&body) { + if let Some(first) = index.manifests.into_iter().find(|m| { + // Prefer the non-attestation manifest + !m.media_type + .as_deref() + .is_some_and(|mt| mt.contains("attestation")) + }) { + return fetch_manifest(client, registry, repo, &first.digest, token).await; + } + } + + bail!("unsupported manifest format"); +} + +/// Download and extract all layer blobs into `dest`. +async fn download_and_extract_layers( + client: &Client, + registry: &str, + repo: &str, + manifest: &OciManifest, + token: Option<&str>, + dest: &Path, +) -> Result<()> { + for (i, layer) in manifest.layers.iter().enumerate() { + let size_mb = layer.size as f64 / 1_048_576.0; + info!( + "downloading layer {}/{}: {} ({:.1} MB)", + i + 1, + manifest.layers.len(), + &layer.digest[..19.min(layer.digest.len())], + size_mb, + ); + + let url = format!("https://{registry}/v2/{repo}/blobs/{}", layer.digest); + let mut req = client.get(&url); + if let Some(t) = token { + req = req.bearer_auth(t); + } + + let response = req + .send() + .await + .with_context(|| format!("failed to download layer {}", layer.digest))?; + + if !response.status().is_success() { + bail!( + "failed to download blob {}: HTTP {}", + layer.digest, + response.status() + ); + } + + let bytes = response.bytes().await.context("failed to read blob body")?; + extract_layer(&bytes, &layer.media_type, dest)?; + } + + Ok(()) +} + +/// Extract a single layer (tar+gzip or tar) into `dest`. +fn extract_layer(data: &[u8], media_type: &str, dest: &Path) -> Result<()> { + let is_gzip = media_type.contains("gzip") + || media_type.contains("tar+gzip") + || (data.len() >= 2 && data[0] == 0x1f && data[1] == 0x8b); + + if is_gzip { + let decoder = GzDecoder::new(data); + let mut archive = tar::Archive::new(decoder); + archive + .unpack(dest) + .context("failed to extract gzipped tar layer")?; + } else { + let mut archive = tar::Archive::new(data); + archive + .unpack(dest) + .context("failed to extract tar layer")?; + } + + // Remove docker/OCI artifact directories that may appear in layers + for dir in &["dev", "etc", "proc", "sys"] { + let d = dest.join(dir); + if d.is_dir() { + let _ = fs_err::remove_dir(&d); + } + } + + Ok(()) +} + +// ─── Token auth ───────────────────────────────────────────────────────────── + +/// Try to fetch a Bearer token. Returns None if the registry doesn't need one. +async fn try_fetch_token(client: &Client, registry: &str, repo: &str) -> Option { + // Probe the /v2/ endpoint to check if auth is needed + let probe = client + .get(format!("https://{registry}/v2/")) + .send() + .await + .ok()?; + + if probe.status() != reqwest::StatusCode::UNAUTHORIZED { + return None; + } + + // Parse WWW-Authenticate header for realm and service + let www_auth = probe + .headers() + .get("www-authenticate") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + let (realm, service) = parse_www_authenticate(www_auth); + + let token_url = if !realm.is_empty() { + format!("{realm}?service={service}&scope=repository:{repo}:pull") + } else { + format!("https://{registry}/v2/token?service={registry}&scope=repository:{repo}:pull") + }; + + let resp = client.get(&token_url).send().await.ok()?; + if !resp.status().is_success() { + return None; + } + + let token_data: TokenResponse = resp.json().await.ok()?; + Some(token_data.token) +} + +async fn fetch_token(client: &Client, registry: &str, repo: &str) -> Result { + try_fetch_token(client, registry, repo) + .await + .context("registry requires authentication but token exchange failed") +} + +/// Extract realm and service from a WWW-Authenticate: Bearer header. +fn parse_www_authenticate(header: &str) -> (String, String) { + let mut realm = String::new(); + let mut service = String::new(); + + for part in header.split(',') { + let part = part.trim(); + if let Some(v) = part + .strip_prefix("Bearer realm=\"") + .or_else(|| part.strip_prefix("realm=\"")) + { + realm = v.trim_end_matches('"').to_string(); + } else if let Some(v) = part.strip_prefix("service=\"") { + service = v.trim_end_matches('"').to_string(); + } + } + + (realm, service) +} + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +fn determine_output_dir(tag: &str, image_path: &Path) -> std::path::PathBuf { + let dir_name = if tag.starts_with("dstack-") { + tag.to_string() + } else { + format!("dstack-{tag}") + }; + image_path.join(dir_name) +} + +/// Parse "registry.example.com/repo/name" into ("registry.example.com", "repo/name"). +/// +/// For Docker Hub short names like "dstacktee/guest-image" (no dots in the +/// first component), automatically expands to "registry-1.docker.io/dstacktee/guest-image". +fn parse_image_ref(image_ref: &str) -> Result<(String, String)> { + let trimmed = image_ref + .trim_start_matches("https://") + .trim_start_matches("http://"); + + let first_slash = trimmed + .find('/') + .context("invalid image reference: no repository path")?; + + let first_component = &trimmed[..first_slash]; + let repo = &trimmed[first_slash + 1..]; + + if repo.is_empty() { + bail!("invalid image reference: empty repository"); + } + + // Docker Hub short names don't contain dots or colons + let registry = if first_component.contains('.') || first_component.contains(':') { + first_component.to_string() + } else { + // Docker Hub: "user/repo" → "registry-1.docker.io" + // and the repo needs "library/" prefix for official images + return Ok(( + "registry-1.docker.io".to_string(), + format!("{first_component}/{repo}"), + )); + }; + + Ok((registry, repo.to_string())) +} + +// ─── OCI types ────────────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct TagList { + tags: Option>, +} + +#[derive(Deserialize)] +struct TokenResponse { + token: String, +} + +#[derive(Deserialize, Debug)] +struct OciManifest { + #[serde(default)] + layers: Vec, +} + +#[derive(Deserialize, Debug)] +struct OciLayer { + #[serde(rename = "mediaType", default)] + media_type: String, + digest: String, + size: u64, +} + +#[derive(Deserialize, Debug)] +struct OciIndex { + manifests: Vec, +} + +#[derive(Deserialize, Debug)] +struct OciIndexEntry { + #[serde(rename = "mediaType")] + media_type: Option, + digest: String, +} + +// ─── Tests ────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_image_ref_private_registry() { + let (reg, repo) = parse_image_ref("cr.kvin.wang/dstack/guest-image").unwrap(); + assert_eq!(reg, "cr.kvin.wang"); + assert_eq!(repo, "dstack/guest-image"); + } + + #[test] + fn test_parse_image_ref_docker_hub() { + let (reg, repo) = parse_image_ref("dstacktee/guest-image").unwrap(); + assert_eq!(reg, "registry-1.docker.io"); + assert_eq!(repo, "dstacktee/guest-image"); + } + + #[test] + fn test_parse_image_ref_with_scheme() { + let (reg, repo) = parse_image_ref("https://ghcr.io/dstack-tee/guest-image").unwrap(); + assert_eq!(reg, "ghcr.io"); + assert_eq!(repo, "dstack-tee/guest-image"); + } + + #[test] + fn test_parse_www_authenticate() { + let (realm, service) = parse_www_authenticate( + r#"Bearer realm="https://auth.docker.io/token",service="registry.docker.io""#, + ); + assert_eq!(realm, "https://auth.docker.io/token"); + assert_eq!(service, "registry.docker.io"); + } +} diff --git a/vmm/src/config.rs b/vmm/src/config.rs index bb8c218e0..cdb587456 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -309,10 +309,21 @@ pub struct GatewayConfig { pub agent_port: u16, } +#[derive(Debug, Clone, Deserialize, Default)] +pub struct ImageConfig { + /// Path to guest image directory + #[serde(default)] + pub path: PathBuf, + /// OCI image registry for guest images (e.g., "dstacktee/guest-image") + #[serde(default)] + pub registry: String, +} + #[derive(Debug, Clone, Deserialize)] pub struct Config { + /// Deprecated: use `[image] path` instead. Kept for backward compatibility. #[serde(default)] - pub image_path: PathBuf, + image_path: PathBuf, #[serde(default)] pub run_path: PathBuf, /// The URL of the KMS server @@ -322,6 +333,10 @@ pub struct Config { #[serde(default)] pub node_name: String, + /// Image configuration + #[serde(default)] + pub image: ImageConfig, + /// The buffer size in VMM process for guest events pub event_buffer_size: usize, @@ -361,12 +376,10 @@ impl ProcessAnnotation { } impl Config { - pub fn abs_path(self) -> Result { - Ok(Self { - image_path: self.image_path.absolutize()?.to_path_buf(), - run_path: self.run_path.absolutize()?.to_path_buf(), - ..self - }) + pub fn abs_path(mut self) -> Result { + self.image.path = self.image.path.absolutize()?.to_path_buf(); + self.run_path = self.run_path.absolutize()?.to_path_buf(); + Ok(self) } } @@ -496,8 +509,20 @@ impl Config { { let home = dirs::home_dir().context("Failed to get home directory")?; let app_home = home.join(".dstack-vmm"); - if me.image_path == PathBuf::default() { - me.image_path = app_home.join("image"); + // Migrate deprecated top-level `image_path` to `[image] path` + if me.image_path != PathBuf::default() { + if me.image.path == PathBuf::default() { + warn!( + "config: top-level `image_path` is deprecated, use `[image] path` instead" + ); + me.image.path = me.image_path.clone(); + } else { + warn!("config: both `image_path` and `[image] path` are set, using `[image] path`"); + } + me.image_path = PathBuf::default(); + } + if me.image.path == PathBuf::default() { + me.image.path = app_home.join("image"); } if me.run_path == PathBuf::default() { me.run_path = app_home.join("vm"); diff --git a/vmm/src/main.rs b/vmm/src/main.rs index ee5d63f54..9803785d8 100644 --- a/vmm/src/main.rs +++ b/vmm/src/main.rs @@ -198,7 +198,7 @@ async fn main() -> Result<()> { let _discovery_reg = discovery::DiscoveryRegistration::register( &listen_address, args.config.as_deref(), - &config.image_path, + &config.image.path, &config.run_path, &config.node_name, &app_version(), diff --git a/vmm/src/main_service.rs b/vmm/src/main_service.rs index 1fb98b434..88aa884c1 100644 --- a/vmm/src/main_service.rs +++ b/vmm/src/main_service.rs @@ -12,11 +12,13 @@ use dstack_vmm_rpc::vmm_server::{VmmRpc, VmmServer}; use dstack_vmm_rpc::{ AppId, ComposeHash as RpcComposeHash, DhcpLeaseRequest, GatewaySettings, GetInfoResponse, GetMetaResponse, Id, ImageInfo as RpcImageInfo, ImageListResponse, KmsSettings, - ListGpusResponse, PublicKeyResponse, ReloadVmsResponse, ResizeVmRequest, ResourcesSettings, + ListGpusResponse, PublicKeyResponse, PullRegistryImageRequest, RegistryImageInfo, + RegistryImageListResponse, ReloadVmsResponse, ResizeVmRequest, ResourcesSettings, StatusRequest, StatusResponse, SvListResponse, SvProcessInfo, UpdateVmRequest, VersionResponse, VmConfiguration, }; use fs_err as fs; +use or_panic::ResultOrPanic; use ra_rpc::{CallContext, RpcCall}; use tracing::{info, warn}; @@ -613,6 +615,129 @@ impl VmmRpc for RpcHandler { self.app.supervisor.remove(&request.id).await?; Ok(()) } + + async fn list_registry_images(self) -> Result { + let registry = &self.app.config.image.registry; + if registry.is_empty() { + return Ok(RegistryImageListResponse { images: vec![] }); + } + + let tags = crate::app::registry::list_registry_tags(registry) + .await + .context("failed to list registry tags")?; + + // Get local images to mark which are already downloaded + let local_images = self.app.list_images()?; + let local_names: std::collections::HashSet = + local_images.into_iter().map(|(name, _)| name).collect(); + + let pull_status = self.app.pull_status.lock().or_panic("mutex poisoned"); + + // Filter to version-like tags (skip sha256-* hash tags) + let images = tags + .into_iter() + .filter(|tag| !tag.starts_with("sha256-")) + .map(|tag| { + let local_name = if tag.starts_with("dstack-") { + tag.clone() + } else { + format!("dstack-{tag}") + }; + let is_local = local_names.contains(&local_name); + let (is_pulling, error) = match pull_status.get(&tag) { + Some(crate::app::PullStatus::Pulling) => (true, String::new()), + Some(crate::app::PullStatus::Failed(msg)) => (false, msg.clone()), + None => (false, String::new()), + }; + RegistryImageInfo { + tag, + local: is_local, + pulling: is_pulling, + error, + } + }) + .collect(); + + Ok(RegistryImageListResponse { images }) + } + + async fn delete_image(self, request: Id) -> Result<()> { + let name = &request.id; + if name.is_empty() || name.contains("..") || name.contains('/') { + bail!("invalid image name"); + } + + // Check no VM uses this image + { + let state = self.app.lock(); + for vm in state.iter_vms() { + if vm.config.manifest.image == *name { + bail!( + "cannot delete image '{}': in use by VM '{}'", + name, + vm.config.manifest.name, + ); + } + } + } + + let image_dir = self.app.config.image.path.join(name); + if !image_dir.exists() { + bail!("image '{}' not found", name); + } + + fs_err::remove_dir_all(&image_dir).with_context(|| { + format!("failed to delete image directory: {}", image_dir.display()) + })?; + + info!("deleted local image: {name}"); + Ok(()) + } + + async fn pull_registry_image(self, request: PullRegistryImageRequest) -> Result<()> { + let registry = &self.app.config.image.registry; + if registry.is_empty() { + bail!("image registry is not configured"); + } + + // Check if already pulling + { + let mut status = self.app.pull_status.lock().or_panic("mutex poisoned"); + if matches!( + status.get(&request.tag), + Some(crate::app::PullStatus::Pulling) + ) { + bail!("image {} is already being pulled", request.tag); + } + status.insert(request.tag.clone(), crate::app::PullStatus::Pulling); + } + + // Spawn background task + let tag = request.tag.clone(); + let registry = registry.clone(); + let image_path = self.app.config.image.path.clone(); + let pull_status = self.app.pull_status.clone(); + + info!("starting background pull for {tag}"); + tokio::spawn(async move { + let result = crate::app::registry::pull_and_extract(®istry, &tag, &image_path).await; + + let mut status = pull_status.lock().unwrap_or_else(|e| e.into_inner()); + match result { + Ok(()) => { + status.remove(&tag); + info!("registry image {tag} pulled successfully"); + } + Err(e) => { + let msg = format!("{e:#}"); + tracing::error!("failed to pull registry image {tag}: {msg}"); + status.insert(tag, crate::app::PullStatus::Failed(msg)); + } + } + }); + + Ok(()) + } } impl RpcCall for RpcHandler { diff --git a/vmm/src/one_shot.rs b/vmm/src/one_shot.rs index cbc33f77f..39f9d68f4 100644 --- a/vmm/src/one_shot.rs +++ b/vmm/src/one_shot.rs @@ -79,7 +79,7 @@ pub async fn run_one_shot( let manifest = create_manifest_from_vm_config(vm_config.clone(), &config.cvm)?; // Load image - let image_path = config.image_path.join(&manifest.image); + let image_path = config.image.path.join(&manifest.image); let image = Image::load(&image_path) .with_context(|| format!("Failed to load image: {}", image_path.display()))?; diff --git a/vmm/ui/src/composables/useVmManager.ts b/vmm/ui/src/composables/useVmManager.ts index b4b41abe7..804d56fac 100644 --- a/vmm/ui/src/composables/useVmManager.ts +++ b/vmm/ui/src/composables/useVmManager.ts @@ -1475,6 +1475,82 @@ type CreateVmPayloadSource = { return features.length > 0 ? features.join(', ') : 'None'; } + // ── Image Registry ───────────────────────────────────────────── + const showImageRegistry = ref(false); + const registryImages = ref([] as Array<{ tag: string; local: boolean; pulling: boolean; error: string }>); + const registryLoading = ref(false); + let registryRefreshTimer: ReturnType | null = null; + + async function loadRegistryImages() { + const isInitialLoad = registryImages.value.length === 0; + if (isInitialLoad) { + registryLoading.value = true; + } + try { + const data = await vmmRpc.listRegistryImages({}); + registryImages.value = (data.images || []).sort((a: any, b: any) => { + // Sort by tag descending (newest versions first) + return (b.tag || '').localeCompare(a.tag || '', undefined, { numeric: true }); + }); + // If any image is pulling, refresh local images too + if (registryImages.value.some((img: any) => img.pulling)) { + loadImages(); + } + } catch (error) { + recordError('failed to load registry images', error); + } finally { + registryLoading.value = false; + } + } + + async function pullRegistryImage(tag: string) { + // Optimistic update: mark as pulling immediately, clear previous error + const img = registryImages.value.find((i: any) => i.tag === tag); + if (img) { + img.pulling = true; + img.error = ''; + } + try { + await vmmRpc.pullRegistryImage({ tag }); + } catch (error) { + // Revert optimistic update on failure + if (img) { + img.pulling = false; + } + recordError(`failed to pull image ${tag}`, error); + } + } + + async function deleteImage(name: string) { + if (!confirm(`Delete local image "${name}"?`)) return; + try { + await vmmRpc.deleteImage({ id: name }); + await loadImages(); + await loadRegistryImages(); + } catch (error) { + recordError(`failed to delete image ${name}`, error); + } + } + + async function openImageRegistry() { + showImageRegistry.value = true; + await Promise.all([loadImages(), loadRegistryImages()]); + registryRefreshTimer = setInterval(async () => { + await loadRegistryImages(); + // Refresh local images if something just finished pulling + if (registryImages.value.some((img: any) => img.pulling)) { + await loadImages(); + } + }, 3000); + } + + watch(showImageRegistry, (open) => { + if (!open && registryRefreshTimer) { + clearInterval(registryRefreshTimer); + registryRefreshTimer = null; + } + }); + // ── Process Manager ───────────────────────────────────────────── const showProcessManager = ref(false); const supervisorProcesses = ref([] as any[]); @@ -1636,6 +1712,13 @@ type CreateVmPayloadSource = { svStatusClass, svIsRunning, svIsStopped, + showImageRegistry, + registryImages, + registryLoading, + loadRegistryImages, + pullRegistryImage, + deleteImage, + openImageRegistry, }; } diff --git a/vmm/ui/src/templates/app.html b/vmm/ui/src/templates/app.html index 439c59e03..7b6a63bdc 100644 --- a/vmm/ui/src/templates/app.html +++ b/vmm/ui/src/templates/app.html @@ -35,6 +35,12 @@

dstack-vmm

Reload VMs + + + + +
+ +

Local

+
No local images found.
+ + + + + + + + + + + + + + + +
NameVersionActions
{{ img.name }}{{ img.version }} + +
+ + +

Registry

+
Loading registry tags...
+
+ No registry configured. Set [image] registry in vmm.toml. +
+ + + + + + + + + + + + + + + + + + +
TagStatusActions
{{ img.tag }} + + + Pulling... + + + + Failed + + + + Local + + + + Remote + + + + Downloading + Downloaded +
+ ⚠ {{ img.error }} +
+
+ +

Supervisor Processes

diff --git a/vmm/vmm.toml b/vmm/vmm.toml index 4b8514a4a..ba8e2a88a 100644 --- a/vmm/vmm.toml +++ b/vmm/vmm.toml @@ -14,6 +14,12 @@ kms_url = "http://127.0.0.1:8081" event_buffer_size = 20 node_name = "" +[image] +# Path to guest image directory (default: ~/.dstack-vmm/image) +# path = "" +# OCI image registry for guest images (e.g., "dstacktee/guest-image") +registry = "" + [cvm] qemu_path = "" kms_urls = ["http://127.0.0.1:8081"] From 99328ed12c330e402fb1d8bbb46b09c96b317eb6 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sat, 21 Mar 2026 03:34:55 +0000 Subject: [PATCH 2/2] feat(vmm): add OCI image packaging script and registry docs - Add scripts/dstack-image-oci.sh for pushing guest images to OCI registries - Document registry setup in guest-image-setup.md (push, VMM config, pull flow) --- docs/tutorials/guest-image-setup.md | 53 ++++++ scripts/dstack-image-oci.sh | 263 ++++++++++++++++++++++++++++ 2 files changed, 316 insertions(+) create mode 100755 scripts/dstack-image-oci.sh diff --git a/docs/tutorials/guest-image-setup.md b/docs/tutorials/guest-image-setup.md index f53d36543..e5ab2f39f 100644 --- a/docs/tutorials/guest-image-setup.md +++ b/docs/tutorials/guest-image-setup.md @@ -230,6 +230,59 @@ The `image_path` should point to `/var/lib/dstack/images`. If VMM isn't finding the images, verify the path in the configuration matches where you installed them. +## OCI Registry Setup + +Guest images can be stored in any OCI-compatible container registry (Docker Hub, GHCR, Harbor, etc.), allowing VMM to discover and pull images directly from the web UI. + +### Pushing Images to a Registry + +Use the `dstack-image-oci.sh` script to package and push a guest image directory: + +```bash +# Push a standard image (auto-tags: version + sha256-hash) +./scripts/dstack-image-oci.sh push /var/lib/dstack/images/dstack-0.5.8 ghcr.io/your-org/guest-image + +# Push an nvidia variant +./scripts/dstack-image-oci.sh push /var/lib/dstack/images/dstack-nvidia-0.5.8 ghcr.io/your-org/guest-image + +# Push with a custom tag +./scripts/dstack-image-oci.sh push /var/lib/dstack/images/dstack-0.5.8 ghcr.io/your-org/guest-image --tag latest + +# List tags in the registry +./scripts/dstack-image-oci.sh list ghcr.io/your-org/guest-image +``` + +The script reads `metadata.json` and `digest.txt` from the image directory and auto-generates tags: + +| Image directory | Generated tags | +|---|---| +| `dstack-0.5.8` | `0.5.8`, `sha256-` | +| `dstack-dev-0.5.8` | `dev-0.5.8`, `sha256-` | +| `dstack-nvidia-0.5.8` | `nvidia-0.5.8`, `sha256-` | + +Prerequisites: `docker` CLI (for building), `python3`, registry login (`docker login`). + +### Configuring VMM to Use a Registry + +Add the `[image]` section to `vmm.toml`: + +```toml +[image] +# Local image directory (default: ~/.dstack-vmm/image) +# path = "/var/lib/dstack/images" + +# OCI registry for discovering and pulling images +registry = "ghcr.io/your-org/guest-image" +``` + +After restarting VMM, click **Images** in the web UI to browse the registry. Click **Pull** to download an image — it will be extracted to the local image directory automatically. + +### How It Works + +- **Push**: The script builds a `FROM scratch` Docker image containing the guest image files (kernel, initrd, rootfs, firmware, metadata) and pushes it to the registry. +- **Pull**: VMM fetches the OCI manifest via the Registry HTTP API v2, downloads each layer blob, and extracts the tar contents into the local image directory. No Docker daemon required on the VMM host. +- **Discovery**: VMM queries the registry's tag list API to show available versions alongside locally installed images. + ## Managing Multiple Image Versions You can have multiple image versions installed simultaneously: diff --git a/scripts/dstack-image-oci.sh b/scripts/dstack-image-oci.sh new file mode 100755 index 000000000..f8b986224 --- /dev/null +++ b/scripts/dstack-image-oci.sh @@ -0,0 +1,263 @@ +#!/bin/bash +# SPDX-FileCopyrightText: © 2025 Phala Network +# SPDX-License-Identifier: Apache-2.0 +# +# dstack guest image OCI packaging tool +# Pack and push dstack guest OS images to an OCI-compatible container registry. +set -euo pipefail + +usage() { + cat < [options] + +Commands: + push [--tag ] Pack and push image to registry + list [--filter ] List available tags in registry + +Arguments: + Path to a dstack guest image directory (contains metadata.json) + Full image reference (e.g., ghcr.io/org/guest-image) + +Examples: + $0 push ./dstack-0.5.8 cr.kvin.wang/dstack/guest-image + $0 push ./dstack-nvidia-0.5.8 ghcr.io/dstack-tee/guest-image --tag nvidia-0.5.8 + $0 list cr.kvin.wang/dstack/guest-image + $0 list cr.kvin.wang/dstack/guest-image --filter nvidia +EOF + exit 1 +} + +COMMAND="${1:-}" +[ -z "$COMMAND" ] && usage +shift + +# --- PUSH --- +cmd_push() { + local image_dir="" + local image_ref="" + local extra_tag="" + + while [ $# -gt 0 ]; do + case "$1" in + --tag) extra_tag="$2"; shift 2 ;; + -h|--help) usage ;; + -*) echo "Unknown option: $1"; exit 1 ;; + *) + if [ -z "$image_dir" ]; then + image_dir="$1" + elif [ -z "$image_ref" ]; then + image_ref="$1" + else + echo "Unexpected argument: $1"; exit 1 + fi + shift + ;; + esac + done + + [ -z "$image_dir" ] && { echo "Error: image directory required"; usage; } + [ -z "$image_ref" ] && { echo "Error: image reference required"; usage; } + [ -d "$image_dir" ] || { echo "Error: $image_dir is not a directory"; exit 1; } + + local metadata="$image_dir/metadata.json" + [ -f "$metadata" ] || { echo "Error: metadata.json not found in $image_dir"; exit 1; } + + # Read image info + local version + version=$(python3 -c "import json; print(json.load(open('$metadata'))['version'])") + local digest_file="$image_dir/digest.txt" + local os_image_hash="" + if [ -f "$digest_file" ]; then + os_image_hash=$(tr -d '\n\r' < "$digest_file") + fi + + # Detect image variant from directory name + local dirname + dirname=$(basename "$image_dir") + local variant="" + if [[ "$dirname" == *-nvidia-dev-* ]]; then + variant="nvidia-dev" + elif [[ "$dirname" == *-nvidia-* ]]; then + variant="nvidia" + elif [[ "$dirname" == *-dev-* ]]; then + variant="dev" + elif [[ "$dirname" == *-cloud-* ]]; then + variant="cloud" + fi + + # Build tag list + local tags=() + if [ -n "$extra_tag" ]; then + tags+=("$extra_tag") + else + # Auto-generate tags from variant + version + if [ -n "$variant" ]; then + tags+=("${variant}-${version}") + else + tags+=("${version}") + fi + if [ -n "$os_image_hash" ]; then + tags+=("sha256-${os_image_hash}") + fi + fi + + echo "=== Packing dstack guest image ===" + echo " Source: $image_dir" + echo " Version: $version" + echo " Variant: ${variant:-standard}" + echo " Hash: ${os_image_hash:-}" + echo " Registry: $image_ref" + echo " Tags: ${tags[*]}" + echo "" + + # Create build context in a temp directory + local tmp_dir + tmp_dir=$(mktemp -d) + trap 'rm -rf "$tmp_dir"' EXIT + + # Collect all files + local files=() + for f in "$image_dir"/*; do + [ -f "$f" ] && files+=("$(basename "$f")") + done + + # Generate Dockerfile + { + echo "FROM scratch" + for f in "${files[@]}"; do + echo "COPY $f /" + done + echo "LABEL org.opencontainers.image.title=\"dstack-guest-image\"" + echo "LABEL org.opencontainers.image.version=\"$version\"" + echo "LABEL wang.dstack.os-image-hash=\"${os_image_hash}\"" + echo "LABEL wang.dstack.variant=\"${variant:-standard}\"" + } > "$tmp_dir/Dockerfile" + + # Copy files to build context + for f in "${files[@]}"; do + cp "$image_dir/$f" "$tmp_dir/" + done + + # Build + local primary_ref="${image_ref}:${tags[0]}" + echo "Building: $primary_ref" + docker build -t "$primary_ref" "$tmp_dir" + + # Tag additional tags + for ((i=1; i<${#tags[@]}; i++)); do + local ref="${image_ref}:${tags[$i]}" + echo "Tagging: $ref" + docker tag "$primary_ref" "$ref" + done + + # Push all tags + for tag in "${tags[@]}"; do + local ref="${image_ref}:${tag}" + echo "Pushing: $ref" + docker push "$ref" + done + + # Build and push measurement-only image (no rootfs, for verifier) + if [ -n "$os_image_hash" ]; then + local mr_tag="mr-sha256-${os_image_hash}" + local mr_dir + mr_dir=$(mktemp -d) + + # Read rootfs filename from metadata to exclude it + local rootfs_name + rootfs_name=$(python3 -c "import json; print(json.load(open('$metadata')).get('rootfs', ''))") + + # Collect files excluding rootfs + local mr_files=() + for f in "${files[@]}"; do + if [ "$f" != "$rootfs_name" ]; then + mr_files+=("$f") + cp "$image_dir/$f" "$mr_dir/" + fi + done + + { + echo "FROM scratch" + for f in "${mr_files[@]}"; do + echo "COPY $f /" + done + echo "LABEL org.opencontainers.image.title=\"dstack-guest-image-mr\"" + echo "LABEL org.opencontainers.image.version=\"$version\"" + echo "LABEL wang.dstack.os-image-hash=\"${os_image_hash}\"" + echo "LABEL wang.dstack.variant=\"${variant:-standard}\"" + echo "LABEL wang.dstack.measurement-only=\"true\"" + } > "$mr_dir/Dockerfile" + + local mr_ref="${image_ref}:${mr_tag}" + echo "" + echo "Building measurement image (no rootfs): $mr_ref" + echo " Files: ${mr_files[*]}" + docker build -t "$mr_ref" "$mr_dir" + + echo "Pushing: $mr_ref" + docker push "$mr_ref" + + rm -rf "$mr_dir" + tags+=("$mr_tag") + fi + + echo "" + echo "=== Done ===" + for tag in "${tags[@]}"; do + echo " ${image_ref}:${tag}" + done +} + +# --- LIST --- +cmd_list() { + local image_ref="" + local filter="" + + while [ $# -gt 0 ]; do + case "$1" in + --filter) filter="$2"; shift 2 ;; + -h|--help) usage ;; + -*) echo "Unknown option: $1"; exit 1 ;; + *) + if [ -z "$image_ref" ]; then + image_ref="$1" + else + echo "Unexpected argument: $1"; exit 1 + fi + shift + ;; + esac + done + + [ -z "$image_ref" ] && { echo "Error: image reference required"; usage; } + + echo "=== Tags for ${image_ref} ===" + + # Parse registry and repo from image_ref + local registry repo + registry="${image_ref%%/*}" + repo="${image_ref#*/}" + + local tags_json + tags_json=$(skopeo list-tags "docker://${image_ref}" 2>/dev/null || \ + curl -sf "https://${registry}/v2/${repo}/tags/list" 2>/dev/null || \ + echo '{"tags":[]}') + + python3 -c " +import json, sys, re +data = json.load(sys.stdin) +tags = sorted(data.get('Tags', data.get('tags', []))) +filt = '$filter' +for tag in tags: + if not filt or re.search(filt, tag): + print(f' {tag}') +" <<< "$tags_json" +} + +# Dispatch +case "$COMMAND" in + push) cmd_push "$@" ;; + list) cmd_list "$@" ;; + -h|--help) usage ;; + *) echo "Unknown command: $COMMAND"; usage ;; +esac