Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions crates/openshell-driver-vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ flate2 = "1"
sha2 = "0.10"
zstd = "0.13"

[dev-dependencies]
temp-env = "0.3"

# smol-rs/polling drives the BSD/macOS parent-death detection in
# procguard via kqueue's EVFILT_PROC / NOTE_EXIT filter. We could use
# it on Linux too (via epoll + pidfd) but sticking with
Expand Down
22 changes: 22 additions & 0 deletions crates/openshell-driver-vm/runtime/kernel/openshell.kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,28 @@ CONFIG_MEMCG=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y

# ── PCI/PCIe (required for GPU passthrough via QEMU vfio-pci) ────────────
# The libkrunfw base config disables CONFIG_PCI. GPU sandboxes using the
# QEMU backend pass the GPU through as a PCIe device on a q35 machine.
# Without PCI core support the guest kernel cannot see any PCI bus, so the
# nvidia driver loads but finds zero devices.
CONFIG_PCI=y
CONFIG_PCI_MSI=y
CONFIG_PCIEPORTBUS=y

# ── Loadable kernel modules (required for GPU passthrough) ──────────────
# The libkrunfw base config disables CONFIG_MODULES. GPU sandboxes need it
# to load nvidia.ko, nvidia-uvm.ko, and nvidia-modeset.ko at boot via
# modprobe. Without this, the guest kernel rejects all module loads.
#
# SECURITY NOTE: This enables module loading for ALL VMs (including
# non-GPU ones), expanding the guest kernel attack surface. The sandbox
# supervisor's seccomp profile must block the init_module/finit_module
# syscalls (and delete_module, since CONFIG_MODULE_UNLOAD is also set
# below) for the sandbox user to prevent arbitrary module loading or
# unloading.
# Follow-up (tracked): consider per-purpose kernel builds (GPU vs
# non-GPU) so that non-GPU VMs can keep CONFIG_MODULES disabled.
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y

# ── Security features required by the sandbox runtime ───────────────────
CONFIG_SECURITY_LANDLOCK=y
CONFIG_SECCOMP_FILTER=y
16 changes: 16 additions & 0 deletions crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,17 @@ create_gpu_device_nodes_mknod() {
setup_gpu() {
ts "GPU_ENABLED=true — initializing GPU passthrough"

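# Kernel modules are built for a specific guest kernel version and are
# installed under lib/modules/<version>/. If the running kernel reports a
# different version, modprobe searches a different directory and will not
# find them, so warn up front to make the later failure easy to diagnose.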
local expected_kver="6.12.76"
local actual_kver
actual_kver="$(uname -r)"
if [ "${actual_kver}" != "${expected_kver}" ]; then
ts "WARNING: kernel version mismatch: expected ${expected_kver}, got ${actual_kver}"
ts " GPU modules are installed under lib/modules/${expected_kver}/"
ts " modprobe may fail to find them"
fi

if ! command -v modprobe >/dev/null 2>&1; then
ts "FATAL: modprobe not found; cannot load nvidia kernel modules"
return 1
Expand All @@ -249,6 +260,11 @@ setup_gpu() {
fi
fi

ts "generating module dependency index"
if ! depmod -a "$(uname -r)" 2>/dev/null; then
ts "WARNING: depmod failed; modprobe may not find modules"
fi

ts "loading nvidia kernel modules"
modprobe nvidia || { ts "FATAL: modprobe nvidia failed"; return 1; }
modprobe nvidia_uvm 2>/dev/null || true
Expand Down
37 changes: 31 additions & 6 deletions crates/openshell-driver-vm/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use crate::gpu::{
GpuInventory, SubnetAllocator, allocate_vsock_cid, mac_from_sandbox_id, tap_device_name,
};
use crate::rootfs::{
create_rootfs_archive_from_dir, extract_rootfs_archive_to,
prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path,
create_rootfs_archive_from_dir, extract_rootfs_archive_to, inject_gpu_modules,
prepare_sandbox_rootfs_from_image_root, refresh_runtime_artifacts, sandbox_guest_init_path,
};
use bollard::Docker;
use bollard::errors::Error as BollardError;
Expand Down Expand Up @@ -419,6 +419,28 @@ impl VmDriver {
return Err(err);
}
};
if is_gpu {
let rootfs_for_gpu = rootfs.clone();
let driver_state_dir = self.config.state_dir.clone();
if let Err(err) = tokio::task::spawn_blocking(move || {
inject_gpu_modules(&rootfs_for_gpu, &driver_state_dir)
})
.await
.map_err(|e| Status::internal(format!("GPU module injection panicked: {e}")))?
{
warn!(
sandbox_id = %sandbox.id,
error = %err,
"vm driver: GPU module injection failed"
);
let _ = tokio::fs::remove_dir_all(&state_dir).await;
return Err(Status::failed_precondition(format!(
"GPU module injection failed: {err}"
)));
}
info!(sandbox_id = %sandbox.id, "vm driver: GPU modules injected into rootfs");
}

if let Some(tls_paths) = tls_paths.as_ref()
&& let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await
{
Expand Down Expand Up @@ -740,10 +762,13 @@ impl VmDriver {
.await?;
let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity);
let rootfs_dest = rootfs.to_path_buf();
tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest))
.await
.map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))?
.map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?;
tokio::task::spawn_blocking(move || {
extract_rootfs_archive_to(&archive_path, &rootfs_dest)?;
refresh_runtime_artifacts(&rootfs_dest)
})
.await
.map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))?
.map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?;

Ok(image_identity)
}
Expand Down
Loading
Loading