diff --git a/Cargo.lock b/Cargo.lock index fd907f3ac5..93ab10d31b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8988,7 +8988,6 @@ dependencies = [ "task_control", "test_with_tracing", "tracelimit", - "tracing", "unicycle", "virtio", "virtio_resources", diff --git a/vm/devices/virtio/virtio/src/common.rs b/vm/devices/virtio/virtio/src/common.rs index 7c6dbdf8e3..54df91e8f8 100644 --- a/vm/devices/virtio/virtio/src/common.rs +++ b/vm/devices/virtio/virtio/src/common.rs @@ -462,7 +462,14 @@ pub trait VirtioDevice: inspect::InspectMut + Send { fn traits(&self) -> DeviceTraits; fn read_registers_u32(&self, offset: u16) -> u32; fn write_registers_u32(&mut self, offset: u16, val: u32); - fn enable(&mut self, resources: Resources); + /// Enable the device with the given resources. + /// + /// Called when the guest sets `DRIVER_OK`. On success, the device should + /// start processing queues and the transport will reflect `DRIVER_OK` in + /// the device status. On failure, the transport will log the error and + /// leave `DRIVER_OK` unset, so the device remains inert and the guest + /// will observe failures through IO timeouts. + fn enable(&mut self, resources: Resources) -> anyhow::Result<()>; /// Poll the device to complete a disable/reset operation. /// /// This is called when the guest writes status=0 (device reset). The device diff --git a/vm/devices/virtio/virtio/src/tests.rs b/vm/devices/virtio/virtio/src/tests.rs index c43d92be3d..7aaed1b06d 100644 --- a/vm/devices/virtio/virtio/src/tests.rs +++ b/vm/devices/virtio/virtio/src/tests.rs @@ -778,6 +778,34 @@ struct VirtioPciTestDevice { type TestDeviceQueueWorkFn = Arc; +/// A minimal VirtioDevice whose enable() always returns an error. +/// Used to test that transports correctly handle enable failures. +#[derive(InspectMut)] +#[inspect(skip)] +struct FailingTestDevice { + traits: DeviceTraits, +} + +impl VirtioDevice for FailingTestDevice { + fn traits(&self) -> DeviceTraits { + self.traits.clone() + } + + fn read_registers_u32(&self, _offset: u16) -> u32 { + 0 + } + + fn write_registers_u32(&mut self, _offset: u16, _val: u32) {} + + fn enable(&mut self, _resources: Resources) -> anyhow::Result<()> { + anyhow::bail!("intentional enable failure for testing") + } + + fn poll_disable(&mut self, _cx: &mut std::task::Context<'_>) -> std::task::Poll<()> { + std::task::Poll::Ready(()) + } +} + #[derive(InspectMut)] #[inspect(skip)] struct TestDevice { @@ -818,7 +846,7 @@ impl VirtioDevice for TestDevice { fn write_registers_u32(&mut self, _offset: u16, _val: u32) {} - fn enable(&mut self, resources: Resources) { + fn enable(&mut self, resources: Resources) -> anyhow::Result<()> { self.workers = resources .queues .into_iter() @@ -843,6 +871,7 @@ impl VirtioDevice for TestDevice { )) }) .collect(); + Ok(()) } fn poll_disable(&mut self, cx: &mut std::task::Context<'_>) -> std::task::Poll<()> { @@ -1912,3 +1941,151 @@ async fn verify_device_multi_queue_pci(driver: DefaultDriver) { .unwrap(); drop(dev); } + +#[async_test] +async fn verify_enable_failure_mmio_does_not_set_driver_ok(_driver: DefaultDriver) { + let test_mem = VirtioTestMemoryAccess::new(); + let doorbell_registration: Arc = test_mem.clone(); + let interrupt = LineInterrupt::detached(); + + let mut dev = VirtioMmioDevice::new( + Box::new(FailingTestDevice { + traits: DeviceTraits { + device_id: 3, + device_features: VirtioDeviceFeatures::new().with_bank(0, 2), + max_queues: 1, + device_register_length: 0, + ..Default::default() + }, + }), + interrupt, + Some(doorbell_registration), + 0, + 1, + ); + + // Drive through ACKNOWLEDGE -> DRIVER -> FEATURES_OK -> DRIVER_OK + dev.write_u32(112, VIRTIO_ACKNOWLEDGE); + dev.write_u32(112, VIRTIO_DRIVER); + dev.write_u32(36, 0); + dev.write_u32(32, 2); // select matching features + dev.write_u32(36, 1); + dev.write_u32(32, VIRTIO_F_VERSION_1); + dev.write_u32(112, VIRTIO_FEATURES_OK); + + // Set up one queue + dev.write_u32(48, 0); // queue select + dev.write_u32(56, 16); // queue size + dev.write_u32(68, 1); // queue enable + + // Attempt DRIVER_OK — enable() will fail + dev.write_u32(112, VIRTIO_DRIVER_OK); + + // Device status should NOT have DRIVER_OK set + let status = dev.read_u32(112); + assert_eq!( + status & VIRTIO_DRIVER_OK, + 0, + "DRIVER_OK must not be set when enable() fails" + ); +} + +#[async_test] +async fn verify_enable_failure_pci_does_not_set_driver_ok(_driver: DefaultDriver) { + let test_mem = VirtioTestMemoryAccess::new(); + let doorbell_registration: Arc = test_mem.clone(); + let msi_conn = MsiConnection::new(); + + let mut dev = VirtioPciDevice::new( + Box::new(FailingTestDevice { + traits: DeviceTraits { + device_id: 3, + device_features: VirtioDeviceFeatures::new().with_bank(0, 2), + max_queues: 1, + device_register_length: 12, + ..Default::default() + }, + }), + PciInterruptModel::Msix(msi_conn.target()), + Some(doorbell_registration), + &mut ExternallyManagedMmioIntercepts, + None, + ) + .unwrap(); + + let bar_address1: u64 = 0x10000000000; + dev.pci_cfg_write(0x14, (bar_address1 >> 32) as u32) + .unwrap(); + dev.pci_cfg_write(0x10, bar_address1 as u32).unwrap(); + + let bar_address2: u64 = 0x20000000000; + dev.pci_cfg_write(0x1c, (bar_address2 >> 32) as u32) + .unwrap(); + dev.pci_cfg_write(0x18, bar_address2 as u32).unwrap(); + + dev.pci_cfg_write( + 0x4, + cfg_space::Command::new() + .with_mmio_enabled(true) + .into_bits() as u32, + ) + .unwrap(); + + // Drive through ACKNOWLEDGE -> DRIVER -> FEATURES_OK + let mut buf = [0u8; 1]; + buf[0] = VIRTIO_ACKNOWLEDGE as u8; + dev.mmio_write(bar_address1 + 20, &buf).unwrap(); + buf[0] = VIRTIO_DRIVER as u8; + dev.mmio_write(bar_address1 + 20, &buf).unwrap(); + // Select features + let mut val; + val = 0u32.to_le_bytes(); + dev.mmio_write(bar_address1 + 8, &val).unwrap(); + val = 2u32.to_le_bytes(); + dev.mmio_write(bar_address1 + 12, &val).unwrap(); + val = 1u32.to_le_bytes(); + dev.mmio_write(bar_address1 + 8, &val).unwrap(); + val = VIRTIO_F_VERSION_1.to_le_bytes(); + dev.mmio_write(bar_address1 + 12, &val).unwrap(); + buf[0] = VIRTIO_FEATURES_OK as u8; + dev.mmio_write(bar_address1 + 20, &buf).unwrap(); + + // Set up queue 0 + dev.mmio_write(bar_address1 + 22, &0u16.to_le_bytes()) + .unwrap(); // queue select + dev.mmio_write(bar_address1 + 24, &16u16.to_le_bytes()) + .unwrap(); // queue size + // Set up MSI for the queue + dev.mmio_write(bar_address2, &0u64.to_le_bytes()).unwrap(); + dev.mmio_write(bar_address2 + 8, &0u32.to_le_bytes()) + .unwrap(); + dev.mmio_write(bar_address2 + 12, &0u32.to_le_bytes()) + .unwrap(); + let msix_vector: u16 = 1; + let msix_addr = bar_address2 + 0x10 * msix_vector as u64; + dev.mmio_write(msix_addr, &(msix_vector as u64).to_le_bytes()) + .unwrap(); + dev.mmio_write(msix_addr + 8, &0u32.to_le_bytes()).unwrap(); + dev.mmio_write(msix_addr + 12, &0u32.to_le_bytes()).unwrap(); + dev.mmio_write(bar_address1 + 26, &msix_vector.to_le_bytes()) + .unwrap(); + // Enable queue + dev.mmio_write(bar_address1 + 28, &1u16.to_le_bytes()) + .unwrap(); + // Enable all MSI interrupts + dev.pci_cfg_write(0x40, 0x80000000).unwrap(); + + // Attempt DRIVER_OK — enable() will fail + buf[0] = VIRTIO_DRIVER_OK as u8; + dev.mmio_write(bar_address1 + 20, &buf).unwrap(); + + // Read back device status + let mut status_buf = [0u8; 1]; + dev.mmio_read(bar_address1 + 20, &mut status_buf).unwrap(); + let status = status_buf[0] as u32; + assert_eq!( + status & VIRTIO_DRIVER_OK, + 0, + "DRIVER_OK must not be set when enable() fails" + ); +} diff --git a/vm/devices/virtio/virtio/src/transport/mmio.rs b/vm/devices/virtio/virtio/src/transport/mmio.rs index 6e987a84a7..eb9bac94c1 100644 --- a/vm/devices/virtio/virtio/src/transport/mmio.rs +++ b/vm/devices/virtio/virtio/src/transport/mmio.rs @@ -432,14 +432,27 @@ impl VirtioMmioDevice { }) .collect(); - self.device.enable(Resources { + match self.device.enable(Resources { features: self.driver_feature.clone(), queues, shared_memory_region: None, shared_memory_size: 0, - }); - - self.device_status.set_driver_ok(true); + }) { + Ok(()) => { + self.device_status.set_driver_ok(true); + } + Err(err) => { + self.doorbells.clear(); + // FUTURE: consider setting DEVICE_NEEDS_RESET and + // delivering a config change interrupt so the guest + // can detect the failure proactively instead of + // waiting for IO timeouts. + tracelimit::error_ratelimited!( + error = &*err as &dyn std::error::Error, + "virtio device enable failed" + ); + } + } self.update_config_generation(); } } diff --git a/vm/devices/virtio/virtio/src/transport/pci.rs b/vm/devices/virtio/virtio/src/transport/pci.rs index 87bf1ed66b..144abd59f8 100644 --- a/vm/devices/virtio/virtio/src/transport/pci.rs +++ b/vm/devices/virtio/virtio/src/transport/pci.rs @@ -492,14 +492,27 @@ impl VirtioPciDevice { }) .collect(); - self.device.enable(Resources { + match self.device.enable(Resources { features: self.driver_feature.clone(), queues, shared_memory_region: self.shared_memory_region.clone(), shared_memory_size: self.shared_memory_size, - }); - - self.device_status.set_driver_ok(true); + }) { + Ok(()) => { + self.device_status.set_driver_ok(true); + } + Err(err) => { + self.doorbells.clear(); + // FUTURE: consider setting DEVICE_NEEDS_RESET and + // delivering a config change interrupt so the guest + // can detect the failure proactively instead of + // waiting for IO timeouts. + tracelimit::error_ratelimited!( + error = &*err as &dyn std::error::Error, + "virtio device enable failed" + ); + } + } self.update_config_generation(); } } diff --git a/vm/devices/virtio/virtio_blk/Cargo.toml b/vm/devices/virtio/virtio_blk/Cargo.toml index 7287037b65..4c96dbf29c 100644 --- a/vm/devices/virtio/virtio_blk/Cargo.toml +++ b/vm/devices/virtio/virtio_blk/Cargo.toml @@ -24,7 +24,6 @@ vmcore.workspace = true anyhow.workspace = true async-trait.workspace = true futures.workspace = true -tracing.workspace = true unicycle.workspace = true zerocopy.workspace = true diff --git a/vm/devices/virtio/virtio_blk/src/integration_tests.rs b/vm/devices/virtio/virtio_blk/src/integration_tests.rs index 20fb567e11..dd9f19e032 100644 --- a/vm/devices/virtio/virtio_blk/src/integration_tests.rs +++ b/vm/devices/virtio/virtio_blk/src/integration_tests.rs @@ -222,7 +222,7 @@ impl TestHarness { shared_memory_size: 0, }; - self.device.enable(resources); + self.device.enable(resources).unwrap(); } /// Allocate a data region in guest memory and return its GPA. diff --git a/vm/devices/virtio/virtio_blk/src/lib.rs b/vm/devices/virtio/virtio_blk/src/lib.rs index 0ca7d2504f..38a2219cc9 100644 --- a/vm/devices/virtio/virtio_blk/src/lib.rs +++ b/vm/devices/virtio/virtio_blk/src/lib.rs @@ -12,6 +12,7 @@ mod spec; mod integration_tests; use crate::spec::*; +use anyhow::Context as _; use disk_backend::Disk; use futures::StreamExt; use guestmem::GuestMemory; @@ -348,43 +349,27 @@ impl VirtioDevice for VirtioBlkDevice { // Config space is read-only for virtio-blk. } - fn enable(&mut self, resources: Resources) { + fn enable(&mut self, resources: Resources) -> anyhow::Result<()> { let queue_resources = resources.queues.into_iter().next(); let Some(queue_resources) = queue_resources else { - return; + return Ok(()); }; if !queue_resources.params.enable { - return; + return Ok(()); } - let queue_event = match PolledWait::new(&self.driver, queue_resources.event) { - Ok(e) => e, - Err(err) => { - tracing::error!( - error = &err as &dyn std::error::Error, - "failed to create queue event" - ); - return; - } - }; + let queue_event = PolledWait::new(&self.driver, queue_resources.event) + .context("failed to create queue event")?; - let queue = match VirtioQueue::new( + let queue = VirtioQueue::new( resources.features, queue_resources.params, self.worker.task().memory.clone(), queue_resources.notify, queue_event, - ) { - Ok(q) => q, - Err(err) => { - tracing::error!( - error = &err as &dyn std::error::Error, - "failed to create virtio queue" - ); - return; - } - }; + ) + .context("failed to create virtio queue")?; self.worker.insert( self.driver.clone(), @@ -392,6 +377,7 @@ impl VirtioDevice for VirtioBlkDevice { BlkQueueState { queue }, ); self.worker.start(); + Ok(()) } fn poll_disable(&mut self, cx: &mut Context<'_>) -> Poll<()> { diff --git a/vm/devices/virtio/virtio_net/src/lib.rs b/vm/devices/virtio/virtio_net/src/lib.rs index f432be818f..a3109b3128 100644 --- a/vm/devices/virtio/virtio_net/src/lib.rs +++ b/vm/devices/virtio/virtio_net/src/lib.rs @@ -18,6 +18,7 @@ pub mod resolver; mod tests; use crate::buffers::VirtioWorkPool; +use anyhow::Context as _; use bitfield_struct::bitfield; use guestmem::GuestMemory; use inspect::Inspect; @@ -263,7 +264,7 @@ impl VirtioDevice for Device { fn write_registers_u32(&mut self, _offset: u16, _val: u32) {} - fn enable(&mut self, resources: Resources) { + fn enable(&mut self, resources: Resources) -> anyhow::Result<()> { let mut queue_resources: Vec<_> = resources.queues.into_iter().collect(); let mut workers = Vec::with_capacity(queue_resources.len() / 2); while queue_resources.len() > 1 { @@ -275,56 +276,33 @@ impl VirtioDevice for Device { } let rx_queue_size = rx_resources.params.size; - let rx_queue_event = PolledWait::new(&self.adapter.driver, rx_resources.event); - if let Err(err) = rx_queue_event { - tracing::error!( - err = &err as &dyn std::error::Error, - "Failed creating queue event" - ); - continue; - } + let rx_queue_event = PolledWait::new(&self.adapter.driver, rx_resources.event) + .context("failed creating rx queue event")?; let rx_queue = VirtioQueue::new( resources.features.clone(), rx_resources.params, self.memory.clone(), rx_resources.notify, - rx_queue_event.unwrap(), - ); - if let Err(err) = rx_queue { - tracing::error!( - err = &err as &dyn std::error::Error, - "Failed creating virtio net receive queue" - ); - continue; - } + rx_queue_event, + ) + .context("failed creating virtio net receive queue")?; let tx_queue_size = tx_resources.params.size; - let tx_queue_event = PolledWait::new(&self.adapter.driver, tx_resources.event); - if let Err(err) = tx_queue_event { - tracing::error!( - err = &err as &dyn std::error::Error, - "Failed creating queue event" - ); - continue; - } + let tx_queue_event = PolledWait::new(&self.adapter.driver, tx_resources.event) + .context("failed creating tx queue event")?; let tx_queue = VirtioQueue::new( resources.features.clone(), tx_resources.params, self.memory.clone(), tx_resources.notify, - tx_queue_event.unwrap(), - ); - if let Err(err) = tx_queue { - tracing::error!( - err = &err as &dyn std::error::Error, - "Failed creating virtio net transmit queue" - ); - continue; - } + tx_queue_event, + ) + .context("failed creating virtio net transmit queue")?; + workers.push(VirtioState { - rx_queue: rx_queue.unwrap(), + rx_queue, rx_queue_size, - tx_queue: tx_queue.unwrap(), + tx_queue, tx_queue_size, }); } @@ -334,6 +312,7 @@ impl VirtioDevice for Device { self.insert_worker(virtio_state, i); } self.coordinator.start(); + Ok(()) } fn poll_disable(&mut self, cx: &mut Context<'_>) -> Poll<()> { diff --git a/vm/devices/virtio/virtio_net/src/tests.rs b/vm/devices/virtio/virtio_net/src/tests.rs index cb0a0858d9..83f6e3b7b2 100644 --- a/vm/devices/virtio/virtio_net/src/tests.rs +++ b/vm/devices/virtio/virtio_net/src/tests.rs @@ -574,7 +574,7 @@ impl TestHarness { shared_memory_size: 0, }; - self.device.enable(resources); + self.device.enable(resources).unwrap(); // Wait for the mock endpoint to provide a queue handle mesh::CancelContext::new() diff --git a/vm/devices/virtio/virtio_p9/src/lib.rs b/vm/devices/virtio/virtio_p9/src/lib.rs index 9c432d8e92..3738534bec 100644 --- a/vm/devices/virtio/virtio_p9/src/lib.rs +++ b/vm/devices/virtio/virtio_p9/src/lib.rs @@ -7,6 +7,7 @@ pub mod resolver; +use anyhow::Context as _; use async_trait::async_trait; use guestmem::GuestMemory; use inspect::InspectMut; @@ -109,15 +110,15 @@ impl VirtioDevice for VirtioPlan9Device { tracing::warn!(offset, val, "[VIRTIO 9P] Unknown write",); } - fn enable(&mut self, resources: Resources) { + fn enable(&mut self, resources: Resources) -> anyhow::Result<()> { let queue_resources = resources .queues .into_iter() .next() - .expect("expected single queue"); + .context("expected single queue")?; if !queue_resources.params.enable { - return; + return Ok(()); } let worker = VirtioPlan9Worker { @@ -132,6 +133,7 @@ impl VirtioDevice for VirtioPlan9Device { queue_resources, self.exit_event.listen(), )); + Ok(()) } fn poll_disable(&mut self, cx: &mut Context<'_>) -> Poll<()> { diff --git a/vm/devices/virtio/virtio_pmem/src/lib.rs b/vm/devices/virtio/virtio_pmem/src/lib.rs index 5b90a6d8a6..b05c942eef 100644 --- a/vm/devices/virtio/virtio_pmem/src/lib.rs +++ b/vm/devices/virtio/virtio_pmem/src/lib.rs @@ -94,20 +94,20 @@ impl VirtioDevice for Device { fn write_registers_u32(&mut self, _offset: u16, _val: u32) {} - fn enable(&mut self, mut resources: Resources) { + fn enable(&mut self, mut resources: Resources) -> anyhow::Result<()> { assert!(self.worker.is_none()); if !resources.queues[0].params.enable { - return; + return Ok(()); } - let shared_memory_region = resources.shared_memory_region.clone(); - let _ = shared_memory_region.unwrap().map( - 0, - &self.mappable, - 0, - self.len as usize, - self.writable, - ); + let shared_memory_region = resources + .shared_memory_region + .clone() + .context("shared memory region not available")?; + + shared_memory_region + .map(0, &self.mappable, 0, self.len as usize, self.writable) + .context("failed to map shared memory region")?; self.worker = { let worker = PmemWorker { @@ -125,6 +125,7 @@ impl VirtioDevice for Device { self.exit_event.listen(), )) }; + Ok(()) } fn poll_disable(&mut self, cx: &mut std::task::Context<'_>) -> Poll<()> { diff --git a/vm/devices/virtio/virtiofs/src/virtio.rs b/vm/devices/virtio/virtiofs/src/virtio.rs index f03fedb41c..fa612212f0 100644 --- a/vm/devices/virtio/virtiofs/src/virtio.rs +++ b/vm/devices/virtio/virtiofs/src/virtio.rs @@ -133,7 +133,7 @@ impl VirtioDevice for VirtioFsDevice { tracing::warn!(offset, val, "[virtiofs] Unknown write",); } - fn enable(&mut self, resources: Resources) { + fn enable(&mut self, resources: Resources) -> anyhow::Result<()> { self.workers = resources .queues .into_iter() @@ -158,6 +158,7 @@ impl VirtioDevice for VirtioFsDevice { )) }) .collect(); + Ok(()) } fn poll_disable(&mut self, cx: &mut Context<'_>) -> Poll<()> {