From 65be7ae66719f6ed836d1dafed25a3b10a58b8c5 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Fri, 18 Aug 2023 10:54:37 +0000 Subject: [PATCH] feat(vmm): add vhost-user-blk device This adds a vhost-user block device implementation. In order to create a vhost-user block device, a PUT /drives request needs to be issued that contains a `vhost_user` object. At the moment, the object has a single property, `socket`, which is the path of the socket used to communicate with a vhost-user backend. Example: ``` curl --unix-socket ${fc_socket} -i \ -X PUT "http://localhost/drives/vhost" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"vhost\", \"is_root_device\": false, \"cache_type\": \"Unsafe\", \"vhost_user\": { \"socket\": \"${vhost_socket}\" } }" ``` Signed-off-by: Nikita Kalyazin Co-authored-by: Diana Popa --- .../seccomp/aarch64-unknown-linux-musl.json | 4 + .../seccomp/x86_64-unknown-linux-musl.json | 4 + src/vmm/src/builder.rs | 21 ++ src/vmm/src/device_manager/persist.rs | 127 +++++++-- src/vmm/src/devices/virtio/block/mod.rs | 1 + .../devices/virtio/block/vhost_user/device.rs | 262 ++++++++++++++++++ .../virtio/block/vhost_user/event_handler.rs | 74 +++++ .../devices/virtio/block/vhost_user/mod.rs | 35 +++ .../virtio/block/vhost_user/persist.rs | 122 ++++++++ src/vmm/src/devices/virtio/mod.rs | 5 + src/vmm/src/devices/virtio/vhost_user/mod.rs | 5 +- src/vmm/src/resources.rs | 5 + src/vmm/src/rpc_interface.rs | 4 + src/vmm/src/vmm_config/drive.rs | 124 ++++++++- 14 files changed, 760 insertions(+), 33 deletions(-) create mode 100644 src/vmm/src/devices/virtio/block/vhost_user/device.rs create mode 100644 src/vmm/src/devices/virtio/block/vhost_user/event_handler.rs create mode 100644 src/vmm/src/devices/virtio/block/vhost_user/mod.rs create mode 100644 src/vmm/src/devices/virtio/block/vhost_user/persist.rs diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index 
a8bb544f40e..c62a52f68d9 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -979,6 +979,10 @@ { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" + }, + { + "syscall": "sendmsg", + "comment": "Used by vhost-user frontend to communicate with the backend" } ] } diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index bcdf00edd4c..ad402a136b8 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -1183,6 +1183,10 @@ { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" + }, + { + "syscall": "sendmsg", + "comment": "Used by vhost-user frontend to communicate with the backend" } ] } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 448fd139af1..7ac208be433 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -967,6 +967,26 @@ fn attach_block_devices<'a, I: Iterator + Debug>( // The device mutex mustn't be locked here otherwise it will deadlock. attach_virtio_device(event_manager, vmm, id, block.clone(), cmdline)?; } + Block::VhostUserBacked(block) => { + let id = { + let locked = block.lock().expect("Poisoned lock"); + if locked.is_root_device() { + cmdline.insert_str(if let Some(partuuid) = locked.partuuid() { + format!("root=PARTUUID={}", partuuid) + } else { + // If no PARTUUID was specified for the root device, try with the + // /dev/vda. + "root=/dev/vda".to_string() + })?; + + let flags = if locked.is_read_only() { "ro" } else { "rw" }; + cmdline.insert_str(flags)?; + } + locked.id().clone() + }; + // The device mutex mustn't be locked here otherwise it will deadlock. 
+ attach_virtio_device(event_manager, vmm, id, block.clone(), cmdline)?; + } } } Ok(()) @@ -1191,6 +1211,7 @@ pub mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; block_dev_configs.insert(block_device_config).unwrap(); } diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 89161895dfd..f78f753891f 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -24,6 +24,10 @@ use crate::devices::virtio::balloon::persist::{BalloonConstructorArgs, BalloonSt use crate::devices::virtio::balloon::{Balloon, BalloonError}; use crate::devices::virtio::block::file::persist::{BlockFileConstructorArgs, BlockFileState}; use crate::devices::virtio::block::file::{BlockFile, BlockFileError}; +use crate::devices::virtio::block::vhost_user::persist::{ + BlockVhostUserConstructorArgs, BlockVhostUserState, +}; +use crate::devices::virtio::block::vhost_user::{BlockVhostUser, BlockVhostUserError}; use crate::devices::virtio::net::persist::{ NetConstructorArgs, NetPersistError as NetError, NetState, }; @@ -38,8 +42,9 @@ use crate::devices::virtio::vsock::persist::{ }; use crate::devices::virtio::vsock::{Vsock, VsockError, VsockUnixBackend, VsockUnixBackendError}; use crate::devices::virtio::{ - MmioTransport, VirtioDevice, SUBTYPE_BALLOON, SUBTYPE_BLOCK_FILE, SUBTYPE_NET, SUBTYPE_RNG, - SUBTYPE_VSOCK, TYPE_BALLOON, TYPE_BLOCK, TYPE_NET, TYPE_RNG, TYPE_VSOCK, + MmioTransport, VirtioDevice, SUBTYPE_BALLOON, SUBTYPE_BLOCK_FILE, SUBTYPE_BLOCK_VHOST_USER, + SUBTYPE_NET, SUBTYPE_RNG, SUBTYPE_VSOCK, TYPE_BALLOON, TYPE_BLOCK, TYPE_NET, TYPE_RNG, + TYPE_VSOCK, }; use crate::resources::VmResources; use crate::vmm_config::mmds::MmdsConfigError; @@ -50,6 +55,7 @@ use crate::EventManager; pub enum DevicePersistError { Balloon(BalloonError), BlockFile(BlockFileError), + BlockVhostUser(BlockVhostUserError), DeviceManager(super::mmio::MmioError), MmioTransport, 
#[cfg(target_arch = "aarch64")] @@ -89,6 +95,20 @@ pub struct ConnectedBlockFileState { pub device_info: MMIODeviceInfo, } +/// Holds the state of a vhost-user-backed block device connected to the MMIO space. +// NOTICE: Any changes to this structure require a snapshot version bump. +#[derive(Debug, Clone, Versionize)] +pub struct ConnectedBlockVhostUserState { + /// Device identifier. + pub device_id: String, + /// Device state. + pub device_state: BlockVhostUserState, + /// Mmio transport state. + pub transport_state: MmioTransportState, + /// VmmResources. + pub device_info: MMIODeviceInfo, +} + /// Holds the state of a net device connected to the MMIO space. // NOTICE: Any changes to this structure require a snapshot version bump. #[derive(Debug, Clone, Versionize)] @@ -179,6 +199,9 @@ pub struct DeviceStates { pub block_device_subtypes: Vec, /// File-backed block device states. pub block_file_devices: Vec, + /// Vhost-user-backed block device states. + #[version(start = 5, de_fn = "de_block_devices", ser_fn = "ser_block_devices")] + pub block_vhost_user_devices: Vec, /// Net device states. pub net_devices: Vec, /// Vsock device state. 
@@ -199,6 +222,7 @@ pub struct DeviceStates { #[derive(Debug)] pub enum SharedDeviceType { BlockFile(Arc>), + BlockVhostUser(Arc>), Network(Arc>), Balloon(Arc>), Vsock(Arc>>), @@ -297,6 +321,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { balloon_device: None, block_device_subtypes: Vec::new(), block_file_devices: Vec::new(), + block_vhost_user_devices: Vec::new(), net_devices: Vec::new(), vsock_device: None, #[cfg(target_arch = "aarch64")] @@ -347,8 +372,8 @@ impl<'a> Persist<'a> for MMIODeviceManager { }); } } - TYPE_BLOCK => { - if locked_device.device_subtype() == SUBTYPE_BLOCK_FILE { + TYPE_BLOCK => match locked_device.device_subtype() { + SUBTYPE_BLOCK_FILE => { let block = locked_device .as_mut_any() .downcast_mut::() @@ -362,7 +387,24 @@ impl<'a> Persist<'a> for MMIODeviceManager { }); states.block_device_subtypes.push(SUBTYPE_BLOCK_FILE); } - } + SUBTYPE_BLOCK_VHOST_USER => { + let block = locked_device + .as_mut_any() + .downcast_mut::() + .unwrap(); + block.prepare_save(); + states + .block_vhost_user_devices + .push(ConnectedBlockVhostUserState { + device_id: devid.clone(), + device_state: block.save(), + transport_state, + device_info: device_info.clone(), + }); + states.block_device_subtypes.push(SUBTYPE_BLOCK_VHOST_USER); + } + _ => (), + }, TYPE_NET => { if locked_device.device_subtype() == SUBTYPE_NET { let net = locked_device.as_any().downcast_ref::().unwrap(); @@ -557,29 +599,57 @@ impl<'a> Persist<'a> for MMIODeviceManager { } let mut block_file_iter = state.block_file_devices.iter(); + let mut block_vhost_user_iter = state.block_vhost_user_devices.iter(); + for block_device_subtype in &state.block_device_subtypes { - if *block_device_subtype == SUBTYPE_BLOCK_FILE { - // Safe to unwrap, because the subtype vector tracks the device distibution - // between the corresponding vectors. 
- let block_state = block_file_iter.next().unwrap(); - let device = Arc::new(Mutex::new(BlockFile::restore( - BlockFileConstructorArgs { mem: mem.clone() }, - &block_state.device_state, - )?)); - - (constructor_args.for_each_restored_device)( - constructor_args.vm_resources, - SharedDeviceType::BlockFile(device.clone()), - ); - - restore_helper( - device.clone(), - device, - &block_state.device_id, - &block_state.transport_state, - &block_state.device_info, - constructor_args.event_manager, - )?; + match *block_device_subtype { + SUBTYPE_BLOCK_FILE => { + // Safe to unwrap, because the subtype vector tracks the device distribution + // between the corresponding vectors. + let block_state = block_file_iter.next().unwrap(); + let device = Arc::new(Mutex::new(BlockFile::restore( + BlockFileConstructorArgs { mem: mem.clone() }, + &block_state.device_state, + )?)); + + (constructor_args.for_each_restored_device)( + constructor_args.vm_resources, + SharedDeviceType::BlockFile(device.clone()), + ); + + restore_helper( + device.clone(), + device, + &block_state.device_id, + &block_state.transport_state, + &block_state.device_info, + constructor_args.event_manager, + )?; + } + SUBTYPE_BLOCK_VHOST_USER => { + // Safe to unwrap, because the subtype vector tracks the device distribution + // between the corresponding vectors. 
+ let block_state = block_vhost_user_iter.next().unwrap(); + let device = Arc::new(Mutex::new(BlockVhostUser::restore( + BlockVhostUserConstructorArgs { mem: mem.clone() }, + &block_state.device_state, + )?)); + + (constructor_args.for_each_restored_device)( + constructor_args.vm_resources, + SharedDeviceType::BlockVhostUser(device.clone()), + ); + + restore_helper( + device.clone(), + device, + &block_state.device_id, + &block_state.transport_state, + &block_state.device_info, + constructor_args.event_manager, + )?; + } + _ => (), } } @@ -936,7 +1006,8 @@ mod tests { "is_read_only": true, "rate_limiter": null, "io_engine": "Sync" - }} + }}, + "vhost_user": null }} ], "boot-source": {{ diff --git a/src/vmm/src/devices/virtio/block/mod.rs b/src/vmm/src/devices/virtio/block/mod.rs index a3e9602d8a4..eed0b3d1268 100644 --- a/src/vmm/src/devices/virtio/block/mod.rs +++ b/src/vmm/src/devices/virtio/block/mod.rs @@ -4,6 +4,7 @@ //! Implements a virtio block device. pub mod file; +pub mod vhost_user; use serde::{Deserialize, Serialize}; use versionize::{VersionMap, Versionize, VersionizeError, VersionizeResult}; diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs new file mode 100644 index 00000000000..1e6cfc1acf4 --- /dev/null +++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs @@ -0,0 +1,262 @@ +// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Portions Copyright 2019 Intel Corporation. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::cmp; +use std::io::Write; +use std::sync::atomic::AtomicUsize; +use std::sync::Arc; + +use log::error; +use logger::{IncMetric, METRICS}; +use utils::eventfd::EventFd; +use utils::vm_memory::GuestMemoryMmap; +use vhost::vhost_user::message::*; +use vhost::vhost_user::VhostUserMaster; +use virtio_gen::virtio_blk::{VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_F_VERSION_1}; +use virtio_gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; + +use super::{BlockVhostUserError, NUM_QUEUES, QUEUE_SIZE}; +use crate::arch::DeviceSubtype; +use crate::devices::virtio::queue::Queue; +use crate::devices::virtio::vhost_user::VhostUserHandle; +use crate::devices::virtio::{ + ActivateError, CacheType, DeviceState, Disk, DiskAttributes, IrqTrigger, VirtioDevice, + SUBTYPE_BLOCK_VHOST_USER, TYPE_BLOCK, +}; + +/// Block device config space size in bytes. +const BLOCK_CONFIG_SPACE_SIZE: usize = 60; + +/// vhost-user block device. +#[derive(Debug)] +pub struct BlockVhostUser { + // Virtio fields. + pub(crate) avail_features: u64, + pub(crate) acked_features: u64, + pub(crate) config_space: Vec, + pub(crate) activate_evt: EventFd, + + // Transport related fields. 
+ pub(crate) queues: Vec, + pub(crate) queue_evts: [EventFd; NUM_QUEUES as usize], + pub(crate) device_state: DeviceState, + pub(crate) irq_trigger: IrqTrigger, + + // Disk attributes + pub(crate) disk_attrs: DiskAttributes, + + // Vhost user protocol handle + pub(crate) vu_handle: VhostUserHandle, +} + +impl BlockVhostUser { + pub fn new( + id: String, + partuuid: Option, + cache_type: CacheType, + is_disk_root: bool, + vhost_user_socket: &str, + ) -> Result { + let mut vu = VhostUserHandle::connect_vhost_user(vhost_user_socket, NUM_QUEUES) + .map_err(BlockVhostUserError::VhostUser)?; + + let mut avail_features = (1 << VIRTIO_F_VERSION_1) + | (1 << VIRTIO_RING_F_EVENT_IDX) + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); + + if cache_type == CacheType::Writeback { + avail_features |= 1u64 << VIRTIO_BLK_F_FLUSH; + } + + // We always try to negotiate readonly with the backend. + // If the backend is configured as readonly, we will accept it. + avail_features |= 1u64 << VIRTIO_BLK_F_RO; + + let avail_protocol_features = + VhostUserProtocolFeatures::CONFIG | VhostUserProtocolFeatures::REPLY_ACK; + + let (mut acked_features, acked_protocol_features) = vu + .negotiate_features_vhost_user(avail_features, avail_protocol_features) + .map_err(BlockVhostUserError::VhostUser)?; + + vu.acked_protocol_features = acked_protocol_features; + + let queue_evts = [EventFd::new(libc::EFD_NONBLOCK).map_err(BlockVhostUserError::EventFd)?; + NUM_QUEUES as usize]; + + let queue_sizes: &[u16] = &[QUEUE_SIZE]; + let queues = queue_sizes.iter().map(|&s| Queue::new(s)).collect(); + + let config_space: Vec = vec![0u8; BLOCK_CONFIG_SPACE_SIZE]; + let (_, config_space) = vu + .socket_handle() + .get_config( + VHOST_USER_CONFIG_OFFSET, + config_space.len() as u32, + VhostUserConfigFlags::WRITABLE, + config_space.as_slice(), + ) + .map_err(BlockVhostUserError::Vhost)?; + + let is_disk_read_only = acked_features & (1 << VIRTIO_BLK_F_RO) != 0; + let disk_attrs = + DiskAttributes::new(id, 
partuuid, cache_type, is_disk_read_only, is_disk_root); + + avail_features = acked_features; + acked_features &= VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); + + Ok(Self { + avail_features, + acked_features, + config_space, + activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(BlockVhostUserError::EventFd)?, + queues, + queue_evts, + device_state: DeviceState::Inactive, + irq_trigger: IrqTrigger::new().map_err(BlockVhostUserError::IrqTrigger)?, + disk_attrs, + vu_handle: vu, + }) + } + + /// Provides backing vhost user path of this block device. + pub fn socket(&self) -> &String { + &self.vu_handle.socket_path + } + + /// Prepare device for being snapshotted. + pub fn prepare_save(&mut self) {} + + /// Set up vhost-user connection. + pub fn setup_vhost_user(&mut self, mem: &GuestMemoryMmap) -> Result<(), BlockVhostUserError> { + self.vu_handle + .setup_vhost_user( + mem, + [(0, &self.queues[0], &self.queue_evts[0])].to_vec(), + &self.irq_trigger, + self.acked_features, + ) + .map_err(BlockVhostUserError::VhostUser) + } + + /// Provides non-mutable reference to this device's block. + pub fn block(&self) -> &DiskAttributes { + &self.disk_attrs + } +} + +impl VirtioDevice for BlockVhostUser { + fn avail_features(&self) -> u64 { + self.avail_features + } + + fn acked_features(&self) -> u64 { + self.acked_features + } + + fn set_acked_features(&mut self, acked_features: u64) { + self.acked_features = acked_features; + } + + fn device_type(&self) -> u32 { + TYPE_BLOCK + } + + fn device_subtype(&self) -> DeviceSubtype { + SUBTYPE_BLOCK_VHOST_USER + } + + fn queues(&self) -> &[Queue] { + &self.queues + } + + fn queues_mut(&mut self) -> &mut [Queue] { + &mut self.queues + } + + fn queue_events(&self) -> &[EventFd] { + &self.queue_evts + } + + fn interrupt_evt(&self) -> &EventFd { + &self.irq_trigger.irq_evt + } + + /// Returns the current device interrupt status. 
+ fn interrupt_status(&self) -> Arc { + self.irq_trigger.irq_status.clone() + } + + fn read_config(&self, offset: u64, mut data: &mut [u8]) { + let config_len = self.config_space.len() as u64; + if offset >= config_len { + error!("Failed to read config space"); + METRICS.block.cfg_fails.inc(); + return; + } + if let Some(end) = offset.checked_add(data.len() as u64) { + // This write can't fail, offset and end are checked against config_len. + data.write_all(&self.config_space[offset as usize..cmp::min(end, config_len) as usize]) + .unwrap(); + } + } + + fn write_config(&mut self, _offset: u64, _data: &[u8]) { + // We do not advertise VIRTIO_BLK_F_CONFIG_WCE + // that would allow configuring the "writeback" field. + // Other block config fields are immutable. + } + + fn activate(&mut self, mem: GuestMemoryMmap) -> Result<(), ActivateError> { + self.vu_handle + .setup_vhost_user( + &mem, + [(0, &self.queues[0], &self.queue_evts[0])].to_vec(), + &self.irq_trigger, + self.acked_features, + ) + .map_err(|err| { + METRICS.block.activate_fails.inc(); + ActivateError::VhostUser(err) + })?; + Ok(()) + } + + fn is_activated(&self) -> bool { + self.device_state.is_activated() + } + + fn can_update_interrupt_status(&self) -> bool { + false + } +} + +impl Disk for BlockVhostUser { + /// Provides the ID of this block device. + fn id(&self) -> &String { + self.block().id() + } + + /// Provides the PARTUUID of this block device. + fn partuuid(&self) -> Option<&String> { + self.block().partuuid() + } + + /// Specifies if this block device is read only. + fn is_read_only(&self) -> bool { + self.block().is_read_only() + } + + /// Specifies if this block device is the root device. + fn is_root_device(&self) -> bool { + self.block().is_root_device() + } + + /// Specifies block device cache type. 
+ fn cache_type(&self) -> CacheType { + self.block().cache_type() + } +} diff --git a/src/vmm/src/devices/virtio/block/vhost_user/event_handler.rs b/src/vmm/src/devices/virtio/block/vhost_user/event_handler.rs new file mode 100644 index 00000000000..48f8c9d1ed8 --- /dev/null +++ b/src/vmm/src/devices/virtio/block/vhost_user/event_handler.rs @@ -0,0 +1,74 @@ +// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::os::unix::io::AsRawFd; + +use event_manager::{EventOps, Events, MutEventSubscriber}; +use logger::{error, warn}; +use utils::epoll::EventSet; + +use crate::devices::virtio::block::vhost_user::device::BlockVhostUser; + +impl BlockVhostUser { + fn register_activate_event(&self, ops: &mut EventOps) { + if let Err(err) = ops.add(Events::new(&self.activate_evt, EventSet::IN)) { + error!("Failed to register activate event: {}", err); + } + } + + fn process_activate_event(&self, ops: &mut EventOps) { + if let Err(err) = self.activate_evt.read() { + error!("Failed to consume block activate event: {:?}", err); + } + if let Err(err) = ops.remove(Events::new(&self.activate_evt, EventSet::IN)) { + error!("Failed to un-register activate event: {}", err); + } + } + + fn is_activated(&self) -> bool { + self.device_state.is_activated() + } +} + +impl MutEventSubscriber for BlockVhostUser { + // Handle the activate event, the only event this device registers. + fn process(&mut self, event: Events, ops: &mut EventOps) { + let source = event.fd(); + let event_set = event.event_set(); + let supported_events = EventSet::IN; + + if !supported_events.contains(event_set) { + warn!( + "Received unknown event: {:?} from source: {:?}", + event_set, source + ); + return; + } + + if self.is_activated() { + let activate_fd = self.activate_evt.as_raw_fd(); + // Looks better than C style if/else if/else. 
+ match source { + _ if activate_fd == source => self.process_activate_event(ops), + _ => warn!("BlockVhost: Spurious event received: {:?}", source), + } + } else { + warn!( + "BlockVhost: The device is not yet activated. Spurious event received: {:?}", + source + ); + } + } + + fn init(&mut self, ops: &mut EventOps) { + // This function can be called during different points in the device lifetime: + // - shortly after device creation, + // - on device activation (is-activated already true at this point), + // - on device restore from snapshot. + if self.is_activated() { + error!("This a vhost backed block. Not sure why I received this event"); + } else { + self.register_activate_event(ops); + } + } +} diff --git a/src/vmm/src/devices/virtio/block/vhost_user/mod.rs b/src/vmm/src/devices/virtio/block/vhost_user/mod.rs new file mode 100644 index 00000000000..69cc8613a1f --- /dev/null +++ b/src/vmm/src/devices/virtio/block/vhost_user/mod.rs @@ -0,0 +1,35 @@ +// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +pub mod device; +mod event_handler; +pub mod persist; + +pub use self::device::BlockVhostUser; +use crate::devices::virtio::vhost_user::VhostUserError; + +/// Number of queues for the vhost-user block device. +pub const NUM_QUEUES: u64 = 1; + +/// Queue size for the vhost-user block device. +pub const QUEUE_SIZE: u16 = 256; + +/// Vhost-user block device error. +#[derive(Debug, thiserror::Error)] +pub enum BlockVhostUserError { + // Persistence error. + #[error("Persistence error: {0}")] + Persist(crate::devices::virtio::persist::PersistError), + // Vhost-user error. + #[error("Vhost-user error: {0}")] + VhostUser(VhostUserError), + // Vhost error. + #[error("Vhost error: {0}")] + Vhost(vhost::Error), + // Error opening eventfd. + #[error("Error opening eventfd: {0}")] + EventFd(std::io::Error), + // Error creating an irqfd. 
+ #[error("Error creating irqfd: {0}")] + IrqTrigger(std::io::Error), +} diff --git a/src/vmm/src/devices/virtio/block/vhost_user/persist.rs b/src/vmm/src/devices/virtio/block/vhost_user/persist.rs new file mode 100644 index 00000000000..ec1d0bd7583 --- /dev/null +++ b/src/vmm/src/devices/virtio/block/vhost_user/persist.rs @@ -0,0 +1,122 @@ +// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Defines the structures needed for saving/restoring block devices. + +use std::sync::atomic::AtomicUsize; +use std::sync::Arc; + +use snapshot::Persist; +use utils::eventfd::EventFd; +use utils::vm_memory::GuestMemoryMmap; +use versionize::{VersionMap, Versionize, VersionizeResult}; +use versionize_derive::Versionize; +use virtio_gen::virtio_blk::VIRTIO_BLK_F_RO; + +use super::{BlockVhostUser, BlockVhostUserError}; +use crate::devices::virtio::block::vhost_user::{NUM_QUEUES, QUEUE_SIZE}; +use crate::devices::virtio::block::CacheTypeState; +use crate::devices::virtio::persist::VirtioDeviceState; +use crate::devices::virtio::vhost_user::VhostUserHandle; +use crate::devices::virtio::{DeviceState, Disk, DiskAttributes, IrqTrigger, TYPE_BLOCK}; + +/// vhost-user block device state. +// NOTICE: Any changes to this structure require a snapshot version bump. +#[derive(Debug, Clone, Versionize)] +pub struct BlockVhostUserState { + id: String, + partuuid: Option, + cache_type: CacheTypeState, + root_device: bool, + socket_path: String, + acked_protocol_features: u64, + config_space: Vec, + virtio_state: VirtioDeviceState, +} + +/// Auxiliary structure for creating a device when resuming from a snapshot. +#[derive(Debug)] +pub struct BlockVhostUserConstructorArgs { + pub mem: GuestMemoryMmap, +} + +impl Persist<'_> for BlockVhostUser { + type State = BlockVhostUserState; + type ConstructorArgs = BlockVhostUserConstructorArgs; + type Error = BlockVhostUserError; + + fn save(&self) -> Self::State { + // Save device state. 
+ BlockVhostUserState { + id: self.id().clone(), + partuuid: self.partuuid().cloned(), + cache_type: CacheTypeState::from(self.block().cache_type()), + root_device: self.is_root_device(), + socket_path: self.socket().clone(), + acked_protocol_features: self.vu_handle.acked_protocol_features, + config_space: self.config_space.clone(), + virtio_state: VirtioDeviceState::from_device(self), + } + } + + fn restore( + constructor_args: Self::ConstructorArgs, + state: &Self::State, + ) -> Result { + let is_disk_read_only = state.virtio_state.avail_features & (1u64 << VIRTIO_BLK_F_RO) != 0; + + let queue_evts = [EventFd::new(libc::EFD_NONBLOCK).map_err(BlockVhostUserError::EventFd)?; + NUM_QUEUES as usize]; + + let disk_attrs = DiskAttributes::new( + state.id.clone(), + state.partuuid.clone(), + state.cache_type.into(), + is_disk_read_only, + state.root_device, + ); + + let vu = VhostUserHandle::connect_vhost_user(state.socket_path.as_str(), NUM_QUEUES) + .map_err(BlockVhostUserError::VhostUser)?; + + let mut irq_trigger = IrqTrigger::new().map_err(BlockVhostUserError::IrqTrigger)?; + irq_trigger.irq_status = Arc::new(AtomicUsize::new(state.virtio_state.interrupt_status)); + + let mut block = Self { + avail_features: state.virtio_state.avail_features, + acked_features: state.virtio_state.acked_features, + config_space: state.config_space.clone(), + activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(BlockVhostUserError::EventFd)?, + queues: state + .virtio_state + .build_queues_checked( + &constructor_args.mem, + TYPE_BLOCK, + NUM_QUEUES as usize, + QUEUE_SIZE, + ) + .map_err(BlockVhostUserError::Persist)?, + queue_evts, + device_state: DeviceState::Inactive, + irq_trigger, + disk_attrs, + vu_handle: vu, + }; + + if state.virtio_state.activated { + block.device_state = DeviceState::Activated(constructor_args.mem.clone()); + } + + block + .vu_handle + .set_protocol_features_vhost_user( + block.acked_features, + block.vu_handle.acked_protocol_features, + ) + 
.map_err(BlockVhostUserError::VhostUser)?; + + block.setup_vhost_user(&constructor_args.mem)?; + + Ok(block) + } +} diff --git a/src/vmm/src/devices/virtio/mod.rs b/src/vmm/src/devices/virtio/mod.rs index c85d0e1a28c..011128caa15 100644 --- a/src/vmm/src/devices/virtio/mod.rs +++ b/src/vmm/src/devices/virtio/mod.rs @@ -20,6 +20,7 @@ pub mod persist; mod queue; pub mod rng; pub mod test_utils; +mod vhost_user; pub mod vsock; pub use self::balloon::*; @@ -65,6 +66,7 @@ pub const TYPE_BALLOON: u32 = 5; pub const SUBTYPE_NON_VIRTIO: DeviceSubtype = 0; pub const SUBTYPE_NET: DeviceSubtype = 1; pub const SUBTYPE_BLOCK_FILE: DeviceSubtype = 1; +pub const SUBTYPE_BLOCK_VHOST_USER: DeviceSubtype = 2; pub const SUBTYPE_RNG: DeviceSubtype = 1; pub const SUBTYPE_BALLOON: DeviceSubtype = 1; pub const SUBTYPE_VSOCK: DeviceSubtype = 1; @@ -82,6 +84,9 @@ pub enum ActivateError { /// General error at activation. #[error("General error at activation")] BadActivate, + /// Vhost-user-related error. + #[error("Vhost user: {0}")] + VhostUser(crate::devices::virtio::vhost_user::VhostUserError), } /// Trait that helps in upcasting an object to Any diff --git a/src/vmm/src/devices/virtio/vhost_user/mod.rs b/src/vmm/src/devices/virtio/vhost_user/mod.rs index 110488e5a1a..710d0930960 100644 --- a/src/vmm/src/devices/virtio/vhost_user/mod.rs +++ b/src/vmm/src/devices/virtio/vhost_user/mod.rs @@ -182,7 +182,7 @@ impl VhostUserHandle { let vhost_user_net_reg = VhostUserMemoryRegionInfo { guest_phys_addr: region.start_addr().raw_value(), - memory_size: region.len() as u64, + memory_size: region.len(), userspace_addr: region.as_ptr() as u64, mmap_offset, mmap_handle, @@ -210,6 +210,9 @@ impl VhostUserHandle { .set_features(acked_features) .map_err(VhostUserError::VhostUserSetFeatures)?; + // Update acked features after they have been sent to the backend. + // self.acked_features = acked_features; + // Provide the memory table to the backend. 
self.update_mem_table(mem)?; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index d60ae273e77..e4b752b8cbe 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -207,6 +207,10 @@ impl VmResources { self.block.add_device(block); } + SharedDeviceType::BlockVhostUser(block) => { + self.block.add_vhost_user_device(block); + } + SharedDeviceType::Network(network) => { self.net_builder.add_device(network); } @@ -525,6 +529,7 @@ mod tests { rate_limiter: Some(RateLimiterConfig::default()), file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }, tmp_file, ) diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index ee5c2c8dbd8..a4916f78499 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -1392,6 +1392,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }); check_preboot_request(req, |result, vm_res| { assert_eq!(result, Ok(VmmData::Empty)); @@ -1408,6 +1409,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }); check_preboot_request_err( req, @@ -2058,6 +2060,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }), VmmActionError::OperationNotSupportedPostBoot, ); @@ -2166,6 +2169,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }); verify_load_snap_disallowed_after_boot_resources(req, "InsertBlockDevice"); diff --git a/src/vmm/src/vmm_config/drive.rs b/src/vmm/src/vmm_config/drive.rs index 4914ee3d44b..e5b2211951c 100644 --- a/src/vmm/src/vmm_config/drive.rs +++ b/src/vmm/src/vmm_config/drive.rs @@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize}; use super::RateLimiterConfig; pub use crate::devices::virtio::block::file::device::FileEngineType; use crate::devices::virtio::block::file::{BlockFile, BlockFileError}; 
+use crate::devices::virtio::block::vhost_user::{BlockVhostUser, BlockVhostUserError}; pub use crate::devices::virtio::block::CacheType; use crate::devices::virtio::Disk; use crate::VmmError; @@ -22,6 +23,8 @@ use crate::VmmError; pub enum DriveError { /// Unable to create the host-file-backed block device: {0:?} CreateBlockFileDevice(BlockFileError), + /// Could not create a vhost-user-backed Block Device. + CreateBlockVhostUserDevice(BlockVhostUserError), /// Cannot create RateLimiter: {0} CreateRateLimiter(io::Error), /// Unable to patch the block device: {0} @@ -32,6 +35,10 @@ pub enum DriveError { MissingReadOnly, /// A root block device already exists! RootBlockDeviceAlreadyAdded, + /// Mutliple block device configurations provided. + MultipleConfigsProvided, + /// The vhost user socket is invalid: {0} + InvalidVhostUserSocket(String), } /// Configuration for the host-file-backed block device. @@ -51,6 +58,14 @@ pub struct FileConfig { pub file_engine_type: FileEngineType, } +/// Configuration for the vhost-user-backed block device. +#[derive(Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct VhostUserConfig { + /// Socket path for vhost user + pub socket: String, +} + /// Use this structure to set up the Block Device before booting the kernel. #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] @@ -81,6 +96,8 @@ pub struct BlockDeviceConfig { pub file_engine_type: FileEngineType, /// Configuration for the host-file-backed block device. pub file: Option, + /// Configuration for the vhost-user-backed block device. 
+ pub vhost_user: Option, } impl From<&BlockFile> for BlockDeviceConfig { @@ -101,6 +118,26 @@ impl From<&BlockFile> for BlockDeviceConfig { rate_limiter: rl.into_option(), file_engine_type: block.file_engine_type(), }), + vhost_user: None, + } + } +} + +impl From<&BlockVhostUser> for BlockDeviceConfig { + fn from(block: &BlockVhostUser) -> Self { + BlockDeviceConfig { + drive_id: block.id().clone(), + path_on_host: None, + is_root_device: block.is_root_device(), + partuuid: block.partuuid().cloned(), + is_read_only: Some(block.is_read_only()), + cache_type: block.cache_type(), + rate_limiter: None, + file_engine_type: FileEngineType::default(), + file: None, + vhost_user: Some(VhostUserConfig { + socket: block.socket().clone(), + }), } } } @@ -138,6 +175,8 @@ pub struct BlockDeviceUpdateConfig { pub enum Block { /// Host-file-backed block device. FileBacked(Arc>), + /// Vhost-user-backed block device. + VhostUserBacked(Arc>), } /// Wrapper for the collection that holds all the Block Devices @@ -165,6 +204,9 @@ impl BlockBuilder { if let Some(block) = self.list.get(0) { match block { Block::FileBacked(block) => block.lock().expect("Poisoned lock").is_root_device(), + Block::VhostUserBacked(block) => { + block.lock().expect("Poisoned lock").is_root_device() + } } } else { false @@ -175,6 +217,7 @@ impl BlockBuilder { fn get_index_of_drive_id(&self, drive_id: &str) -> Option { self.list.iter().position(|b| match b { Block::FileBacked(b) => b.lock().expect("Poisoned lock").id().eq(drive_id), + Block::VhostUserBacked(b) => b.lock().expect("Poisoned lock").id().eq(drive_id), }) } @@ -187,6 +230,15 @@ impl BlockBuilder { } } + /// Inserts an existing block device. 
+ pub fn add_vhost_user_device(&mut self, block_device: Arc>) { + if block_device.lock().expect("Poisoned lock").is_root_device() { + self.list.push_front(Block::VhostUserBacked(block_device)); + } else { + self.list.push_back(Block::VhostUserBacked(block_device)); + } + } + /// Inserts a `Block` in the block devices list using the specified configuration. /// If a block with the same id already exists, it will overwrite it. /// Inserting a secondary root block device will fail. @@ -201,21 +253,28 @@ impl BlockBuilder { return Err(DriveError::RootBlockDeviceAlreadyAdded); } - let block_dev = Arc::new(Mutex::new(Self::create_block_file(config)?)); + let block_dev = match (&config.file, &config.vhost_user) { + (Some(_), Some(_)) => return Err(DriveError::MultipleConfigsProvided), + (None, Some(_)) => { + Block::VhostUserBacked(Arc::new(Mutex::new(Self::create_block_vhost_user(config)?))) + } + (_, None) => Block::FileBacked(Arc::new(Mutex::new(Self::create_block_file(config)?))), + }; + // If the id of the drive already exists in the list, the operation is update/overwrite. match position { // New block device. None => { if is_root_device { - self.list.push_front(Block::FileBacked(block_dev)); + self.list.push_front(block_dev); } else { - self.list.push_back(Block::FileBacked(block_dev)); + self.list.push_back(block_dev); } } // Update existing block device. Some(index) => { // Update the slot with the new block. - self.list[index] = Block::FileBacked(block_dev); + self.list[index] = block_dev; // Check if the root block device is being updated. if index != 0 && is_root_device { // Make sure the root device is on the first position. @@ -274,6 +333,31 @@ impl BlockBuilder { .map_err(DriveError::CreateBlockFileDevice) } + /// Creates a vhost-user-backed Block device from a BlockDeviceConfig. 
+ fn create_block_vhost_user( + block_device_config: BlockDeviceConfig, + ) -> Result { + let socket = match block_device_config.vhost_user { + Some(vhost_user) => vhost_user.socket, + None => return Err(DriveError::InvalidVhostUserSocket("".to_string())), + }; + + // Check if the socket exists + if !PathBuf::from(&socket).exists() { + return Err(DriveError::InvalidVhostUserSocket(socket)); + } + + // Create and return the Block device + BlockVhostUser::new( + block_device_config.drive_id, + block_device_config.partuuid, + block_device_config.cache_type, + block_device_config.is_root_device, + &socket, + ) + .map_err(DriveError::CreateBlockVhostUserDevice) + } + /// Returns a vec with the structures used to configure the devices. pub fn configs(&self) -> Vec { let mut ret = vec![]; @@ -282,6 +366,9 @@ impl BlockBuilder { Block::FileBacked(block) => { ret.push(BlockDeviceConfig::from(block.lock().unwrap().deref())); } + Block::VhostUserBacked(block) => { + ret.push(BlockDeviceConfig::from(block.lock().unwrap().deref())); + } } } ret @@ -320,6 +407,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), }), + vhost_user: None, } } } @@ -350,6 +438,7 @@ mod tests { rate_limiter: None, file_engine_type: engine, file: None, + vhost_user: None, }; let mut old_api_block_devs = BlockBuilder::new(); @@ -375,6 +464,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::Async, }), + vhost_user: None, }; let mut new_api_block_devs = BlockBuilder::new(); @@ -401,6 +491,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let mut block_devs = BlockBuilder::new(); @@ -421,6 +512,7 @@ mod tests { dummy_block_device.is_read_only.unwrap() ); } + Block::VhostUserBacked(_) => todo!(), } } assert_eq!(block_devs.get_index_of_drive_id(&dummy_id), Some(0)); @@ -441,6 +533,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, 
}; let mut block_devs = BlockBuilder::new(); @@ -460,6 +553,7 @@ mod tests { dummy_block_device.is_read_only.unwrap() ); } + Block::VhostUserBacked(_) => todo!(), } } } @@ -478,6 +572,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let dummy_file_2 = TempFile::new().unwrap(); @@ -492,6 +587,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let mut block_devs = BlockBuilder::new(); @@ -517,6 +613,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let dummy_file_2 = TempFile::new().unwrap(); @@ -531,6 +628,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let dummy_file_3 = TempFile::new().unwrap(); @@ -545,6 +643,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let mut block_devs = BlockBuilder::new(); @@ -561,6 +660,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &root_block_device.drive_id); } + Block::VhostUserBacked(_) => todo!(), } let block = block_iter.next().unwrap(); @@ -568,6 +668,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &dummy_block_dev_2.drive_id); } + Block::VhostUserBacked(_) => todo!(), } let block = block_iter.next().unwrap(); @@ -575,6 +676,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &dummy_block_dev_3.drive_id); } + Block::VhostUserBacked(_) => todo!(), } } @@ -593,6 +695,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let dummy_file_2 = TempFile::new().unwrap(); @@ -607,6 +710,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let dummy_file_3 = 
TempFile::new().unwrap(); @@ -621,6 +725,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let mut block_devs = BlockBuilder::new(); @@ -638,6 +743,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &root_block_device.drive_id); } + Block::VhostUserBacked(_) => todo!(), } let block = block_iter.next().unwrap(); @@ -645,6 +751,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &dummy_block_dev_2.drive_id); } + Block::VhostUserBacked(_) => todo!(), } let block = block_iter.next().unwrap(); @@ -652,6 +759,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &dummy_block_dev_3.drive_id); } + Block::VhostUserBacked(_) => todo!(), } } @@ -669,6 +777,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let dummy_file_2 = TempFile::new().unwrap(); @@ -683,6 +792,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let mut block_devs = BlockBuilder::new(); @@ -720,6 +830,7 @@ mod tests { Block::FileBacked(block) => { assert!(block.lock().unwrap().is_read_only()); } + Block::VhostUserBacked(_) => todo!(), } // Update with invalid path. @@ -748,6 +859,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; // Switch roots and add a PARTUUID for the new one. 
let mut root_block_device_old = root_block_device; @@ -762,6 +874,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; assert!(block_devs.insert(root_block_device_old).is_ok()); let root_block_id = root_block_device_new.drive_id.clone(); @@ -773,6 +886,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().id(), &root_block_id); } + Block::VhostUserBacked(_) => todo!(), } } @@ -790,6 +904,7 @@ mod tests { rate_limiter: None, file_engine_type: FileEngineType::default(), file: None, + vhost_user: None, }; let mut block_devs = BlockBuilder::new(); @@ -835,6 +950,7 @@ mod tests { Block::FileBacked(block) => { assert_eq!(block.lock().unwrap().deref().id(), block_id) } + Block::VhostUserBacked(_) => todo!(), } } }