From c1e145294d2b67aee91a22e4785e5fa87f32bad9 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Tue, 14 Jan 2020 17:45:21 +0800 Subject: [PATCH 01/29] Remove vm-memory dependency Use u64 for guest memory address type since this can make vm-device independent from on vm-memory. Signed-off-by: Jing Liu --- Cargo.toml | 1 - src/lib.rs | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 16e2bc5..42ef5bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,3 @@ repository = "https://github.com/rust-vmm/vm-device" license = "Apache-2.0" [dependencies] -vm-memory = { git = "https://github.com/rust-vmm/vm-memory" } diff --git a/src/lib.rs b/src/lib.rs index 9ef255d..7d2eaa1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,10 +3,6 @@ //! rust-vmm device model. -extern crate vm_memory; - -use vm_memory::GuestAddress; - pub mod resources; /// IO Addresses. @@ -16,7 +12,7 @@ pub enum IoAddress { Pio(u16), /// Memory mapped I/O address. - Mmio(GuestAddress), + Mmio(u64), } /// Device IO trait. From 06b9e004ecddcaeb13649e03dd2460eaafb9e57f Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Tue, 14 Jan 2020 17:40:50 +0800 Subject: [PATCH 02/29] Improve DeviceIo interface Change DeviceIo interface parameters to base and offset, so that devices with several IO ranges can use it to locate right range. Signed-off-by: Jing Liu --- src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7d2eaa1..6bde461 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,9 +21,11 @@ pub enum IoAddress { /// The VMM will then dispatch IO (PIO or MMIO) VM exits by calling into the /// registered devices read or write method from this trait. pub trait DeviceIo: Send { - /// Read from the guest physical address `addr` to `data`. - fn read(&mut self, addr: IoAddress, data: &mut [u8]); + /// Read from guest physical address `base + offset` of the registered + /// device to `data`. 
+ fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]); - /// Write `data` to the guest physical address `addr`. - fn write(&mut self, addr: IoAddress, data: &[u8]); + /// Write `data` to the guest physical address `base + offset` of the + /// registered device. + fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]); } From d4664f87fd4ad472bef4b1d38695b67b3cb08fcb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 14 Jan 2020 15:05:13 +0100 Subject: [PATCH 03/29] lib: Improve DeviceIo methods documentation As suggested from https://github.com/rust-vmm/vm-device/pull/18#discussion_r366238131 Suggested-by: Andreea Florescu Signed-off-by: Samuel Ortiz --- src/lib.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6bde461..b60ba5d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,11 +21,10 @@ pub enum IoAddress { /// The VMM will then dispatch IO (PIO or MMIO) VM exits by calling into the /// registered devices read or write method from this trait. pub trait DeviceIo: Send { - /// Read from guest physical address `base + offset` of the registered - /// device to `data`. + /// Read from the guest physical address `base`, starting at `offset`. + /// Result is placed in `data`. fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]); - /// Write `data` to the guest physical address `base + offset` of the - /// registered device. + /// Write `data` to the guest physical address `base`, starting from `offset`. 
fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]); } From 3d51c9e19f505ade6a1f75eef07e5df45b03b8c9 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Tue, 17 Dec 2019 19:32:42 +0800 Subject: [PATCH 04/29] Update rust-vmm-ci Signed-off-by: Jing Liu --- rust-vmm-ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-vmm-ci b/rust-vmm-ci index bb1cd14..c309d06 160000 --- a/rust-vmm-ci +++ b/rust-vmm-ci @@ -1 +1 @@ -Subproject commit bb1cd14d2c164b4f699b08c885c06a02fbe3f7b0 +Subproject commit c309d0627bde6b07db91201dd8b47007841c100a From 6aae2024a9dff840c22c62a9a748057466ce34f3 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Mon, 20 Jan 2020 18:21:10 +0800 Subject: [PATCH 05/29] Make DeviceIo internal mutability In order to get a real multiple threads handling to enhance performance, the DeviceIo trait need adopt interior mutability pattern. Signed-off-by: Jing Liu --- src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b60ba5d..b0b272d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,11 +20,13 @@ pub enum IoAddress { /// register itself against the different IO type ranges it handles. /// The VMM will then dispatch IO (PIO or MMIO) VM exits by calling into the /// registered devices read or write method from this trait. +/// The DeviceIo trait adopts the interior mutability pattern +/// so we can get a real multiple threads handling. pub trait DeviceIo: Send { /// Read from the guest physical address `base`, starting at `offset`. /// Result is placed in `data`. - fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]); + fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]); /// Write `data` to the guest physical address `base`, starting from `offset`. 
- fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]); + fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]); } From ef21cf17f135fc70bad560b0a591aadea08c045f Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Thu, 31 Oct 2019 22:36:42 +0800 Subject: [PATCH 06/29] Add IO manager support Based on resources definition, this adds device IO manager to manage all devices IO ranges. Signed-off-by: Jing Liu --- src/device_manager.rs | 108 ++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 109 insertions(+) create mode 100644 src/device_manager.rs diff --git a/src/device_manager.rs b/src/device_manager.rs new file mode 100644 index 0000000..1d858c1 --- /dev/null +++ b/src/device_manager.rs @@ -0,0 +1,108 @@ +// Copyright © 2019 Intel Corporation. All Rights Reserved. +// SPDX-License-Identifier: (Apache-2.0 OR BSD-3-Clause) + +//! System level device management. +//! +//! [IoManager](struct.IoManager.html) is responsible for managing +//! all devices of virtual machine, registering IO resources callback, +//! unregistering devices and helping VM IO exit handling. +//! +//! VMM would be responsible for getting device resource request, ask +//! vm_allocator to allocate the resources, ask vm_device to register the +//! devices IO ranges, and finally set resources to virtual device. + +use crate::resources::Resource; +use crate::DeviceIo; + +use std::collections::btree_map::BTreeMap; +use std::result; +use std::sync::Arc; + +/// Error type for `IoManager` usage. +#[derive(Debug)] +pub enum Error { + /// The inserting device overlaps with a current device. + DeviceOverlap, +} + +/// Simplify the `Result` type. +pub type Result = result::Result; + +/// System IO manager serving for all devices management and VM exit handling. +#[derive(Default)] +pub struct IoManager { + /// Range mapping for VM exit pio operations. + pio_bus: BTreeMap<(u16, u16), Arc>, + /// Range mapping for VM exit mmio operations. 
+ mmio_bus: BTreeMap<(u64, u64), Arc>, +} + +impl IoManager { + /// Create a default IoManager with empty IO member. + pub fn new() -> Self { + IoManager::default() + } + /// Register a new device IO with its allocated resources. + /// VMM is responsible for providing the allocated resources to virtual device. + /// + /// # Arguments + /// + /// * `device`: device instance object to be registered + /// * `resources`: resources that this device owns, might include + /// port I/O and memory-mapped I/O ranges, irq number, etc. + pub fn register_device_io( + &mut self, + device: Arc, + resources: &[Resource], + ) -> Result<()> { + // Register and mark device resources + // The resources addresses being registered are successfully allocated before. + for (idx, res) in resources.iter().enumerate() { + match *res { + Resource::PioAddressRange { base, size } => { + if self.pio_bus.insert((base, size), device.clone()).is_some() { + // Unregister registered resources. + self.unregister_device_io(&resources[0..idx]) + .expect("failed to unregister devices"); + + return Err(Error::DeviceOverlap); + } + } + Resource::MmioAddressRange { base, size } => { + if self.mmio_bus.insert((base, size), device.clone()).is_some() { + // Unregister registered resources. + self.unregister_device_io(&resources[0..idx]) + .expect("failed to unregister devices"); + + return Err(Error::DeviceOverlap); + } + } + _ => continue, + } + } + Ok(()) + } + + /// Unregister a device from `IoManager`, e.g. users specified removing. + /// VMM pre-fetches the resources e.g. dev.get_assigned_resources() + /// VMM is responsible for freeing the resources. + /// + /// # Arguments + /// + /// * `resources`: resources that this device owns, might include + /// port I/O and memory-mapped I/O ranges, irq number, etc. 
+ pub fn unregister_device_io(&mut self, resources: &[Resource]) -> Result<()> { + for res in resources.iter() { + match *res { + Resource::PioAddressRange { base, size } => { + self.pio_bus.remove(&(base, size)); + } + Resource::MmioAddressRange { base, size } => { + self.mmio_bus.remove(&(base, size)); + } + _ => continue, + } + } + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index b0b272d..8620bfb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ //! rust-vmm device model. +pub mod device_manager; pub mod resources; /// IO Addresses. From 372bedf94bbc5cf5485cc10d5832c23b0b92a694 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Sat, 23 Nov 2019 00:58:29 +0800 Subject: [PATCH 07/29] Add read and write operations handling IO manager is responsible for handling IO operation when VMExit. It works out the specific device according to the address range and hand over to DeviceIo trait. Signed-off-by: Jing Liu --- src/device_manager.rs | 142 +++++++++++++++++++++++++++++++++++++++--- src/lib.rs | 52 ++++++++++++++++ 2 files changed, 187 insertions(+), 7 deletions(-) diff --git a/src/device_manager.rs b/src/device_manager.rs index 1d858c1..11471a8 100644 --- a/src/device_manager.rs +++ b/src/device_manager.rs @@ -12,8 +12,9 @@ //! devices IO ranges, and finally set resources to virtual device. use crate::resources::Resource; -use crate::DeviceIo; +use crate::{DeviceIo, IoAddress, IoSize}; +use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; use std::collections::btree_map::BTreeMap; use std::result; use std::sync::Arc; @@ -23,18 +24,62 @@ use std::sync::Arc; pub enum Error { /// The inserting device overlaps with a current device. DeviceOverlap, + /// The device doesn't exist. + NoDevice, } /// Simplify the `Result` type. pub type Result = result::Result; +// Structure describing an IO range. 
+#[derive(Debug, Copy, Clone)] +struct IoRange { + base: IoAddress, + size: IoSize, +} + +impl IoRange { + fn new_pio_range(base: u16, size: u16) -> Self { + IoRange { + base: IoAddress::Pio(base), + size: IoSize::Pio(size), + } + } + fn new_mmio_range(base: u64, size: u64) -> Self { + IoRange { + base: IoAddress::Mmio(base), + size: IoSize::Mmio(size), + } + } +} + +impl Eq for IoRange {} + +impl PartialEq for IoRange { + fn eq(&self, other: &IoRange) -> bool { + self.base == other.base + } +} + +impl Ord for IoRange { + fn cmp(&self, other: &IoRange) -> Ordering { + self.base.cmp(&other.base) + } +} + +impl PartialOrd for IoRange { + fn partial_cmp(&self, other: &IoRange) -> Option { + self.base.partial_cmp(&other.base) + } +} + /// System IO manager serving for all devices management and VM exit handling. #[derive(Default)] pub struct IoManager { /// Range mapping for VM exit pio operations. - pio_bus: BTreeMap<(u16, u16), Arc>, + pio_bus: BTreeMap>, /// Range mapping for VM exit mmio operations. - mmio_bus: BTreeMap<(u64, u64), Arc>, + mmio_bus: BTreeMap>, } impl IoManager { @@ -60,7 +105,11 @@ impl IoManager { for (idx, res) in resources.iter().enumerate() { match *res { Resource::PioAddressRange { base, size } => { - if self.pio_bus.insert((base, size), device.clone()).is_some() { + if self + .pio_bus + .insert(IoRange::new_pio_range(base, size), device.clone()) + .is_some() + { // Unregister registered resources. self.unregister_device_io(&resources[0..idx]) .expect("failed to unregister devices"); @@ -69,7 +118,11 @@ impl IoManager { } } Resource::MmioAddressRange { base, size } => { - if self.mmio_bus.insert((base, size), device.clone()).is_some() { + if self + .mmio_bus + .insert(IoRange::new_mmio_range(base, size), device.clone()) + .is_some() + { // Unregister registered resources. 
self.unregister_device_io(&resources[0..idx]) .expect("failed to unregister devices"); return Err(Error::DeviceOverlap); } } _ => continue, } } Ok(()) } @@ -95,14 +148,89 @@ for res in resources.iter() { match *res { Resource::PioAddressRange { base, size } => { - self.pio_bus.remove(&(base, size)); + self.pio_bus.remove(&IoRange::new_pio_range(base, size)); } Resource::MmioAddressRange { base, size } => { - self.mmio_bus.remove(&(base, size)); + self.mmio_bus.remove(&IoRange::new_mmio_range(base, size)); } _ => continue, } } Ok(()) } + + fn get_entry(&self, addr: IoAddress) -> Option<(&IoRange, &Arc)> { + match addr { + IoAddress::Pio(a) => self + .pio_bus + .range(..=&IoRange::new_pio_range(a, 0)) + .nth_back(0), + IoAddress::Mmio(a) => self + .mmio_bus + .range(..=&IoRange::new_mmio_range(a, 0)) + .nth_back(0), + } + } + + // Return the Device mapped `addr` and the base address. + fn get_device(&self, addr: IoAddress) -> Option<(&Arc, IoAddress)> { + if let Some((range, dev)) = self.get_entry(addr) { + if (addr.raw_value() - range.base.raw_value()) < range.size.raw_value() { + return Some((dev, range.base)); + } + } + None + } + + /// A helper function handling PIO read command during VM exit. + /// The virtual device itself provides mutable ability and thread-safe protection. + /// + /// Return error if failed to get the device. + pub fn pio_read(&self, addr: u16, data: &mut [u8]) -> Result<()> { + if let Some((device, base)) = self.get_device(IoAddress::Pio(addr)) { + device.read(base, IoAddress::Pio(addr - (base.raw_value() as u16)), data); + Ok(()) + } else { + Err(Error::NoDevice) + } + } + + /// A helper function handling PIO write command during VM exit. + /// The virtual device itself provides mutable ability and thread-safe protection. + /// + /// Return error if failed to get the device. 
+ pub fn pio_write(&self, addr: u16, data: &[u8]) -> Result<()> { + if let Some((device, base)) = self.get_device(IoAddress::Pio(addr)) { + device.write(base, IoAddress::Pio(addr - (base.raw_value() as u16)), data); + Ok(()) + } else { + Err(Error::NoDevice) + } + } + + /// A helper function handling MMIO read command during VM exit. + /// The virtual device itself provides mutable ability and thead-safe protection. + /// + /// Return error if failed to get the device. + pub fn mmio_read(&self, addr: u64, data: &mut [u8]) -> Result<()> { + if let Some((device, base)) = self.get_device(IoAddress::Mmio(addr)) { + device.read(base, IoAddress::Mmio(addr - base.raw_value()), data); + Ok(()) + } else { + Err(Error::NoDevice) + } + } + + /// A helper function handling MMIO write command during VM exit. + /// The virtual device itself provides mutable ability and thead-safe protection. + /// + /// Return error if failed to get the device. + pub fn mmio_write(&self, addr: u64, data: &[u8]) -> Result<()> { + if let Some((device, base)) = self.get_device(IoAddress::Mmio(addr)) { + device.write(base, IoAddress::Mmio(addr - base.raw_value()), data); + Ok(()) + } else { + Err(Error::NoDevice) + } + } } diff --git a/src/lib.rs b/src/lib.rs index 8620bfb..dc09ddb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,9 +3,31 @@ //! rust-vmm device model. +use std::cmp::{Ord, Ordering, PartialOrd}; + pub mod device_manager; pub mod resources; +// IO Size. +#[derive(Debug, Copy, Clone)] +enum IoSize { + // Port I/O size. + Pio(u16), + + // Memory mapped I/O size. + Mmio(u64), +} + +impl IoSize { + // Get the raw value as u64 to make operation simple. + fn raw_value(&self) -> u64 { + match *self { + IoSize::Pio(p) => u64::from(p), + IoSize::Mmio(m) => m, + } + } +} + /// IO Addresses. #[derive(Debug, Copy, Clone)] pub enum IoAddress { @@ -16,6 +38,36 @@ pub enum IoAddress { Mmio(u64), } +impl IoAddress { + // Get the raw value of IO Address to make operation simple. 
+ fn raw_value(&self) -> u64 { + match *self { + IoAddress::Pio(p) => u64::from(p), + IoAddress::Mmio(m) => m, + } + } +} + +impl Eq for IoAddress {} + +impl PartialEq for IoAddress { + fn eq(&self, other: &IoAddress) -> bool { + self.raw_value() == other.raw_value() + } +} + +impl Ord for IoAddress { + fn cmp(&self, other: &IoAddress) -> Ordering { + self.raw_value().cmp(&other.raw_value()) + } +} + +impl PartialOrd for IoAddress { + fn partial_cmp(&self, other: &IoAddress) -> Option { + self.raw_value().partial_cmp(&other.raw_value()) + } +} + /// Device IO trait. /// A device supporting memory based I/O should implement this trait, then /// register itself against the different IO type ranges it handles. From 66166a385ab2e6a41cb2fc879c439fc299d6b6f6 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Sat, 23 Nov 2019 02:51:49 +0800 Subject: [PATCH 08/29] Add unit tests Unit tests for IO manager. Signed-off-by: Jing Liu --- coverage_config.json | 2 +- src/device_manager.rs | 122 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/coverage_config.json b/coverage_config.json index 03bbeba..c466612 100644 --- a/coverage_config.json +++ b/coverage_config.json @@ -1,5 +1,5 @@ { - "coverage_score": 75.8, + "coverage_score": 78.7, "exclude_path": "", "crate_features": "" } diff --git a/src/device_manager.rs b/src/device_manager.rs index 11471a8..bbbb857 100644 --- a/src/device_manager.rs +++ b/src/device_manager.rs @@ -234,3 +234,125 @@ impl IoManager { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Mutex; + + const PIO_ADDRESS_SIZE: u16 = 4; + const PIO_ADDRESS_BASE: u16 = 0x40; + const MMIO_ADDRESS_SIZE: u64 = 0x8765_4321; + const MMIO_ADDRESS_BASE: u64 = 0x1234_5678; + const LEGACY_IRQ: u32 = 4; + const CONFIG_DATA: u32 = 0x1234; + + struct DummyDevice { + config: Mutex, + } + + impl DummyDevice { + fn new(config: u32) -> Self { + DummyDevice { + config: Mutex::new(config), + } + } + } + + impl 
DeviceIo for DummyDevice { + fn read(&self, _base: IoAddress, _offset: IoAddress, data: &mut [u8]) { + if data.len() > 4 { + return; + } + for (idx, iter) in data.iter_mut().enumerate() { + let config = self.config.lock().expect("failed to acquire lock"); + *iter = (*config >> (idx * 8) & 0xff) as u8; + } + } + + fn write(&self, _base: IoAddress, _offset: IoAddress, data: &[u8]) { + let mut config = self.config.lock().expect("failed to acquire lock"); + *config = u32::from(data[0]) & 0xff; + } + } + + #[test] + fn test_register_unregister_device_io() { + let mut io_mgr = IoManager::new(); + let dummy = DummyDevice::new(0); + let dum = Arc::new(dummy); + + let mut resource: Vec = Vec::new(); + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + let irq = Resource::LegacyIrq(LEGACY_IRQ); + + resource.push(mmio); + resource.push(irq); + + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + assert!(io_mgr.unregister_device_io(&resource).is_ok()) + } + + #[test] + fn test_mmio_read_write() { + let mut io_mgr: IoManager = Default::default(); + let dum = Arc::new(DummyDevice::new(CONFIG_DATA)); + let mut resource: Vec = Vec::new(); + + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + resource.push(mmio); + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + + let mut data = [0; 4]; + assert!(io_mgr.mmio_read(MMIO_ADDRESS_BASE, &mut data).is_ok()); + assert_eq!(data, [0x34, 0x12, 0, 0]); + + assert!(io_mgr + .mmio_read(MMIO_ADDRESS_BASE + MMIO_ADDRESS_SIZE, &mut data) + .is_err()); + + data = [0; 4]; + assert!(io_mgr.mmio_write(MMIO_ADDRESS_BASE, &data).is_ok()); + assert_eq!(*dum.config.lock().unwrap(), 0); + + assert!(io_mgr + .mmio_write(MMIO_ADDRESS_BASE + MMIO_ADDRESS_SIZE, &data) + .is_err()); + } + + #[test] + fn test_pio_read_write() { + let mut io_mgr: IoManager = Default::default(); + let dum = 
Arc::new(DummyDevice::new(CONFIG_DATA)); + let mut resource: Vec = Vec::new(); + + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + resource.push(pio); + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + + let mut data = [0; 4]; + assert!(io_mgr.pio_read(PIO_ADDRESS_BASE, &mut data).is_ok()); + assert_eq!(data, [0x34, 0x12, 0, 0]); + + assert!(io_mgr + .pio_read(PIO_ADDRESS_BASE + PIO_ADDRESS_SIZE, &mut data) + .is_err()); + + data = [0; 4]; + assert!(io_mgr.pio_write(PIO_ADDRESS_BASE, &data).is_ok()); + assert_eq!(*dum.config.lock().unwrap(), 0); + + assert!(io_mgr + .pio_write(PIO_ADDRESS_BASE + PIO_ADDRESS_SIZE, &data) + .is_err()); + } +} From 3daea681b4f840f273d5e8780f2a88d59487dac4 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 18 Dec 2019 20:54:32 +0800 Subject: [PATCH 09/29] Append missing tests for resources Append missing tests for resources and fix some typo. Signed-off-by: Jing Liu --- coverage_config.json | 2 +- src/resources.rs | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/coverage_config.json b/coverage_config.json index c466612..a9e9a75 100644 --- a/coverage_config.json +++ b/coverage_config.json @@ -1,5 +1,5 @@ { - "coverage_score": 78.7, + "coverage_score": 79.9, "exclude_path": "", "crate_features": "" } diff --git a/src/resources.rs b/src/resources.rs index 5ae37dd..8a74ea8 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -401,7 +401,7 @@ mod tests { assert_eq!(align, 0x1000); assert_eq!(size, 0x2000); } else { - panic!("Pio resource constraint is invalid."); + panic!("Mmio resource constraint is invalid."); } if let ResourceConstraint::MmioAddress { range, align, size } = @@ -411,7 +411,24 @@ mod tests { assert_eq!(align, 0x2000); assert_eq!(size, 0x2000); } else { - panic!("Pio resource constraint is invalid."); + panic!("Mmio resource constraint is invalid."); + } + + if let ResourceConstraint::LegacyIrq { irq } = + 
ResourceConstraint::new_legacy_irq(Some(0x123)) + { + assert_eq!(irq, Some(0x123)); + } else { + panic!("IRQ resource constraint is invalid."); + } + + if let ResourceConstraint::KvmMemSlot { slot, size } = + ResourceConstraint::new_kvm_mem_slot(0x1000, Some(0x2000)) + { + assert_eq!(slot, Some(0x2000)); + assert_eq!(size, 0x1000); + } else { + panic!("KVM slot resource constraint is invalid."); } } } From 2f4c51ceb43d1e34bda978bab7c1ca6eacafa4bc Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 1 Feb 2020 11:30:42 +0800 Subject: [PATCH 10/29] Introduce Mutex adapter for DeviceIo Many device backend drivers will mutate itself when handling IO requests. The DeviceIo trait assumes interior mutability, but it's a little complex to support interior mutability. So introduce the Mutex adapter to ease device backend driver implementations. And the Mutex adapter is an zero overhead abstraction without performance penalty. Liu Jiang --- src/lib.rs | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dc09ddb..53aecc3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ //! rust-vmm device model. use std::cmp::{Ord, Ordering, PartialOrd}; +use std::sync::Mutex; pub mod device_manager; pub mod resources; @@ -68,13 +69,17 @@ impl PartialOrd for IoAddress { } } -/// Device IO trait. +/// Device IO trait adopting interior mutability pattern. +/// /// A device supporting memory based I/O should implement this trait, then /// register itself against the different IO type ranges it handles. /// The VMM will then dispatch IO (PIO or MMIO) VM exits by calling into the /// registered devices read or write method from this trait. -/// The DeviceIo trait adopts the interior mutability pattern -/// so we can get a real multiple threads handling. +/// +/// The DeviceIo trait adopts the interior mutability pattern so we can get a +/// real concurrent multiple threads handling. 
For device backend drivers not +/// focusing on high performance, they may use the Mutex +/// adapter to simplify implementation. pub trait DeviceIo: Send { /// Read from the guest physical address `base`, starting at `offset`. /// Result is placed in `data`. @@ -83,3 +88,69 @@ pub trait DeviceIo: Send { /// Write `data` to the guest physical address `base`, starting from `offset`. fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]); } + +/// Device IO trait without interior mutability. +/// +/// Many device backend drivers will mutate itself when handling IO requests. +/// The DeviceIo trait assumes interior mutability, but it's a little complex +/// to support interior mutability. So the Mutex adapter may be +/// used to ease device backend driver implementations. +/// +/// The Mutex adapter is an zero overhead abstraction without +/// performance penalty. +pub trait DeviceIoMut: Send { + /// Read from the guest physical address `base`, starting at `offset`. + /// Result is placed in `data`. + fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]); + + /// Write `data` to the guest physical address `base`, starting from `offset`. + fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]); +} + +impl DeviceIo for Mutex { + fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().read(base, offset, data) + } + + fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. 
+ self.lock().unwrap().write(base, offset, data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + #[derive(Default)] + struct MockDevice { + data: u8, + } + + impl DeviceIoMut for MockDevice { + fn read(&mut self, _base: IoAddress, _offset: IoAddress, data: &mut [u8]) { + data[0] = self.data; + } + + fn write(&mut self, _base: IoAddress, _offset: IoAddress, data: &[u8]) { + self.data = data[0]; + } + } + + fn register_device(device: Arc) { + device.write(IoAddress::Mmio(0), IoAddress::Mmio(0), &[0x10u8]); + let mut buf = [0x0u8]; + device.read(IoAddress::Mmio(0), IoAddress::Mmio(0), &mut buf); + assert_eq!(buf[0], 0x10); + } + + #[test] + fn test_device_io_mut_adapter() { + let device_mut = Arc::new(Mutex::new(MockDevice::default())); + + register_device(device_mut.clone()); + assert_eq!(device_mut.lock().unwrap().data, 0x010); + } +} From 5bd69ad6f49866c5bdb97de63d814dad48d8a1ba Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 1 Feb 2020 14:29:18 +0800 Subject: [PATCH 11/29] Make DeviceIo depend on Sync Previously DeviceIo depends on Send, but doesn't depend on Sync, which fails to share Arc among vCPU threads. So make DeviceIo depend on Sync too. Signed-off-by: Liu Jiang --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 53aecc3..6badf06 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ impl PartialOrd for IoAddress { /// real concurrent multiple threads handling. For device backend drivers not /// focusing on high performance, they may use the Mutex /// adapter to simplify implementation. -pub trait DeviceIo: Send { +pub trait DeviceIo: Send + Sync { /// Read from the guest physical address `base`, starting at `offset`. /// Result is placed in `data`. 
fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]); From 1d55fc89a3c26acaa7c99676507ebff7e93d32d2 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 1 Feb 2020 15:01:12 +0800 Subject: [PATCH 12/29] Implement Clone for IoManager Implement Clone for IoManager, which will be needed for RCU-style device hotplug. Liu Jiang --- src/device_manager.rs | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/device_manager.rs b/src/device_manager.rs index bbbb857..5d7829a 100644 --- a/src/device_manager.rs +++ b/src/device_manager.rs @@ -74,7 +74,7 @@ impl PartialOrd for IoRange { } /// System IO manager serving for all devices management and VM exit handling. -#[derive(Default)] +#[derive(Clone, Default)] pub struct IoManager { /// Range mapping for VM exit pio operations. pio_bus: BTreeMap>, @@ -276,6 +276,45 @@ mod tests { } } + #[test] + fn test_clone_io_manager() { + let mut io_mgr = IoManager::new(); + let dummy = DummyDevice::new(0); + let dum = Arc::new(dummy); + + let mut resource: Vec = Vec::new(); + let mmio = Resource::MmioAddressRange { + base: MMIO_ADDRESS_BASE, + size: MMIO_ADDRESS_SIZE, + }; + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + let irq = Resource::LegacyIrq(LEGACY_IRQ); + + resource.push(mmio); + resource.push(pio); + resource.push(irq); + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); + + let io_mgr2 = io_mgr.clone(); + assert_eq!(io_mgr2.pio_bus.len(), 1); + assert_eq!(io_mgr2.mmio_bus.len(), 1); + + let (dev, addr) = io_mgr2 + .get_device(IoAddress::Mmio(MMIO_ADDRESS_BASE + 1)) + .unwrap(); + assert_eq!(Arc::strong_count(dev), 5); + assert_eq!(addr, IoAddress::Mmio(MMIO_ADDRESS_BASE)); + + drop(io_mgr); + assert_eq!(Arc::strong_count(dev), 3); + + drop(io_mgr2); + assert_eq!(Arc::strong_count(&dum), 1); + } + #[test] fn test_register_unregister_device_io() { let mut io_mgr = IoManager::new(); From 
3fc25ea21d1f631b0e241d26b0bee9e23c8752bb Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Fri, 7 Feb 2020 15:38:11 +0800 Subject: [PATCH 13/29] Export IoSize and IoRange as pub Export IoSize and IoRange as pub, which may be reused. Liu Jiang --- src/device_manager.rs | 4 ++-- src/lib.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/device_manager.rs b/src/device_manager.rs index 5d7829a..fec09a1 100644 --- a/src/device_manager.rs +++ b/src/device_manager.rs @@ -31,9 +31,9 @@ pub enum Error { /// Simplify the `Result` type. pub type Result = result::Result; -// Structure describing an IO range. +/// Structure describing an IO range. #[derive(Debug, Copy, Clone)] -struct IoRange { +pub struct IoRange { base: IoAddress, size: IoSize, } diff --git a/src/lib.rs b/src/lib.rs index 6badf06..ed2cc89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,13 +9,13 @@ use std::sync::Mutex; pub mod device_manager; pub mod resources; -// IO Size. +/// IO Size. #[derive(Debug, Copy, Clone)] -enum IoSize { - // Port I/O size. +pub enum IoSize { + /// Port I/O size. Pio(u16), - // Memory mapped I/O size. + /// Memory mapped I/O size. Mmio(u64), } From bdf834e0a4febae35d090970229e3e74105ccaef Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Fri, 24 Jan 2020 19:25:09 +0800 Subject: [PATCH 14/29] Implement Deref for DeviceResources Implement Deref for DeviceResources, so we could walk all resource entries in an Resources object. Signed-off-by: Liu Jiang --- src/resources.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/resources.rs b/src/resources.rs index 8a74ea8..00618a3 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -12,6 +12,7 @@ //! 5) the VMM registers the new device onto corresponding device managers according the allocated //! resources. +use std::ops::Deref; use std::{u16, u32, u64}; /// Enumeration describing a device's resource constraints. 
@@ -245,6 +246,14 @@ impl DeviceResources { } } +impl Deref for DeviceResources { + type Target = [Resource]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + #[cfg(test)] mod tests { use super::*; @@ -431,4 +440,14 @@ mod tests { panic!("KVM slot resource constraint is invalid."); } } + + #[test] + fn test_resources_deref() { + let resources = get_device_resource(); + let mut count = 0; + for _res in resources.iter() { + count += 1; + } + assert_eq!(count, resources.0.len()); + } } From 11f77fb465752d0ec95449a12c3d65e95039c6e9 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Tue, 4 Feb 2020 23:01:43 +0800 Subject: [PATCH 15/29] resource: derive Debug for resource related structs Add #[derive(Debug)] for resource related data structs, so we could use assert!() and assert_eq!() etc in unit test cases. Signed-off-by: Liu Jiang --- coverage_config.json | 2 +- src/resources.rs | 41 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/coverage_config.json b/coverage_config.json index a9e9a75..363e921 100644 --- a/coverage_config.json +++ b/coverage_config.json @@ -1,5 +1,5 @@ { - "coverage_score": 79.9, + "coverage_score": 83.5, "exclude_path": "", "crate_features": "" } diff --git a/src/resources.rs b/src/resources.rs index 00618a3..89f383d 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -16,6 +16,7 @@ use std::ops::Deref; use std::{u16, u32, u64}; /// Enumeration describing a device's resource constraints. +#[derive(Copy, Clone, Debug, PartialEq)] pub enum ResourceConstraint { /// Constraint for an IO Port address range. PioAddress { @@ -109,7 +110,7 @@ impl ResourceConstraint { } /// Type of Message Singaled Interrupt -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum MsiIrqType { /// PCI MSI IRQ numbers. PciMsi, @@ -121,7 +122,7 @@ pub enum MsiIrqType { /// Enumeration for device resources. 
#[allow(missing_docs)] -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] pub enum Resource { /// IO Port address range. PioAddressRange { base: u16, size: u16 }, @@ -142,7 +143,7 @@ pub enum Resource { } /// Newtype to store a set of device resources. -#[derive(Default, Clone)] +#[derive(Clone, Debug, Default)] pub struct DeviceResources(Vec); impl DeviceResources { @@ -278,12 +279,16 @@ mod tests { size: PIO_ADDRESS_SIZE, }; let mut resource = DeviceResources::new(); - resource.append(entry); + resource.append(entry.clone()); + assert_eq!(entry, resource[0]); + let entry = Resource::MmioAddressRange { base: MMIO_ADDRESS_BASE, size: MMIO_ADDRESS_SIZE, }; - resource.append(entry); + resource.append(entry.clone()); + assert_eq!(entry, resource[1]); + let entry = Resource::LegacyIrq(LEGACY_IRQ); resource.append(entry); let entry = Resource::MsiIrq { @@ -319,6 +324,25 @@ mod tests { resources.get_pio_address_ranges()[0].0 == PIO_ADDRESS_BASE && resources.get_pio_address_ranges()[0].1 == PIO_ADDRESS_SIZE ); + assert_eq!( + resources[0], + Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + } + ); + assert_ne!(resources[0], resources[1]); + + let resources2 = resources.clone(); + assert_eq!(resources.len(), resources2.len()); + drop(resources); + assert_eq!( + resources2[0], + Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + } + ); } #[test] @@ -383,6 +407,13 @@ mod tests { #[test] fn test_resource_constraint() { + let pio = ResourceConstraint::new_pio(2); + let pio2 = pio.clone(); + let mmio = ResourceConstraint::new_mmio(0x1000); + assert_eq!(pio, pio2); + drop(pio2); + assert_ne!(pio, mmio); + if let ResourceConstraint::PioAddress { range, align, size } = ResourceConstraint::new_pio(2) { From 2d9c7e6249bfc2886e0df68722b956b3010bd082 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 8 Feb 2020 16:02:27 +0800 Subject: [PATCH 16/29] Build dedicated structs/interfaces for Pio The design to multiplex 
IoAddress/IoSize for MMIO and PIO makes the device driver implmentation a little complex, so build dedicated data structs and interfaces to handle PIO requests. Also make PIO related code x86 specific. Signed-off-by: Liu Jiang --- coverage_config.json | 2 +- src/device_manager.rs | 175 ++++++++++++++++----------- src/lib.rs | 269 +++++++++++++++++++++++++++++++++++------- 3 files changed, 331 insertions(+), 115 deletions(-) diff --git a/coverage_config.json b/coverage_config.json index 363e921..2e04917 100644 --- a/coverage_config.json +++ b/coverage_config.json @@ -1,5 +1,5 @@ { - "coverage_score": 83.5, + "coverage_score": 86.1, "exclude_path": "", "crate_features": "" } diff --git a/src/device_manager.rs b/src/device_manager.rs index fec09a1..047792c 100644 --- a/src/device_manager.rs +++ b/src/device_manager.rs @@ -12,6 +12,8 @@ //! devices IO ranges, and finally set resources to virtual device. use crate::resources::Resource; +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +use crate::PioAddress; use crate::{DeviceIo, IoAddress, IoSize}; use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; @@ -32,29 +34,29 @@ pub enum Error { pub type Result = result::Result; /// Structure describing an IO range. 
-#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, Eq)] pub struct IoRange { base: IoAddress, size: IoSize, } impl IoRange { + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] fn new_pio_range(base: u16, size: u16) -> Self { IoRange { - base: IoAddress::Pio(base), - size: IoSize::Pio(size), + base: IoAddress(base as u64), + size: IoSize(size as u64), } } + fn new_mmio_range(base: u64, size: u64) -> Self { IoRange { - base: IoAddress::Mmio(base), - size: IoSize::Mmio(size), + base: IoAddress(base), + size: IoSize(size), } } } -impl Eq for IoRange {} - impl PartialEq for IoRange { fn eq(&self, other: &IoRange) -> bool { self.base == other.base @@ -76,6 +78,7 @@ impl PartialOrd for IoRange { /// System IO manager serving for all devices management and VM exit handling. #[derive(Clone, Default)] pub struct IoManager { + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] /// Range mapping for VM exit pio operations. pio_bus: BTreeMap>, /// Range mapping for VM exit mmio operations. @@ -87,6 +90,7 @@ impl IoManager { pub fn new() -> Self { IoManager::default() } + /// Register a new device IO with its allocated resources. /// VMM is responsible for providing the allocated resources to virtual device. /// @@ -104,6 +108,7 @@ impl IoManager { // The resources addresses being registered are sucessfully allocated before. 
for (idx, res) in resources.iter().enumerate() { match *res { + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] Resource::PioAddressRange { base, size } => { if self .pio_bus @@ -147,6 +152,7 @@ impl IoManager { pub fn unregister_device_io(&mut self, resources: &[Resource]) -> Result<()> { for res in resources.iter() { match *res { + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] Resource::PioAddressRange { base, size } => { self.pio_bus.remove(&IoRange::new_pio_range(base, size)); } @@ -159,40 +165,48 @@ impl IoManager { Ok(()) } - fn get_entry(&self, addr: IoAddress) -> Option<(&IoRange, &Arc)> { - match addr { - IoAddress::Pio(a) => self - .pio_bus - .range(..=&IoRange::new_pio_range(a, 0)) - .nth_back(0), - IoAddress::Mmio(a) => self - .mmio_bus - .range(..=&IoRange::new_mmio_range(a, 0)) - .nth_back(0), - } + /// A helper function handling MMIO read command during VM exit. + /// The virtual device itself provides mutable ability and thead-safe protection. + /// + /// Return error if failed to get the device. + pub fn mmio_read(&self, addr: u64, data: &mut [u8]) -> Result<()> { + self.get_device(IoAddress(addr)) + .map(|(device, base)| device.read(base, IoAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) + } + + /// A helper function handling MMIO write command during VM exit. + /// The virtual device itself provides mutable ability and thead-safe protection. + /// + /// Return error if failed to get the device. + pub fn mmio_write(&self, addr: u64, data: &[u8]) -> Result<()> { + self.get_device(IoAddress(addr)) + .map(|(device, base)| device.write(base, IoAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) } // Return the Device mapped `addr` and the base address. 
fn get_device(&self, addr: IoAddress) -> Option<(&Arc, IoAddress)> { - if let Some((range, dev)) = self.get_entry(addr) { + let range = IoRange::new_mmio_range(addr.raw_value(), 0); + if let Some((range, dev)) = self.mmio_bus.range(..=&range).nth_back(0) { if (addr.raw_value() - range.base.raw_value()) < range.size.raw_value() { return Some((dev, range.base)); } } None } +} +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +impl IoManager { /// A helper function handling PIO read command during VM exit. /// The virtual device itself provides mutable ability and thead-safe protection. /// /// Return error if failed to get the device. pub fn pio_read(&self, addr: u16, data: &mut [u8]) -> Result<()> { - if let Some((device, base)) = self.get_device(IoAddress::Pio(addr)) { - device.read(base, IoAddress::Pio(addr - (base.raw_value() as u16)), data); - Ok(()) - } else { - Err(Error::NoDevice) - } + self.get_pio_device(PioAddress(addr)) + .map(|(device, base)| device.pio_read(base, PioAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) } /// A helper function handling PIO write command during VM exit. @@ -200,38 +214,20 @@ impl IoManager { /// /// Return error if failed to get the device. pub fn pio_write(&self, addr: u16, data: &[u8]) -> Result<()> { - if let Some((device, base)) = self.get_device(IoAddress::Pio(addr)) { - device.write(base, IoAddress::Pio(addr - (base.raw_value() as u16)), data); - Ok(()) - } else { - Err(Error::NoDevice) - } - } - - /// A helper function handling MMIO read command during VM exit. - /// The virtual device itself provides mutable ability and thead-safe protection. - /// - /// Return error if failed to get the device. 
- pub fn mmio_read(&self, addr: u64, data: &mut [u8]) -> Result<()> { - if let Some((device, base)) = self.get_device(IoAddress::Mmio(addr)) { - device.read(base, IoAddress::Mmio(addr - base.raw_value()), data); - Ok(()) - } else { - Err(Error::NoDevice) - } + self.get_pio_device(PioAddress(addr)) + .map(|(device, base)| device.pio_write(base, PioAddress(addr - base.raw_value()), data)) + .ok_or(Error::NoDevice) } - /// A helper function handling MMIO write command during VM exit. - /// The virtual device itself provides mutable ability and thead-safe protection. - /// - /// Return error if failed to get the device. - pub fn mmio_write(&self, addr: u64, data: &[u8]) -> Result<()> { - if let Some((device, base)) = self.get_device(IoAddress::Mmio(addr)) { - device.write(base, IoAddress::Mmio(addr - base.raw_value()), data); - Ok(()) - } else { - Err(Error::NoDevice) + // Return the Device mapped `addr` and the base address. + fn get_pio_device(&self, addr: PioAddress) -> Option<(&Arc, PioAddress)> { + let range = IoRange::new_pio_range(addr.raw_value(), 0); + if let Some((range, dev)) = self.pio_bus.range(..=&range).nth_back(0) { + if (addr.raw_value() as u64 - range.base.raw_value()) < range.size.raw_value() { + return Some((dev, PioAddress(range.base.0 as u16))); + } } + None } } @@ -240,7 +236,9 @@ mod tests { use super::*; use std::sync::Mutex; + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] const PIO_ADDRESS_SIZE: u16 = 4; + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] const PIO_ADDRESS_BASE: u16 = 0x40; const MMIO_ADDRESS_SIZE: u64 = 0x8765_4321; const MMIO_ADDRESS_BASE: u64 = 0x1234_5678; @@ -274,6 +272,23 @@ mod tests { let mut config = self.config.lock().expect("failed to acquire lock"); *config = u32::from(data[0]) & 0xff; } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + fn pio_read(&self, _base: PioAddress, _offset: PioAddress, data: &mut [u8]) { + if data.len() > 4 { + return; + } + for (idx, iter) in 
data.iter_mut().enumerate() { + let config = self.config.lock().expect("failed to acquire lock"); + *iter = (*config >> (idx * 8) & 0xff) as u8; + } + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + fn pio_write(&self, _base: PioAddress, _offset: PioAddress, data: &[u8]) { + let mut config = self.config.lock().expect("failed to acquire lock"); + *config = u32::from(data[0]) & 0xff; + } } #[test] @@ -287,32 +302,42 @@ mod tests { base: MMIO_ADDRESS_BASE, size: MMIO_ADDRESS_SIZE, }; - let pio = Resource::PioAddressRange { - base: PIO_ADDRESS_BASE, - size: PIO_ADDRESS_SIZE, - }; let irq = Resource::LegacyIrq(LEGACY_IRQ); resource.push(mmio); - resource.push(pio); resource.push(irq); + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + let pio = Resource::PioAddressRange { + base: PIO_ADDRESS_BASE, + size: PIO_ADDRESS_SIZE, + }; + resource.push(pio); + } + assert!(io_mgr.register_device_io(dum.clone(), &resource).is_ok()); let io_mgr2 = io_mgr.clone(); - assert_eq!(io_mgr2.pio_bus.len(), 1); assert_eq!(io_mgr2.mmio_bus.len(), 1); - let (dev, addr) = io_mgr2 - .get_device(IoAddress::Mmio(MMIO_ADDRESS_BASE + 1)) - .unwrap(); - assert_eq!(Arc::strong_count(dev), 5); - assert_eq!(addr, IoAddress::Mmio(MMIO_ADDRESS_BASE)); + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + assert_eq!(io_mgr2.pio_bus.len(), 1); + + let (dev, addr) = io_mgr2 + .get_device(IoAddress(MMIO_ADDRESS_BASE + 1)) + .unwrap(); + assert_eq!(Arc::strong_count(dev), 5); - drop(io_mgr); - assert_eq!(Arc::strong_count(dev), 3); + assert_eq!(addr, IoAddress(MMIO_ADDRESS_BASE)); - drop(io_mgr2); - assert_eq!(Arc::strong_count(&dum), 1); + drop(io_mgr); + assert_eq!(Arc::strong_count(dev), 3); + + drop(io_mgr2); + assert_eq!(Arc::strong_count(&dum), 1); + } } #[test] @@ -365,6 +390,7 @@ mod tests { .is_err()); } + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] #[test] fn test_pio_read_write() { let mut io_mgr: IoManager = Default::default(); @@ -394,4 
+420,15 @@ mod tests { .pio_write(PIO_ADDRESS_BASE + PIO_ADDRESS_SIZE, &data) .is_err()); } + + #[test] + fn test_device_manager_data_structs() { + let range1 = IoRange::new_mmio_range(0x1000, 0x1000); + let range2 = IoRange::new_mmio_range(0x1000, 0x2000); + let range3 = IoRange::new_mmio_range(0x2000, 0x1000); + + assert_eq!(range1, range1.clone()); + assert_eq!(range1, range2); + assert!(range1 < range3); + } } diff --git a/src/lib.rs b/src/lib.rs index ed2cc89..a58c6a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,72 +3,168 @@ //! rust-vmm device model. -use std::cmp::{Ord, Ordering, PartialOrd}; +use std::cmp::{Ord, PartialOrd}; use std::sync::Mutex; pub mod device_manager; pub mod resources; /// IO Size. -#[derive(Debug, Copy, Clone)] -pub enum IoSize { - /// Port I/O size. - Pio(u16), - - /// Memory mapped I/O size. - Mmio(u64), -} +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct IoSize(pub u64); impl IoSize { - // Get the raw value as u64 to make operation simple. - fn raw_value(&self) -> u64 { - match *self { - IoSize::Pio(p) => u64::from(p), - IoSize::Mmio(m) => m, - } + /// Get the raw value as u64 to make operation simple. + #[inline] + pub fn raw_value(self) -> u64 { + self.0 } } -/// IO Addresses. -#[derive(Debug, Copy, Clone)] -pub enum IoAddress { - /// Port I/O address. - Pio(u16), +impl From for IoSize { + #[inline] + fn from(size: u64) -> Self { + IoSize(size) + } +} - /// Memory mapped I/O address. - Mmio(u64), +impl From for u64 { + #[inline] + fn from(size: IoSize) -> Self { + size.0 + } } +/// IO Addresses. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct IoAddress(pub u64); + impl IoAddress { - // Get the raw value of IO Address to make operation simple. - fn raw_value(&self) -> u64 { - match *self { - IoAddress::Pio(p) => u64::from(p), - IoAddress::Mmio(m) => m, - } + /// Get the raw value of IO Address to make operation simple. 
+ #[inline] + pub fn raw_value(self) -> u64 { + self.0 } } -impl Eq for IoAddress {} - -impl PartialEq for IoAddress { - fn eq(&self, other: &IoAddress) -> bool { - self.raw_value() == other.raw_value() +impl From for IoAddress { + #[inline] + fn from(addr: u64) -> Self { + IoAddress(addr) } } -impl Ord for IoAddress { - fn cmp(&self, other: &IoAddress) -> Ordering { - self.raw_value().cmp(&other.raw_value()) +impl From for u64 { + #[inline] + fn from(addr: IoAddress) -> Self { + addr.0 } } -impl PartialOrd for IoAddress { - fn partial_cmp(&self, other: &IoAddress) -> Option { - self.raw_value().partial_cmp(&other.raw_value()) +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +mod x86 { + use super::{IoAddress, IoSize}; + use std::convert::TryFrom; + + type PioAddressType = u16; + + #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] + /// Port I/O size. + pub struct PioSize(pub PioAddressType); + + impl PioSize { + /// Get the raw value as u64 to make operation simple. + #[inline] + pub fn raw_value(self) -> PioAddressType { + self.0 + } + } + + impl From for PioSize { + #[inline] + fn from(size: PioAddressType) -> Self { + PioSize(size) + } + } + + impl From for PioAddressType { + #[inline] + fn from(size: PioSize) -> Self { + size.0 + } + } + + impl TryFrom for PioSize { + type Error = IoSize; + + #[inline] + fn try_from(size: IoSize) -> Result { + if size.raw_value() <= std::u16::MAX as u64 { + Ok(PioSize(size.raw_value() as PioAddressType)) + } else { + Err(size) + } + } + } + + impl From for IoSize { + #[inline] + fn from(size: PioSize) -> Self { + IoSize(size.raw_value() as u64) + } + } + + /// Port I/O address. + #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] + pub struct PioAddress(pub PioAddressType); + + impl PioAddress { + /// Get the raw value of IO Address to make operation simple. 
+ #[inline] + pub fn raw_value(self) -> PioAddressType { + self.0 + } + } + + impl From for PioAddress { + #[inline] + fn from(addr: PioAddressType) -> Self { + PioAddress(addr) + } + } + + impl From for PioAddressType { + #[inline] + fn from(addr: PioAddress) -> Self { + addr.0 + } + } + + impl TryFrom for PioAddress { + type Error = IoAddress; + + #[inline] + fn try_from(addr: IoAddress) -> Result { + if addr.0 <= std::u16::MAX as u64 { + Ok(PioAddress(addr.raw_value() as PioAddressType)) + } else { + Err(addr) + } + } + } + + impl From for IoAddress { + #[inline] + fn from(addr: PioAddress) -> Self { + IoAddress(addr.raw_value() as u64) + } } } +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +pub use self::x86::{PioAddress, PioSize}; + +/// IO Addresses. /// Device IO trait adopting interior mutability pattern. /// /// A device supporting memory based I/O should implement this trait, then @@ -80,13 +176,23 @@ impl PartialOrd for IoAddress { /// real concurrent multiple threads handling. For device backend drivers not /// focusing on high performance, they may use the Mutex /// adapter to simplify implementation. +#[allow(unused_variables)] pub trait DeviceIo: Send + Sync { /// Read from the guest physical address `base`, starting at `offset`. /// Result is placed in `data`. - fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]); + fn read(&self, base: IoAddress, offset: IoAddress, data: &mut [u8]) {} /// Write `data` to the guest physical address `base`, starting from `offset`. - fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]); + fn write(&self, base: IoAddress, offset: IoAddress, data: &[u8]) {} + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + /// Read from the guest physical address `base`, starting at `offset`. + /// Result is placed in `data`. 
+ fn pio_read(&self, base: PioAddress, offset: PioAddress, data: &mut [u8]) {} + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + /// Write `data` to the guest physical address `base`, starting from `offset`. + fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) {} } /// Device IO trait without interior mutability. @@ -98,13 +204,23 @@ pub trait DeviceIo: Send + Sync { /// /// The Mutex adapter is an zero overhead abstraction without /// performance penalty. +#[allow(unused_variables)] pub trait DeviceIoMut: Send { /// Read from the guest physical address `base`, starting at `offset`. /// Result is placed in `data`. - fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]); + fn read(&mut self, base: IoAddress, offset: IoAddress, data: &mut [u8]) {} + + /// Write `data` to the guest physical address `base`, starting from `offset`. + fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]) {} + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + /// Read from the guest physical address `base`, starting at `offset`. + /// Result is placed in `data`. + fn pio_read(&mut self, base: PioAddress, offset: PioAddress, data: &mut [u8]) {} + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] /// Write `data` to the guest physical address `base`, starting from `offset`. - fn write(&mut self, base: IoAddress, offset: IoAddress, data: &[u8]); + fn pio_write(&mut self, base: PioAddress, offset: PioAddress, data: &[u8]) {} } impl DeviceIo for Mutex { @@ -117,11 +233,25 @@ impl DeviceIo for Mutex { // Safe to unwrap() because we don't expect poisoned lock here. self.lock().unwrap().write(base, offset, data) } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + fn pio_read(&self, base: PioAddress, offset: PioAddress, data: &mut [u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. 
+ self.lock().unwrap().pio_read(base, offset, data) + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().pio_write(base, offset, data) + } } #[cfg(test)] mod tests { use super::*; + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + use std::convert::TryFrom; use std::sync::Arc; #[derive(Default)] @@ -137,12 +267,22 @@ mod tests { fn write(&mut self, _base: IoAddress, _offset: IoAddress, data: &[u8]) { self.data = data[0]; } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + fn pio_read(&mut self, _base: PioAddress, _offset: PioAddress, data: &mut [u8]) { + data[0] = self.data; + } + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + fn pio_write(&mut self, _base: PioAddress, _offset: PioAddress, data: &[u8]) { + self.data = data[0]; + } } fn register_device(device: Arc) { - device.write(IoAddress::Mmio(0), IoAddress::Mmio(0), &[0x10u8]); + device.write(IoAddress(0), IoAddress(0), &[0x10u8]); let mut buf = [0x0u8]; - device.read(IoAddress::Mmio(0), IoAddress::Mmio(0), &mut buf); + device.read(IoAddress(0), IoAddress(0), &mut buf); assert_eq!(buf[0], 0x10); } @@ -153,4 +293,43 @@ mod tests { register_device(device_mut.clone()); assert_eq!(device_mut.lock().unwrap().data, 0x010); } + + #[test] + fn test_io_data_struct() { + let io_size = IoSize::from(0x1111u64); + assert_eq!(io_size.raw_value(), 0x1111u64); + assert_eq!(u64::from(io_size), 0x1111u64); + assert_eq!(io_size, io_size.clone()); + let io_size1 = IoSize::from(0x1112u64); + assert!(io_size < io_size1); + + let io_addr = IoAddress::from(0x1234u64); + assert_eq!(io_addr.raw_value(), 0x1234u64); + assert_eq!(u64::from(io_addr), 0x1234u64); + assert_eq!(io_addr, io_addr.clone()); + let io_addr1 = IoAddress::from(0x1235u64); + assert!(io_addr < io_addr1); + } + + #[cfg(any(target_arch = "x86_64", target_arch = 
"x86"))] + #[test] + fn test_pio_data_struct() { + let pio_size = PioSize::from(0x1111u16); + assert_eq!(pio_size.raw_value(), 0x1111u16); + assert_eq!(u16::from(pio_size), 0x1111u16); + assert_eq!(pio_size, pio_size.clone()); + let pio_size1 = PioSize::from(0x1112u16); + assert!(pio_size < pio_size1); + + let pio_addr = PioAddress::from(0x1234u16); + assert_eq!(pio_addr.raw_value(), 0x1234u16); + assert_eq!(u16::from(pio_addr), 0x1234u16); + assert_eq!(pio_addr, pio_addr.clone()); + let pio_addr1 = PioAddress::from(0x1235u16); + assert!(pio_addr < pio_addr1); + + assert!(PioAddress::try_from(IoAddress::from(0x123456u64)).is_err()); + assert!(PioAddress::try_from(IoAddress::from(0x1234u64)).is_ok()); + assert_eq!(IoAddress::from(pio_addr).raw_value(), 0x1234u64); + } } From 6c37861108c727af11b2fa161abcdad363f143c4 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 15 Feb 2020 21:30:59 +0800 Subject: [PATCH 17/29] Introduce IoManagerContext to support PCI devices PCI devices need to register/unregister itself onto the IoManager instance when handling PCI BAR reprogramming. So introduce IoManagerContext trait to support device manager operaiton transaction at runtime. Closure is another option, but it's hard to get information out of a closure when during live upgrading. Signed-off-by: Liu Jiang --- src/device_manager.rs | 77 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/device_manager.rs b/src/device_manager.rs index 047792c..f368036 100644 --- a/src/device_manager.rs +++ b/src/device_manager.rs @@ -18,6 +18,7 @@ use crate::{DeviceIo, IoAddress, IoSize}; use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; use std::collections::btree_map::BTreeMap; +use std::ops::Deref; use std::result; use std::sync::Arc; @@ -231,6 +232,82 @@ impl IoManager { } } +/// Io manager transaction context to register/unregister devices. +pub trait IoManagerContext { + /// Type of context object. 
+ type Context; + + /// Begin a transaction and return a context object. + /// + /// The returned context object must be passed to commit_tx() or cancel_tx() later. + fn begin_tx(&self) -> Self::Context; + + /// Commit the transaction. + fn commit_tx(&self, ctx: Self::Context); + + /// Cancel the transaction. + fn cancel_tx(&self, ctx: Self::Context); + + /// Register a new device IO with its allocated resources. + /// + /// # Arguments + /// + /// * `ctx`: context object returned by begin_tx(). + /// * `device`: device instance object to be registered + /// * `resources`: resources that this device owns, might include + /// port I/O and memory-mapped I/O ranges, irq number, etc. + fn register_device_io( + &self, + ctx: &mut Self::Context, + device: Arc, + resources: &[Resource], + ) -> Result<()>; + + /// Unregister a device from `IoManager`, e.g. users specified removing. + /// VMM pre-fetches the resources e.g. dev.get_assigned_resources() + /// VMM is responsible for freeing the resources. + /// + /// # Arguments + /// + /// * `ctx`: context object returned by begin_tx(). + /// * `resources`: resources that this device owns, might include + /// port I/O and memory-mapped I/O ranges, irq number, etc. 
+ fn unregister_device_io(&self, ctx: &mut Self::Context, resources: &[Resource]) -> Result<()>; +} + +impl IoManagerContext for Arc { + type Context = T::Context; + + fn begin_tx(&self) -> Self::Context { + self.deref().begin_tx() + } + + fn commit_tx(&self, ctx: Self::Context) { + self.deref().commit_tx(ctx) + } + + fn cancel_tx(&self, ctx: Self::Context) { + self.deref().cancel_tx(ctx) + } + + fn register_device_io( + &self, + ctx: &mut Self::Context, + device: Arc, + resources: &[Resource], + ) -> std::result::Result<(), Error> { + self.deref().register_device_io(ctx, device, resources) + } + + fn unregister_device_io( + &self, + ctx: &mut Self::Context, + resources: &[Resource], + ) -> std::result::Result<(), Error> { + self.deref().unregister_device_io(ctx, resources) + } +} + #[cfg(test)] mod tests { use super::*; From 09c4eb4acbef77d646399c54b2629ceb3834576c Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Fri, 7 Feb 2020 15:10:46 +0800 Subject: [PATCH 18/29] Add get_assigned_resources()/get_trapped_io_resources() to DeviceIo Now we have get_assigned_resources() to get resources assigned to the device(), and get_trapped_io_resources() to get PIO/MMIO resources the device wants to get trapped. Signed-off-by: Liu Jiang --- src/lib.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index a58c6a0..94d3dc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,8 @@ use std::sync::Mutex; pub mod device_manager; pub mod resources; +use self::resources::DeviceResources; + /// IO Size. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub struct IoSize(pub u64); @@ -193,6 +195,18 @@ pub trait DeviceIo: Send + Sync { #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] /// Write `data` to the guest physical address `base`, starting from `offset`. fn pio_write(&self, base: PioAddress, offset: PioAddress, data: &[u8]) {} + + /// Get resources assigned to the device. 
+ fn get_assigned_resources(&self) -> DeviceResources { + DeviceResources::new() + } + + /// Get the IO resources which will be trapped by the DeviceManager. + /// + /// All none Mmio/Pio resources in the returned resource list will be ignored. + fn get_trapped_io_resources(&self) -> DeviceResources { + self.get_assigned_resources() + } } /// Device IO trait without interior mutability. @@ -221,6 +235,18 @@ pub trait DeviceIoMut: Send { #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] /// Write `data` to the guest physical address `base`, starting from `offset`. fn pio_write(&mut self, base: PioAddress, offset: PioAddress, data: &[u8]) {} + + /// Get resources assigned to the device. + fn get_assigned_resources(&self) -> DeviceResources { + DeviceResources::new() + } + + /// Get the IO resources which will be trapped by the DeviceManager. + /// + /// All none Mmio/Pio resources in the returned resource list will be ignored. + fn get_trapped_io_resources(&self) -> DeviceResources { + self.get_assigned_resources() + } } impl DeviceIo for Mutex { @@ -245,6 +271,16 @@ impl DeviceIo for Mutex { // Safe to unwrap() because we don't expect poisoned lock here. self.lock().unwrap().pio_write(base, offset, data) } + + fn get_assigned_resources(&self) -> DeviceResources { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().get_assigned_resources() + } + + fn get_trapped_io_resources(&self) -> DeviceResources { + // Safe to unwrap() because we don't expect poisoned lock here. + self.lock().unwrap().get_trapped_io_resources() + } } #[cfg(test)] From 14eb9e12cbf2d2f995a9d0bd02c8a6b6a567199f Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Mon, 5 Aug 2019 00:38:48 +0800 Subject: [PATCH 19/29] Switch to rust 2018 edition Switch to rust 2018 edition and turn on deny(missing_docs). 
Signed-off-by: Liu Jiang --- Cargo.toml | 1 + src/lib.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 42ef5bc..79b04eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,5 +4,6 @@ version = "0.1.0" authors = ["Samuel Ortiz "] repository = "https://github.com/rust-vmm/vm-device" license = "Apache-2.0" +edition = "2018" [dependencies] diff --git a/src/lib.rs b/src/lib.rs index 94d3dc8..5b55ba3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,8 @@ // Copyright © 2019 Intel Corporation. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause +#![deny(missing_docs)] + //! rust-vmm device model. use std::cmp::{Ord, PartialOrd}; From 393253bda13fa404da91a1060ed84ef71f2d0a21 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sun, 11 Aug 2019 18:25:06 +0800 Subject: [PATCH 20/29] interrupt: introduce traits to manage interrupt sources Introduce traits InterruptManager and InterruptSourceGroup to manage interrupt sources for virtual devices. Signed-off-by: Liu Jiang Signed-off-by: Bin Zha --- Cargo.toml | 6 ++ src/interrupt/mod.rs | 205 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 212 insertions(+) create mode 100644 src/interrupt/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 79b04eb..ab0ced5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,9 @@ license = "Apache-2.0" edition = "2018" [dependencies] +libc = ">=0.2.39" +vmm-sys-util = "~0" + +[features] +legacy-irq = [] +msi-irq = [] diff --git a/src/interrupt/mod.rs b/src/interrupt/mod.rs new file mode 100644 index 0000000..449cae5 --- /dev/null +++ b/src/interrupt/mod.rs @@ -0,0 +1,205 @@ +// Copyright (C) 2019-2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Traits and Structs to manage interrupt sources for devices. +//! +//! In system programming, an interrupt is a signal to the processor emitted by hardware or +//! software indicating an event that needs immediate attention. 
An interrupt alerts the processor +//! to a high-priority condition requiring the interruption of the current code the processor is +//! executing. The processor responds by suspending its current activities, saving its state, and +//! executing a function called an interrupt handler (or an interrupt service routine, ISR) to deal +//! with the event. This interruption is temporary, and, after the interrupt handler finishes, +//! unless handling the interrupt has emitted a fatal error, the processor resumes normal +//! activities. +//! +//! Hardware interrupts are used by devices to communicate that they require attention from the +//! operating system, or a bare-metal program running on the CPU if there are no OSes. The act of +//! initiating a hardware interrupt is referred to as an interrupt request (IRQ). Different devices +//! are usually associated with different interrupts using a unique value associated with each +//! interrupt. This makes it possible to know which hardware device caused which interrupts. +//! These interrupt values are often called IRQ lines, or just interrupt lines. +//! +//! Nowadays, IRQ lines is not the only mechanism to deliver device interrupts to processors. +//! MSI [(Message Signaled Interrupt)](https://en.wikipedia.org/wiki/Message_Signaled_Interrupts) +//! is another commonly used alternative in-band method of signaling an interrupt, using special +//! in-band messages to replace traditional out-of-band assertion of dedicated interrupt lines. +//! While more complex to implement in a device, message signaled interrupts have some significant +//! advantages over pin-based out-of-band interrupt signaling. Message signaled interrupts are +//! supported in PCI bus since its version 2.2, and in later available PCI Express bus. Some non-PCI +//! architectures also use message signaled interrupts. +//! +//! While IRQ is a term commonly used by Operating Systems when dealing with hardware +//! 
interrupts, the IRQ numbers managed by OSes are independent of the ones managed by VMM. +//! For simplicity sake, the term `Interrupt Source` is used instead of IRQ to represent both pin-based +//! interrupts and MSI interrupts. +//! +//! A device may support multiple types of interrupts, and each type of interrupt may support one +//! or multiple interrupt sources. For example, a PCI device may support: +//! * Legacy Irq: exactly one interrupt source. +//! * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. +//! * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. +//! +//! A distinct Interrupt Source Identifier (ISID) will be assigned to each interrupt source. +//! An ID allocator will be used to allocate and free Interrupt Source Identifiers for devices. +//! To decouple the vm-device crate from the ID allocator, the vm-device crate doesn't take the +//! responsibility to allocate/free Interrupt Source IDs but only makes use of assigned IDs. +//! +//! The overall flow to deal with interrupts is: +//! * the VMM creates an interrupt manager +//! * the VMM creates a device manager, passing on an reference to the interrupt manager +//! * the device manager passes on an reference to the interrupt manager to all registered devices +//! * guest kernel loads drivers for virtual devices +//! * guest device driver determines the type and number of interrupts needed, and update the +//! device configuration +//! * the virtual device backend requests the interrupt manager to create an interrupt group +//! according to guest configuration information + +use std::sync::Arc; +use vmm_sys_util::eventfd::EventFd; + +/// Reuse std::io::Result to simplify interoperability among crates. +pub type Result = std::io::Result; + +/// Data type to store an interrupt source identifier. +pub type InterruptIndex = u32; + +/// Type of interrupt source. +#[derive(Clone, Debug)] +pub enum InterruptSourceType { + #[cfg(feature = "legacy-irq")] + /// Legacy Pin-based Interrupt. 
+ /// On x86 platforms, legacy interrupts are routed through 8259 PICs and/or IOAPICs. + LegacyIrq, + #[cfg(feature = "msi-irq")] + /// Message Signaled Interrupt (PCI MSI/PCI MSIx etc). + /// Some non-PCI devices (like HPET on x86) make use of generic MSI in platform specific ways. + MsiIrq, +} + +/// Configuration data for an interrupt source. +#[derive(Clone, Debug)] +pub enum InterruptSourceConfig { + #[cfg(feature = "legacy-irq")] + /// Configuration data for Legacy interrupts. + LegacyIrq(LegacyIrqSourceConfig), + #[cfg(feature = "msi-irq")] + /// Configuration data for PciMsi, PciMsix and generic MSI interrupts. + MsiIrq(MsiIrqSourceConfig), +} + +/// Configuration data for legacy interrupts. +/// +/// On x86 platforms, legacy interrupts means those interrupts routed through PICs or IOAPICs. +#[cfg(feature = "legacy-irq")] +#[derive(Clone, Debug)] +pub struct LegacyIrqSourceConfig {} + +/// Configuration data for GenericMsi, PciMsi, PciMsix interrupts. +#[cfg(feature = "msi-irq")] +#[derive(Copy, Clone, Debug, Default)] +pub struct MsiIrqSourceConfig { + /// High address to deliver message signaled interrupt. + pub high_addr: u32, + /// Low address to deliver message signaled interrupt. + pub low_addr: u32, + /// Data to write to deliver message signaled interrupt. + pub data: u32, +} + +/// Trait to manage interrupt sources for virtual device backends. +/// +/// The InterruptManager implementations should protect itself from concurrent accesses internally, +/// so it could be invoked from multi-threaded context. +pub trait InterruptManager { + /// Create an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object to manage + /// interrupt sources for a virtual device + /// + /// An [InterruptSourceGroup](trait.InterruptSourceGroup.html) object manages all interrupt + /// sources of the same type for a virtual device. + /// + /// # Arguments + /// * type_: type of interrupt source. 
+ /// * base: base Interrupt Source ID to be managed by the group object. + /// * count: number of Interrupt Sources to be managed by the group object. + fn create_group( + &self, + type_: InterruptSourceType, + base: InterruptIndex, + count: InterruptIndex, + ) -> Result>>; + + /// Destroy an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object created by + /// [create_group()](trait.InterruptManager.html#tymethod.create_group). + /// + /// Assume the caller takes the responsibility to disable all interrupt sources of the group + /// before calling destroy_group(). This assumption helps to simplify InterruptSourceGroup + /// implementations. + fn destroy_group(&self, group: Arc>) -> Result<()>; +} + +/// Trait to manage a group of interrupt sources for a device. +/// +/// A device may support several types of interrupts, and each type of interrupt may contain one or +/// multiple continuous interrupt sources. For example, a PCI device may concurrently support: +/// * Legacy Irq: exactly one interrupt source. +/// * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. +/// * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. +/// +/// PCI MSI interrupts of a device may not be configured individually, and must configured as a +/// whole block. So all interrupts of the same type of a device are abstracted as an +/// [InterruptSourceGroup](trait.InterruptSourceGroup.html) object, instead of abstracting each +/// interrupt source as a distinct InterruptSource. +#[allow(clippy::len_without_is_empty)] +#[allow(clippy::trivially_copy_pass_by_ref)] +pub trait InterruptSourceGroup: Send + Sync { + /// Get type of interrupt sources managed by the group. + fn interrupt_type(&self) -> InterruptSourceType; + + /// Get number of interrupt sources managed by the group. + fn len(&self) -> InterruptIndex; + + /// Get base of the assigned Interrupt Source Identifiers. + fn base(&self) -> InterruptIndex; + + /// Enable the interrupt sources in the group to generate interrupts. 
+ fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()>; + + /// Disable the interrupt sources in the group to generate interrupts. + fn disable(&self) -> Result<()>; + + /// Update the interrupt source group configuration. + /// + /// # Arguments + /// * index: sub-index into the group. + /// * config: configuration data for the interrupt source. + fn update(&self, index: InterruptIndex, config: &InterruptSourceConfig) -> Result<()>; + + /// Returns an interrupt notifier from this interrupt. + /// + /// An interrupt notifier allows for external components and processes + /// to inject interrupts into a guest, by writing to the file returned + /// by this method. + fn notifier(&self, _index: InterruptIndex) -> Option<&EventFd> { + None + } + + /// Inject an interrupt from this interrupt source into the guest. + /// + /// If the interrupt has an associated `interrupt_status` register, all bits set in `flag` + /// will be atomically ORed into the `interrupt_status` register. + fn trigger(&self, index: InterruptIndex) -> Result<()>; + + /// Mask an interrupt from this interrupt source. + fn mask(&self, _index: InterruptIndex) -> Result<()> { + // Not all interrupt sources can be disabled. + // To accommodate this, we can have a no-op here. + Ok(()) + } + + /// Unmask an interrupt from this interrupt source. + fn unmask(&self, _index: InterruptIndex) -> Result<()> { + // Not all interrupt sources can be disabled. + // To accommodate this, we can have a no-op here. 
+ Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 5b55ba3..7ace5fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ use std::cmp::{Ord, PartialOrd}; use std::sync::Mutex; pub mod device_manager; +pub mod interrupt; pub mod resources; use self::resources::DeviceResources; From 3e52a6f3a124a90eabdbab9b04e4a6a475d1c14c Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sun, 11 Aug 2019 18:48:23 +0800 Subject: [PATCH 21/29] Implement infrastructure to manage interrupts by KVM Implement infrastructure to manage interrupt sources based on Linux KVM kernel module. Signed-off-by: Liu Jiang Signed-off-by: Bin Zha --- Cargo.toml | 3 + src/interrupt/kvm/mod.rs | 169 +++++++++++++++++++++++++++++++++++++++ src/interrupt/mod.rs | 5 ++ 3 files changed, 177 insertions(+) create mode 100644 src/interrupt/kvm/mod.rs diff --git a/Cargo.toml b/Cargo.toml index ab0ced5..4d5557f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,11 @@ edition = "2018" [dependencies] libc = ">=0.2.39" +kvm-bindings = { version = "~0", optional = true } +kvm-ioctls = { version = "~0", optional = true } vmm-sys-util = "~0" [features] legacy-irq = [] msi-irq = [] +kvm-irq = ["kvm-ioctls", "kvm-bindings"] diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs new file mode 100644 index 0000000..3ed45fd --- /dev/null +++ b/src/interrupt/kvm/mod.rs @@ -0,0 +1,169 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Manage virtual device's interrupts based on the Linux KVM framework. +//! +//! When updaing KVM IRQ routing by ioctl(KVM_SET_GSI_ROUTING), all interrupts of the virtual +//! machine must be updated all together. The [KvmIrqRouting](struct.KvmIrqRouting.html) +//! structure is to maintain the global interrupt routing table. 
+ +use std::collections::HashMap; +use std::io::{Error, ErrorKind}; +use std::sync::{Arc, Mutex}; + +use kvm_bindings::{kvm_irq_routing, kvm_irq_routing_entry}; +use kvm_ioctls::VmFd; + +use super::*; + +/// Structure to manage interrupt sources for a virtual machine based on the Linux KVM framework. +/// +/// The KVM framework provides methods to inject interrupts into the target virtual machines, +/// which uses irqfd to notity the KVM kernel module for injecting interrupts. When the interrupt +/// source, usually a virtual device backend in userspace, writes to the irqfd file descriptor, +/// the KVM kernel module will inject a corresponding interrupt into the target VM according to +/// the IRQ routing configuration. +pub struct KvmIrqManager { + mgr: Mutex, +} + +impl KvmIrqManager { + /// Create a new interrupt manager based on the Linux KVM framework. + /// + /// # Arguments + /// * `vmfd`: The KVM VM file descriptor, which will be used to access the KVM subsystem. + pub fn new(vmfd: Arc) -> Self { + let vmfd2 = vmfd.clone(); + KvmIrqManager { + mgr: Mutex::new(KvmIrqManagerObj { + vmfd, + groups: HashMap::new(), + routes: Arc::new(KvmIrqRouting::new(vmfd2)), + }), + } + } + + /// Prepare the interrupt manager for generating interrupts into the target VM. + pub fn initialize(&self) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mgr = self.mgr.lock().unwrap(); + mgr.initialize() + } +} + +impl InterruptManager for KvmIrqManager { + fn create_group( + &self, + ty: InterruptSourceType, + base: InterruptIndex, + count: u32, + ) -> Result>> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut mgr = self.mgr.lock().unwrap(); + mgr.create_group(ty, base, count) + } + + fn destroy_group(&self, group: Arc>) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. 
+ let mut mgr = self.mgr.lock().unwrap(); + mgr.destroy_group(group) + } +} + +struct KvmIrqManagerObj { + vmfd: Arc, + routes: Arc, + groups: HashMap>>, +} + +impl KvmIrqManagerObj { + fn initialize(&self) -> Result<()> { + self.routes.initialize()?; + Ok(()) + } + + fn create_group( + &mut self, + ty: InterruptSourceType, + base: InterruptIndex, + count: u32, + ) -> Result>> { + #[allow(unreachable_patterns)] + let group: Arc> = match ty { + _ => return Err(Error::from(ErrorKind::InvalidInput)), + }; + + self.groups.insert(base, group.clone()); + + Ok(group) + } + + fn destroy_group(&mut self, group: Arc>) -> Result<()> { + self.groups.remove(&group.base()); + Ok(()) + } +} + +// Use (entry.type, entry.gsi) as the hash key because entry.gsi can't uniquely identify an +// interrupt source on x86 platforms. The PIC and IOAPIC may share the same GSI on x86 platforms. +fn hash_key(entry: &kvm_irq_routing_entry) -> u64 { + let type1 = match entry.type_ { + #[cfg(feature = "kvm-legacy-irq")] + kvm_bindings::KVM_IRQ_ROUTING_IRQCHIP => unsafe { entry.u.irqchip.irqchip }, + _ => 0u32, + }; + (u64::from(type1) << 48 | u64::from(entry.type_) << 32) | u64::from(entry.gsi) +} + +pub(super) struct KvmIrqRouting { + vm_fd: Arc, + routes: Mutex>, +} + +impl KvmIrqRouting { + pub(super) fn new(vm_fd: Arc) -> Self { + KvmIrqRouting { + vm_fd, + routes: Mutex::new(HashMap::new()), + } + } + + pub(super) fn initialize(&self) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + #[allow(unused_mut)] + let mut routes = self.routes.lock().unwrap(); + + self.set_routing(&*routes) + } + + fn set_routing(&self, routes: &HashMap) -> Result<()> { + // Allocate enough buffer memory. 
+ let elem_sz = std::mem::size_of::(); + let total_sz = std::mem::size_of::() * routes.len() + elem_sz; + let elem_cnt = (total_sz + elem_sz - 1) / elem_sz; + let mut irq_routings = Vec::::with_capacity(elem_cnt); + irq_routings.resize_with(elem_cnt, Default::default); + + // Prepare the irq_routing header. + let mut irq_routing = &mut irq_routings[0]; + irq_routing.nr = routes.len() as u32; + irq_routing.flags = 0; + + // Safe because we have just allocated enough memory above. + let irq_routing_entries = unsafe { irq_routing.entries.as_mut_slice(routes.len()) }; + for (idx, entry) in routes.values().enumerate() { + irq_routing_entries[idx] = *entry; + } + + self.vm_fd + .set_gsi_routing(irq_routing) + .map_err(from_sys_util_errno)?; + + Ok(()) + } +} + +/// Helper function convert from vmm_sys_util::errno::Error to std::io::Error. +pub fn from_sys_util_errno(e: vmm_sys_util::errno::Error) -> std::io::Error { + std::io::Error::from_raw_os_error(e.errno()) +} diff --git a/src/interrupt/mod.rs b/src/interrupt/mod.rs index 449cae5..c870190 100644 --- a/src/interrupt/mod.rs +++ b/src/interrupt/mod.rs @@ -203,3 +203,8 @@ pub trait InterruptSourceGroup: Send + Sync { Ok(()) } } + +#[cfg(feature = "kvm-irq")] +mod kvm; +#[cfg(feature = "kvm-irq")] +pub use self::kvm::KvmIrqManager; From acbc9bb3d0d7f9ead3753db1674f0124f141bcc7 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sun, 11 Aug 2019 19:04:13 +0800 Subject: [PATCH 22/29] Manage x86 legacy interrupts based on KVM Implement InterruptSourceGroup trait to manage x86 legacy interruts. On x86 platforms, pin-based device interrupts connecting to the master PIC, the slave PIC and IOAPICs are named as legacy interrupts. For legacy interrupts, the interrupt routing logic are manged by the PICs/IOAPICs and the interrupt group logic only takes responsibility to enable/disable the interrupts. 
Signed-off-by: Liu Jiang Signed-off-by: Bin Zha --- Cargo.toml | 1 + src/interrupt/kvm/legacy_irq.rs | 150 ++++++++++++++++++++++++++++++++ src/interrupt/kvm/mod.rs | 15 ++++ 3 files changed, 166 insertions(+) create mode 100644 src/interrupt/kvm/legacy_irq.rs diff --git a/Cargo.toml b/Cargo.toml index 4d5557f..9e07953 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,4 @@ vmm-sys-util = "~0" legacy-irq = [] msi-irq = [] kvm-irq = ["kvm-ioctls", "kvm-bindings"] +kvm-legacy-irq = ["legacy-irq", "kvm-irq"] diff --git a/src/interrupt/kvm/legacy_irq.rs b/src/interrupt/kvm/legacy_irq.rs new file mode 100644 index 0000000..c83cb65 --- /dev/null +++ b/src/interrupt/kvm/legacy_irq.rs @@ -0,0 +1,150 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Manage virtual device's legacy interrupts based on Linux KVM framework. +//! +//! On x86 platforms, legacy interrupts are those managed by the Master PIC, the slave PIC and +//! IOAPICs. + +use super::*; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use kvm_bindings::{ + KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_IRQ_ROUTING_IRQCHIP, +}; + +pub(super) struct LegacyIrq { + base: u32, + vmfd: Arc, + irqfd: EventFd, +} + +impl LegacyIrq { + #[allow(clippy::new_ret_no_self)] + pub(super) fn new( + base: InterruptIndex, + count: InterruptIndex, + vmfd: Arc, + _routes: Arc, + ) -> Result { + if count != 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + Ok(LegacyIrq { + base, + vmfd, + irqfd: EventFd::new(0)?, + }) + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn add_legacy_entry( + gsi: u32, + chip: u32, + pin: u32, + routes: &mut HashMap, + ) -> Result<()> { + let mut entry = kvm_irq_routing_entry { + gsi, + type_: KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + // Safe because we are initializing all fields of the `irqchip` struct. 
+ entry.u.irqchip.irqchip = chip; + entry.u.irqchip.pin = pin; + routes.insert(hash_key(&entry), entry); + + Ok(()) + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// Build routings for IRQs connected to the master PIC, the slave PIC or the first IOAPIC. + pub(super) fn initialize_legacy( + routes: &mut HashMap, + ) -> Result<()> { + // Build routings for the master PIC + for i in 0..8 { + if i != 2 { + Self::add_legacy_entry(i, KVM_IRQCHIP_PIC_MASTER, i, routes)?; + } + } + + // Build routings for the slave PIC + for i in 8..16 { + Self::add_legacy_entry(i, KVM_IRQCHIP_PIC_SLAVE, i - 8, routes)?; + } + + // Build routings for the first IOAPIC + for i in 0..24 { + if i == 0 { + Self::add_legacy_entry(i, KVM_IRQCHIP_IOAPIC, 2, routes)?; + } else if i != 2 { + Self::add_legacy_entry(i, KVM_IRQCHIP_IOAPIC, i, routes)?; + }; + } + + Ok(()) + } + + #[cfg(any(target_arch = "aarch", target_arch = "aarch64"))] + pub(super) fn initialize_legacy( + _routes: &mut HashMap, + ) -> Result<()> { + //TODO + Ok(()) + } +} + +impl InterruptSourceGroup for LegacyIrq { + fn interrupt_type(&self) -> InterruptSourceType { + InterruptSourceType::LegacyIrq + } + + fn len(&self) -> u32 { + 1 + } + + fn base(&self) -> u32 { + self.base + } + + fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()> { + if configs.len() != 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + // The IRQ routings for legacy IRQs have been configured during + // KvmIrqManager::initialize(), so only need to register irqfd to the KVM driver. 
+ self.vmfd + .register_irqfd(&self.irqfd, self.base) + .map_err(from_sys_util_errno) + } + + fn disable(&self) -> Result<()> { + self.vmfd + .unregister_irqfd(&self.irqfd, self.base) + .map_err(from_sys_util_errno) + } + + fn update(&self, index: InterruptIndex, _config: &InterruptSourceConfig) -> Result<()> { + // For legacy interrupts, the routing configuration is managed by the PIC/IOAPIC interrupt + // controller drivers, so nothing to do here. + if index != 0 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + Ok(()) + } + + fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { + if index != 0 { + None + } else { + Some(&self.irqfd) + } + } + + fn trigger(&self, index: InterruptIndex) -> Result<()> { + if index != 0 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + self.irqfd.write(1) + } +} diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs index 3ed45fd..4f26ca2 100644 --- a/src/interrupt/kvm/mod.rs +++ b/src/interrupt/kvm/mod.rs @@ -16,6 +16,11 @@ use kvm_ioctls::VmFd; use super::*; +#[cfg(feature = "kvm-legacy-irq")] +mod legacy_irq; +#[cfg(feature = "kvm-legacy-irq")] +use self::legacy_irq::LegacyIrq; + /// Structure to manage interrupt sources for a virtual machine based on the Linux KVM framework. 
/// /// The KVM framework provides methods to inject interrupts into the target virtual machines, @@ -90,6 +95,13 @@ impl KvmIrqManagerObj { ) -> Result>> { #[allow(unreachable_patterns)] let group: Arc> = match ty { + #[cfg(feature = "kvm-legacy-irq")] + InterruptSourceType::LegacyIrq => Arc::new(Box::new(LegacyIrq::new( + base, + count, + self.vmfd.clone(), + self.routes.clone(), + )?)), _ => return Err(Error::from(ErrorKind::InvalidInput)), }; @@ -133,6 +145,9 @@ impl KvmIrqRouting { #[allow(unused_mut)] let mut routes = self.routes.lock().unwrap(); + #[cfg(feature = "kvm-legacy-irq")] + LegacyIrq::initialize_legacy(&mut *routes)?; + self.set_routing(&*routes) } From cc3f30411b3ba68348e1c45f165ac7060f5dfede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=88=E6=83=85?= Date: Fri, 16 Aug 2019 11:03:49 +0800 Subject: [PATCH 23/29] Limit number of legacy irqs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With some kvm version, setting irq_routing for non-existing legaccy IRQs may cause system crash. So limit the number to available legacy interrupts. Signed-off-by: 守情 --- src/interrupt/kvm/legacy_irq.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/interrupt/kvm/legacy_irq.rs b/src/interrupt/kvm/legacy_irq.rs index c83cb65..b728f2a 100644 --- a/src/interrupt/kvm/legacy_irq.rs +++ b/src/interrupt/kvm/legacy_irq.rs @@ -12,6 +12,9 @@ use kvm_bindings::{ KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_IRQ_ROUTING_IRQCHIP, }; +/// Maximum number of legacy interrupts supported. 
+pub const MAX_LEGACY_IRQS: u32 = 24; + pub(super) struct LegacyIrq { base: u32, vmfd: Arc, @@ -29,6 +32,11 @@ impl LegacyIrq { if count != 1 { return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); } + + if base >= MAX_LEGACY_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + Ok(LegacyIrq { base, vmfd, @@ -74,7 +82,7 @@ impl LegacyIrq { } // Build routings for the first IOAPIC - for i in 0..24 { + for i in 0..MAX_LEGACY_IRQS { if i == 0 { Self::add_legacy_entry(i, KVM_IRQCHIP_IOAPIC, 2, routes)?; } else if i != 2 { From 3c663a8cb51aa643b094ca6355890393aef03f07 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sun, 27 Oct 2019 00:43:23 +0800 Subject: [PATCH 24/29] Add generic heplers to manage MSI interrupts Introduce generic mechanism to support message signalled interrupts based on KVM hypervisor. Signed-off-by: Liu Jiang Signed-off-by: Bin Zha --- Cargo.toml | 1 + src/interrupt/kvm/mod.rs | 45 +++++++++++++++++++++++++ src/interrupt/kvm/msi_generic.rs | 58 ++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 src/interrupt/kvm/msi_generic.rs diff --git a/Cargo.toml b/Cargo.toml index 9e07953..6d8c96e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,4 +16,5 @@ vmm-sys-util = "~0" legacy-irq = [] msi-irq = [] kvm-irq = ["kvm-ioctls", "kvm-bindings"] +kvm-msi-generic = ["msi-irq", "kvm-irq"] kvm-legacy-irq = ["legacy-irq", "kvm-irq"] diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs index 4f26ca2..f7b2742 100644 --- a/src/interrupt/kvm/mod.rs +++ b/src/interrupt/kvm/mod.rs @@ -20,6 +20,11 @@ use super::*; mod legacy_irq; #[cfg(feature = "kvm-legacy-irq")] use self::legacy_irq::LegacyIrq; +#[cfg(feature = "kvm-msi-generic")] +mod msi_generic; + +/// Maximum number of global interrupt sources. +pub const MAX_IRQS: InterruptIndex = 1024; /// Structure to manage interrupt sources for a virtual machine based on the Linux KVM framework. 
/// @@ -178,6 +183,46 @@ impl KvmIrqRouting { } } +#[cfg(feature = "kvm-msi-generic")] +impl KvmIrqRouting { + pub(super) fn add(&self, entries: &[kvm_irq_routing_entry]) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut routes = self.routes.lock().unwrap(); + for entry in entries { + if entry.gsi >= MAX_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } else if routes.contains_key(&hash_key(entry)) { + return Err(std::io::Error::from_raw_os_error(libc::EEXIST)); + } + } + + for entry in entries { + let _ = routes.insert(hash_key(entry), *entry); + } + self.set_routing(&routes) + } + + pub(super) fn remove(&self, entries: &[kvm_irq_routing_entry]) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut routes = self.routes.lock().unwrap(); + for entry in entries { + let _ = routes.remove(&hash_key(entry)); + } + self.set_routing(&routes) + } + + pub(super) fn modify(&self, entry: &kvm_irq_routing_entry) -> Result<()> { + // Safe to unwrap because there's no legal way to break the mutex. + let mut routes = self.routes.lock().unwrap(); + if !routes.contains_key(&hash_key(entry)) { + return Err(std::io::Error::from_raw_os_error(libc::ENOENT)); + } + + let _ = routes.insert(hash_key(entry), *entry); + self.set_routing(&routes) + } +} + /// Helper function convert from vmm_sys_util::errno::Error to std::io::Error. pub fn from_sys_util_errno(e: vmm_sys_util::errno::Error) -> std::io::Error { std::io::Error::from_raw_os_error(e.errno()) diff --git a/src/interrupt/kvm/msi_generic.rs b/src/interrupt/kvm/msi_generic.rs new file mode 100644 index 0000000..755a5c5 --- /dev/null +++ b/src/interrupt/kvm/msi_generic.rs @@ -0,0 +1,58 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Helper utilities for handling MSI interrupts. 
+ +use kvm_bindings::{kvm_irq_routing_entry, KVM_IRQ_ROUTING_MSI}; + +use super::*; + +pub(super) struct MsiConfig { + pub(super) irqfd: EventFd, + pub(super) config: Mutex, +} + +impl MsiConfig { + pub(super) fn new() -> Self { + MsiConfig { + irqfd: EventFd::new(0).unwrap(), + config: Mutex::new(Default::default()), + } + } +} + +pub(super) fn new_msi_routing_entry( + gsi: InterruptIndex, + msicfg: &MsiIrqSourceConfig, +) -> kvm_irq_routing_entry { + let mut entry = kvm_irq_routing_entry { + gsi, + type_: KVM_IRQ_ROUTING_MSI, + flags: 0, + ..Default::default() + }; + entry.u.msi.address_hi = msicfg.high_addr; + entry.u.msi.address_lo = msicfg.low_addr; + entry.u.msi.data = msicfg.data; + entry +} + +#[allow(irrefutable_let_patterns)] +pub(super) fn create_msi_routing_entries( + base: InterruptIndex, + configs: &[InterruptSourceConfig], +) -> Result> { + let _ = base + .checked_add(configs.len() as u32) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EINVAL))?; + let mut entries = Vec::with_capacity(configs.len()); + for (i, ref val) in configs.iter().enumerate() { + if let InterruptSourceConfig::MsiIrq(msicfg) = val { + let entry = new_msi_routing_entry(base + i as u32, msicfg); + entries.push(entry); + } else { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + } + Ok(entries) +} From 7831a5bd51aadbe00b3f7fe96660bc3debdfe9bd Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Mon, 5 Aug 2019 00:40:08 +0800 Subject: [PATCH 25/29] Manage PCI MSI/PCI MSI-x interrupts Implement interrupt source driver to manage PCI MSI/MSI-x interrupts. 
Signed-off-by: Liu Jiang Signed-off-by: Bin Zha --- Cargo.toml | 1 + src/interrupt/kvm/mod.rs | 23 ++++++ src/interrupt/kvm/msi_irq.rs | 144 +++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 src/interrupt/kvm/msi_irq.rs diff --git a/Cargo.toml b/Cargo.toml index 6d8c96e..1c0e80d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,4 @@ msi-irq = [] kvm-irq = ["kvm-ioctls", "kvm-bindings"] kvm-msi-generic = ["msi-irq", "kvm-irq"] kvm-legacy-irq = ["legacy-irq", "kvm-irq"] +kvm-msi-irq = ["kvm-msi-generic"] diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs index f7b2742..ee96fcd 100644 --- a/src/interrupt/kvm/mod.rs +++ b/src/interrupt/kvm/mod.rs @@ -22,10 +22,17 @@ mod legacy_irq; use self::legacy_irq::LegacyIrq; #[cfg(feature = "kvm-msi-generic")] mod msi_generic; +#[cfg(feature = "kvm-msi-irq")] +mod msi_irq; +#[cfg(feature = "kvm-msi-irq")] +use self::msi_irq::MsiIrq; /// Maximum number of global interrupt sources. pub const MAX_IRQS: InterruptIndex = 1024; +/// Default maximum number of Message Signaled Interrupts per device. +pub const DEFAULT_MAX_MSI_IRQS_PER_DEVICE: InterruptIndex = 128; + /// Structure to manage interrupt sources for a virtual machine based on the Linux KVM framework. /// /// The KVM framework provides methods to inject interrupts into the target virtual machines, @@ -49,6 +56,7 @@ impl KvmIrqManager { vmfd, groups: HashMap::new(), routes: Arc::new(KvmIrqRouting::new(vmfd2)), + max_msi_irqs: DEFAULT_MAX_MSI_IRQS_PER_DEVICE, }), } } @@ -59,6 +67,12 @@ impl KvmIrqManager { let mgr = self.mgr.lock().unwrap(); mgr.initialize() } + + /// Set maximum supported MSI interrupts per device. 
+ pub fn set_max_msi_irqs(&self, max_msi_irqs: InterruptIndex) { + let mut mgr = self.mgr.lock().unwrap(); + mgr.max_msi_irqs = max_msi_irqs; + } } impl InterruptManager for KvmIrqManager { @@ -84,6 +98,7 @@ struct KvmIrqManagerObj { vmfd: Arc, routes: Arc, groups: HashMap>>, + max_msi_irqs: InterruptIndex, } impl KvmIrqManagerObj { @@ -107,6 +122,14 @@ impl KvmIrqManagerObj { self.vmfd.clone(), self.routes.clone(), )?)), + #[cfg(feature = "kvm-msi-irq")] + InterruptSourceType::MsiIrq => Arc::new(Box::new(MsiIrq::new( + base, + count, + self.max_msi_irqs, + self.vmfd.clone(), + self.routes.clone(), + )?)), _ => return Err(Error::from(ErrorKind::InvalidInput)), }; diff --git a/src/interrupt/kvm/msi_irq.rs b/src/interrupt/kvm/msi_irq.rs new file mode 100644 index 0000000..8e10ac5 --- /dev/null +++ b/src/interrupt/kvm/msi_irq.rs @@ -0,0 +1,144 @@ +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Manage virtual device's PCI MSI/PCI MSIx interrupts based on Linux KVM framework. +//! +//! To optimize for performance by avoiding unnecessary locking and state checking, we assume that +//! the caller will take the responsibility to maintain the interrupt states and only issue valid +//! requests to this driver. If the caller doesn't obey the contract, only the current virtual +//! machine will be affected, it shouldn't break the host or other virtual machines. 
+ +use super::msi_generic::{create_msi_routing_entries, new_msi_routing_entry, MsiConfig}; +use super::*; + +pub(super) struct MsiIrq { + base: InterruptIndex, + count: InterruptIndex, + vmfd: Arc, + irq_routing: Arc, + msi_configs: Vec, +} + +impl MsiIrq { + #[allow(clippy::new_ret_no_self)] + pub(super) fn new( + base: InterruptIndex, + count: InterruptIndex, + max_msi_irqs: InterruptIndex, + vmfd: Arc, + irq_routing: Arc, + ) -> Result { + if count > max_msi_irqs || base >= MAX_IRQS || base + count > MAX_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let mut msi_configs = Vec::with_capacity(count as usize); + for _ in 0..count { + msi_configs.push(MsiConfig::new()); + } + + Ok(MsiIrq { + base, + count, + vmfd, + irq_routing, + msi_configs, + }) + } +} + +impl InterruptSourceGroup for MsiIrq { + fn interrupt_type(&self) -> InterruptSourceType { + InterruptSourceType::MsiIrq + } + + fn len(&self) -> u32 { + self.count + } + + fn base(&self) -> u32 { + self.base + } + + fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()> { + if configs.len() != self.count as usize { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + // First add IRQ routings for all the MSI interrupts. + let entries = create_msi_routing_entries(self.base, configs)?; + self.irq_routing.add(&entries)?; + + // Then register irqfds to the KVM module. + for i in 0..self.count { + let irqfd = &self.msi_configs[i as usize].irqfd; + self.vmfd + .register_irqfd(irqfd, self.base + i) + .map_err(from_sys_util_errno)?; + } + + Ok(()) + } + + fn disable(&self) -> Result<()> { + // First unregister all irqfds, so it won't trigger anymore. + for i in 0..self.count { + let irqfd = &self.msi_configs[i as usize].irqfd; + self.vmfd + .unregister_irqfd(irqfd, self.base + i) + .map_err(from_sys_util_errno)?; + } + + // Then tear down the IRQ routings for all the MSI interrupts. 
+ let mut entries = Vec::with_capacity(self.count as usize); + for i in 0..self.count { + // Safe to unwrap because there's no legal way to break the mutex. + let msicfg = self.msi_configs[i as usize].config.lock().unwrap(); + let entry = new_msi_routing_entry(self.base + i, &*msicfg); + entries.push(entry); + } + self.irq_routing.remove(&entries)?; + + Ok(()) + } + + #[allow(irrefutable_let_patterns)] + fn update(&self, index: InterruptIndex, config: &InterruptSourceConfig) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + if let InterruptSourceConfig::MsiIrq(ref cfg) = config { + // Safe to unwrap because there's no legal way to break the mutex. + let entry = { + let mut msicfg = self.msi_configs[index as usize].config.lock().unwrap(); + msicfg.high_addr = cfg.high_addr; + msicfg.low_addr = cfg.low_addr; + msicfg.data = cfg.data; + new_msi_routing_entry(self.base + index, &*msicfg) + }; + self.irq_routing.modify(&entry) + } else { + Err(std::io::Error::from_raw_os_error(libc::EINVAL)) + } + } + + fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { + if index >= self.count { + None + } else { + let msi_config = &self.msi_configs[index as usize]; + Some(&msi_config.irqfd) + } + } + + fn trigger(&self, index: InterruptIndex) -> Result<()> { + // Assume that the caller will maintain the interrupt states and only call this function + // when suitable. 
+ if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + let msi_config = &self.msi_configs[index as usize]; + msi_config.irqfd.write(1) + } +} From f8a77d5122d00fe049e4e6a1bec805bb2dff0f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=88=E6=83=85?= Date: Fri, 16 Aug 2019 10:34:59 +0800 Subject: [PATCH 26/29] Add unit tests for interrupt manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 守情 --- src/interrupt/kvm/legacy_irq.rs | 48 ++++++++++ src/interrupt/kvm/mod.rs | 154 +++++++++++++++++++++++++++++++ src/interrupt/kvm/msi_generic.rs | 67 ++++++++++++++ src/interrupt/kvm/msi_irq.rs | 86 +++++++++++++++++ 4 files changed, 355 insertions(+) diff --git a/src/interrupt/kvm/legacy_irq.rs b/src/interrupt/kvm/legacy_irq.rs index b728f2a..688cc17 100644 --- a/src/interrupt/kvm/legacy_irq.rs +++ b/src/interrupt/kvm/legacy_irq.rs @@ -156,3 +156,51 @@ impl InterruptSourceGroup for LegacyIrq { self.irqfd.write(1) } } + +#[cfg(test)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod test { + use super::*; + use kvm_ioctls::{Kvm, VmFd}; + + fn create_vm_fd() -> VmFd { + let kvm = Kvm::new().unwrap(); + kvm.create_vm().unwrap() + } + + #[test] + #[allow(unreachable_patterns)] + fn test_legacy_interrupt_group() { + let vmfd = Arc::new(create_vm_fd()); + let rounting = Arc::new(KvmIrqRouting::new(vmfd.clone())); + let base = 0; + let count = 1; + let group = LegacyIrq::new(base, count, vmfd.clone(), rounting.clone()).unwrap(); + + let mut legacy_fds = Vec::with_capacity(1); + legacy_fds.push(InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})); + + match group.interrupt_type() { + InterruptSourceType::LegacyIrq => {} + _ => { + panic!(); + } + } + assert_eq!(group.len(), 1); + assert_eq!(group.base(), base); + assert!(group.enable(&legacy_fds).is_ok()); + assert!(group.notifier(0).unwrap().write(1).is_ok()); + assert!(group.trigger(0).is_ok()); + 
assert!(group.trigger(1).is_err()); + assert!(group + .update( + 0, + &InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {}) + ) + .is_ok()); + assert!(group.disable().is_ok()); + + assert!(LegacyIrq::new(base, 2, vmfd.clone(), rounting.clone()).is_err()); + assert!(LegacyIrq::new(110, 1, vmfd.clone(), rounting.clone()).is_err()); + } +} diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs index ee96fcd..c46819b 100644 --- a/src/interrupt/kvm/mod.rs +++ b/src/interrupt/kvm/mod.rs @@ -250,3 +250,157 @@ impl KvmIrqRouting { pub fn from_sys_util_errno(e: vmm_sys_util::errno::Error) -> std::io::Error { std::io::Error::from_raw_os_error(e.errno()) } + +#[cfg(any(target = "x86", target = "x86_64"))] +#[cfg(test)] +mod test { + use super::*; + use kvm_ioctls::{Kvm, VmFd}; + + //const VFIO_PCI_MSI_IRQ_INDEX: u32 = 1; + + fn create_vm_fd() -> VmFd { + let kvm = Kvm::new().unwrap(); + kvm.create_vm().unwrap() + } + + fn create_irq_group( + manager: Arc, + _vmfd: Arc, + ) -> Arc> { + let base = 0; + let count = 1; + + manager + .create_group(InterruptSourceType::LegacyIrq, base, count) + .unwrap() + } + + fn create_msi_group( + manager: Arc, + _vmfd: Arc, + ) -> Arc> { + let base = 168; + let count = 32; + + manager + .create_group(InterruptSourceType::MsiIrq, base, count) + .unwrap() + } + + const MASTER_PIC: usize = 7; + const SLAVE_PIC: usize = 8; + const IOAPIC: usize = 23; + + #[test] + fn test_create_kvmirqmanager() { + let vmfd = Arc::new(create_vm_fd()); + let manager = KvmIrqManager::new(vmfd.clone()); + assert!(vmfd.create_irq_chip().is_ok()); + assert!(manager.initialize().is_ok()); + } + + #[test] + fn test_kvmirqmanager_opt() { + let vmfd = Arc::new(create_vm_fd()); + assert!(vmfd.create_irq_chip().is_ok()); + let manager = Arc::new(KvmIrqManager::new(vmfd.clone())); + assert!(manager.initialize().is_ok()); + //irq + let group = create_irq_group(manager.clone(), vmfd.clone()); + let _ = group.clone(); + 
assert!(manager.destroy_group(group).is_ok()); + //msi + let group = create_msi_group(manager.clone(), vmfd.clone()); + let _ = group.clone(); + assert!(manager.destroy_group(group).is_ok()); + } + + #[test] + fn test_irqrouting_initialize_legacy() { + let vmfd = Arc::new(create_vm_fd()); + let routing = KvmIrqRouting::new(vmfd.clone()); + assert!(routing.initialize().is_err()); + assert!(vmfd.create_irq_chip().is_ok()); + assert!(routing.initialize().is_ok()); + let routes = &routing.routes.lock().unwrap(); + assert_eq!(routes.len(), MASTER_PIC + SLAVE_PIC + IOAPIC); + } + + #[test] + fn test_routing_opt() { + // pub(super) fn modify(&self, entry: &kvm_irq_routing_entry) -> Result<()> { + let vmfd = Arc::new(create_vm_fd()); + let routing = KvmIrqRouting::new(vmfd.clone()); + assert!(routing.initialize().is_err()); + assert!(vmfd.create_irq_chip().is_ok()); + assert!(routing.initialize().is_ok()); + + let mut entry = kvm_irq_routing_entry { + gsi: 8, + type_: KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + + // Safe because we are initializing all fields of the `irqchip` struct. 
+ unsafe { + entry.u.irqchip.irqchip = 0; + entry.u.irqchip.pin = 3; + } + + let entrys = vec![entry.clone()]; + + assert!(routing.modify(&entry).is_err()); + assert!(routing.add(&entrys).is_ok()); + unsafe { + entry.u.irqchip.pin = 4; + } + assert!(routing.modify(&entry).is_ok()); + assert!(routing.remove(&entrys).is_ok()); + assert!(routing.modify(&entry).is_err()); + } + + #[test] + fn test_routing_commit() { + let vmfd = Arc::new(create_vm_fd()); + let routing = KvmIrqRouting::new(vmfd.clone()); + + assert!(routing.initialize().is_err()); + assert!(vmfd.create_irq_chip().is_ok()); + assert!(routing.initialize().is_ok()); + + let mut entry = kvm_irq_routing_entry { + gsi: 8, + type_: KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + unsafe { + entry.u.irqchip.irqchip = 0; + entry.u.irqchip.pin = 3; + } + + routing + .routes + .lock() + .unwrap() + .insert(hash_key(&entry), entry); + let routes = routing.routes.lock().unwrap(); + assert!(routing.commit(&routes).is_ok()); + } + + #[test] + fn test_has_key() { + let gsi = 4; + let mut entry = kvm_irq_routing_entry { + gsi, + type_: KVM_IRQ_ROUTING_IRQCHIP, + ..Default::default() + }; + // Safe because we are initializing all fields of the `irqchip` struct. 
+ unsafe { + entry.u.irqchip.irqchip = KVM_IRQCHIP_PIC_MASTER; + entry.u.irqchip.pin = gsi; + } + assert_eq!(hash_key(&entry), 0x0001_0000_0004); + } +} diff --git a/src/interrupt/kvm/msi_generic.rs b/src/interrupt/kvm/msi_generic.rs index 755a5c5..f91a03b 100644 --- a/src/interrupt/kvm/msi_generic.rs +++ b/src/interrupt/kvm/msi_generic.rs @@ -56,3 +56,70 @@ pub(super) fn create_msi_routing_entries( } Ok(entries) } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_create_msiconfig() { + let config = MsiConfig::new(); + config.irqfd.write(1).unwrap(); + } + + #[test] + fn test_new_msi_routing_single() { + let test_gsi = 4; + let msi_source_config = MsiIrqSourceConfig { + high_addr: 0x1234, + low_addr: 0x5678, + data: 0x9876, + }; + let entry = new_msi_routing_entry(test_gsi, &msi_source_config); + assert_eq!(entry.gsi, test_gsi); + assert_eq!(entry.type_, KVM_IRQ_ROUTING_MSI); + unsafe { + assert_eq!(entry.u.msi.address_hi, msi_source_config.high_addr); + assert_eq!(entry.u.msi.address_lo, msi_source_config.low_addr); + assert_eq!(entry.u.msi.data, msi_source_config.data); + } + } + + #[cfg(all( + feature = "legacy_irq", + any(target_arch = "x86", target_arch = "x86_64") + ))] + #[test] + fn test_new_msi_routing_multi() { + let mut msi_fds = Vec::with_capacity(16); + for _ in 0..16 { + msi_fds.push(InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig { + high_addr: 0x1234, + low_addr: 0x5678, + data: 0x9876, + })); + } + let mut legacy_fds = Vec::with_capacity(16); + for _ in 0..16 { + legacy_fds.push(InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})); + } + + let base = 0; + let entrys = create_msi_routing_entries(0, &msi_fds).unwrap(); + + for (i, entry) in entrys.iter().enumerate() { + assert_eq!(entry.gsi, (base + i) as u32); + assert_eq!(entry.type_, KVM_IRQ_ROUTING_MSI); + if let InterruptSourceConfig::MsiIrq(config) = &msi_fds[i] { + unsafe { + assert_eq!(entry.u.msi.address_hi, config.high_addr); + assert_eq!(entry.u.msi.address_lo, 
config.low_addr); + assert_eq!(entry.u.msi.data, config.data); + } + } + } + + assert!(create_msi_routing_entries(0, &legacy_fds).is_err()); + assert!(create_msi_routing_entries(!0, &msi_fds).is_err()); + } +} diff --git a/src/interrupt/kvm/msi_irq.rs b/src/interrupt/kvm/msi_irq.rs index 8e10ac5..f4bc624 100644 --- a/src/interrupt/kvm/msi_irq.rs +++ b/src/interrupt/kvm/msi_irq.rs @@ -142,3 +142,89 @@ impl InterruptSourceGroup for MsiIrq { msi_config.irqfd.write(1) } } + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(test)] +mod test { + use super::*; + use kvm_ioctls::{Kvm, VmFd}; + + fn create_vm_fd() -> VmFd { + let kvm = Kvm::new().unwrap(); + kvm.create_vm().unwrap() + } + + #[test] + #[allow(unreachable_patterns)] + fn test_msi_interrupt_group() { + let vmfd = Arc::new(create_vm_fd()); + assert!(vmfd.create_irq_chip().is_ok()); + + let rounting = Arc::new(KvmIrqRouting::new(vmfd.clone())); + assert!(rounting.initialize().is_ok()); + + let base = 168; + let count = 32; + let group = MsiIrq::new( + base, + count, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE, + vmfd.clone(), + rounting.clone(), + ) + .unwrap(); + let mut msi_fds = Vec::with_capacity(count as usize); + + match group.interrupt_type() { + InterruptSourceType::MsiIrq => {} + _ => { + panic!(); + } + } + + for _ in 0..count { + let msi_source_config = MsiIrqSourceConfig { + high_addr: 0x1234, + low_addr: 0x5678, + data: 0x9876, + }; + msi_fds.push(InterruptSourceConfig::MsiIrq(msi_source_config)); + } + + assert!(group.enable(&msi_fds).is_ok()); + assert_eq!(group.len(), count); + assert_eq!(group.base(), base); + + for i in 0..count { + let msi_source_config = MsiIrqSourceConfig { + high_addr: i + 0x1234, + low_addr: i + 0x5678, + data: i + 0x9876, + }; + assert!(group.notifier(i).unwrap().write(1).is_ok()); + assert!(group.trigger(i).is_ok()); + assert!(group + .update(0, &InterruptSourceConfig::MsiIrq(msi_source_config)) + .is_ok()); + } + assert!(group.trigger(33).is_err()); + 
assert!(group.disable().is_ok()); + + assert!(MsiIrq::new( + base, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE + 1, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE, + vmfd.clone(), + rounting.clone() + ) + .is_err()); + assert!(MsiIrq::new( + 1100, + 1, + DEFAULT_MAX_MSI_IRQS_PER_DEVICE, + vmfd.clone(), + rounting.clone() + ) + .is_err()); + } +} From a2a02a78ba898681efa08c17fa1b47e25f06cfd5 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 1 Feb 2020 02:16:03 +0800 Subject: [PATCH 27/29] Manage generic MSI interrupts based on VFIO devices Support generic MSI interrupts based on VFIO devices, this will be needed when enabling VFIO device passthrough. Signed-off-by: Liu Jiang --- Cargo.toml | 3 + src/interrupt/kvm/mod.rs | 17 ++ src/interrupt/kvm/vfio_msi_irq.rs | 394 ++++++++++++++++++++++++++++++ src/interrupt/mod.rs | 8 +- src/lib.rs | 3 + 5 files changed, 424 insertions(+), 1 deletion(-) create mode 100644 src/interrupt/kvm/vfio_msi_irq.rs diff --git a/Cargo.toml b/Cargo.toml index 1c0e80d..0e2c460 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,12 +10,15 @@ edition = "2018" libc = ">=0.2.39" kvm-bindings = { version = "~0", optional = true } kvm-ioctls = { version = "~0", optional = true } +vfio-ioctls = { git = "https://github.com/cloud-hypervisor/vfio-ioctls.git", branch = "dragonball", optional = true } vmm-sys-util = "~0" [features] legacy-irq = [] msi-irq = [] +vfio-msi-irq = [] kvm-irq = ["kvm-ioctls", "kvm-bindings"] kvm-msi-generic = ["msi-irq", "kvm-irq"] kvm-legacy-irq = ["legacy-irq", "kvm-irq"] kvm-msi-irq = ["kvm-msi-generic"] +kvm-vfio-msi-irq = ["kvm-msi-generic", "vfio-ioctls", "vfio-msi-irq"] diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs index c46819b..b5a031a 100644 --- a/src/interrupt/kvm/mod.rs +++ b/src/interrupt/kvm/mod.rs @@ -27,6 +27,11 @@ mod msi_irq; #[cfg(feature = "kvm-msi-irq")] use self::msi_irq::MsiIrq; +#[cfg(feature = "kvm-vfio-msi-irq")] +mod vfio_msi_irq; +#[cfg(feature = "kvm-vfio-msi-irq")] +use self::vfio_msi_irq::VfioMsiIrq; 
+ /// Maximum number of global interrupt sources. pub const MAX_IRQS: InterruptIndex = 1024; @@ -130,6 +135,18 @@ impl KvmIrqManagerObj { self.vmfd.clone(), self.routes.clone(), )?)), + #[cfg(feature = "kvm-vfio-msi-irq")] + InterruptSourceType::VfioMsiIrq(vfio_device, vfio_index) => { + Arc::new(Box::new(VfioMsiIrq::new( + base, + count, + self.max_msi_irqs, + self.vmfd.clone(), + self.routes.clone(), + vfio_device, + vfio_index, + )?)) + } _ => return Err(Error::from(ErrorKind::InvalidInput)), }; diff --git a/src/interrupt/kvm/vfio_msi_irq.rs b/src/interrupt/kvm/vfio_msi_irq.rs new file mode 100644 index 0000000..c2cf87c --- /dev/null +++ b/src/interrupt/kvm/vfio_msi_irq.rs @@ -0,0 +1,394 @@ +// Copyright (C) 2019-2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause + +//! Manage virtual device's PCI MSIx/Generic MSI interrupts based on Linux KVM and VFIO framework. +//! +//! The InterruptSourceGroup trait provides methods to inject virtual device interrupts into the +//! target virtual machine, so it's a type of interrupt event sink and doesn't handle the way to +//! generate interrupt events. On the other hand, a VFIO device may generate interrupt events, so +//! it's a type interrupt event source. +//! There are special optimizations to deliver an interrupt from a VFIO device to a virutal machine. +//! - Basic Mode. The virtual device driver register and eventfd to the VFIO driver, register +//! another irqfd to the KVM driver, and relays events from the eventfd to the irqfd. This is +//! not optimal for performance because every interrupt will cause a round-trip into the +//! userspace. +//! - Better Mode. The virtual device driver creates an irqfd, and register the irqfd to both the +//! VFIO driver and KVM driver. So an interrupt event will be relayed but the host kernel, but +//! it still causes VMExit for each interrupt. +//! - Best Mode. 
On x86 platforms with Posted Interrupt capability, the hardware could help to +//! deliver an hardware interrupt to a specific virtual machine, bypass the host kernel. + +use vfio_ioctls::VfioError; + +use super::msi_generic::{create_msi_routing_entries, new_msi_routing_entry, MsiConfig}; +use super::*; + +pub(super) struct VfioMsiIrq { + base: InterruptIndex, + count: InterruptIndex, + vmfd: Arc, + irq_routing: Arc, + vfio_device: Arc, + vfio_index: u32, + msi_configs: Vec, +} + +impl VfioMsiIrq { + #[allow(clippy::new_ret_no_self)] + pub(super) fn new( + base: InterruptIndex, + count: InterruptIndex, + max_msi_irqs: InterruptIndex, + vmfd: Arc, + irq_routing: Arc, + vfio_device: Arc, + vfio_index: u32, + ) -> Result { + if count > max_msi_irqs || base >= MAX_IRQS || base + count > MAX_IRQS { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + match vfio_device.get_irq_info(vfio_index) { + Some(ref info) => { + if info.count < count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + } + None => return Err(std::io::Error::from_raw_os_error(libc::EINVAL)), + } + + let mut msi_configs = Vec::with_capacity(count as usize); + for _ in 0..count { + msi_configs.push(MsiConfig::new()); + } + + Ok(VfioMsiIrq { + base, + count, + vmfd, + irq_routing, + vfio_device, + vfio_index, + msi_configs, + }) + } +} + +impl InterruptSourceGroup for VfioMsiIrq { + fn interrupt_type(&self) -> InterruptSourceType { + InterruptSourceType::VfioMsiIrq(self.vfio_device.clone(), self.vfio_index) + } + + fn len(&self) -> u32 { + self.count + } + + fn base(&self) -> u32 { + self.base + } + + fn enable(&self, configs: &[InterruptSourceConfig]) -> Result<()> { + if configs.len() != self.count as usize { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + // First add IRQ routings for all the MSI interrupts. 
+ let entries = create_msi_routing_entries(self.base, configs)?; + self.irq_routing.add(&entries)?; + + // Then register irqfds to the KVM module. + for i in 0..self.count { + let irqfd = &self.msi_configs[i as usize].irqfd; + self.vmfd + .register_irqfd(irqfd, self.base + i) + .map_err(from_sys_util_errno)?; + } + + // At last configure the VFIO hardware device. + let mut fds = Vec::with_capacity(self.count as usize); + for i in 0..self.count { + fds.push(&self.msi_configs[i as usize].irqfd); + } + self.vfio_device + .enable_irq(self.vfio_index, fds) + .map_err(map_vfio_error)?; + + Ok(()) + } + + fn disable(&self) -> Result<()> { + // First disable interrupts from the VFIO hardware device + self.vfio_device + .disable_irq(self.vfio_index) + .map_err(map_vfio_error)?; + + // Then unregister all irqfds, so it won't trigger anymore. + for i in 0..self.count { + let irqfd = &self.msi_configs[i as usize].irqfd; + self.vmfd + .unregister_irqfd(irqfd, self.base + i) + .map_err(from_sys_util_errno)?; + } + + // At last tear down the IRQ routings for all the MSI interrupts. + let mut entries = Vec::with_capacity(self.count as usize); + for i in 0..self.count { + // Safe to unwrap because there's no legal way to break the mutex. + let msicfg = self.msi_configs[i as usize].config.lock().unwrap(); + let entry = new_msi_routing_entry(self.base + i, &*msicfg); + entries.push(entry); + } + self.irq_routing.remove(&entries)?; + + Ok(()) + } + + fn update(&self, index: InterruptIndex, config: &InterruptSourceConfig) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + if let InterruptSourceConfig::MsiIrq(ref cfg) = config { + // Safe to unwrap because there's no legal way to break the mutex. 
+ let entry = { + let mut msicfg = self.msi_configs[index as usize].config.lock().unwrap(); + msicfg.high_addr = cfg.high_addr; + msicfg.low_addr = cfg.low_addr; + msicfg.data = cfg.data; + + // Only need to update the KVM IRQ routings, no need to touch the VFIO device. + new_msi_routing_entry(self.base + index, &*msicfg) + }; + self.irq_routing.modify(&entry) + } else { + Err(std::io::Error::from_raw_os_error(libc::EINVAL)) + } + } + + fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { + if index >= self.count { + None + } else { + let msi_config = &self.msi_configs[index as usize]; + Some(&msi_config.irqfd) + } + } + + fn trigger(&self, index: InterruptIndex) -> Result<()> { + // Assume that the caller will maintain the interrupt states and only call this function + // when suitable. + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + self.vfio_device + .trigger_irq(self.vfio_index, index) + .map_err(map_vfio_error) + } +} + +impl std::fmt::Debug for VfioMsiIrq { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "VFIO MSI Irq, base {}, vfio_index {} ", + self.base, self.vfio_index + ) + } +} + +fn map_vfio_error(err: VfioError) -> std::io::Error { + match err { + VfioError::OpenContainer(e) => e, + VfioError::OpenGroup(e, _f) => e, + VfioError::KvmSetDeviceAttr(e) => from_sys_util_errno(e), + _ => std::io::Error::from_raw_os_error(libc::EIO), + } +} + +// Following unit test cases depend on hardware configuration, disabled by default. 
+#[cfg(test_disabled)] +mod test { + use super::*; + use kvm_ioctls::{DeviceFd, Kvm, VmFd}; + use std::path::Path; + use vfio_ioctls::{VfioContainer, VfioDevice}; + + const VFIO_PCI_INTX_IRQ_INDEX: u32 = 0; + const VFIO_PCI_MSI_IRQ_INDEX: u32 = 1; + const VFIO_PCI_MSIX_IRQ_INDEX: u32 = 2; + + const BASE: u32 = 0; + + fn create_vm_fd() -> VmFd { + let kvm = Kvm::new().unwrap(); + kvm.create_vm().unwrap() + } + + fn create_kvm_device(vm: Arc) -> DeviceFd { + let mut vfio_dev = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_VFIO, + fd: 0, + flags: 0, + }; + + vm.create_device(&mut vfio_dev).unwrap() + } + + fn vfio_msi_group_prepare( + vfio_index: u32, + pic_sys_path: &str, + ) -> (Arc, u32) { + let vmfd = Arc::new(create_vm_fd()); + assert!(vmfd.create_irq_chip().is_ok()); + let kvm_device = Arc::new(create_kvm_device(vmfd.clone())); + let sysfspath_eth1: &Path = Path::new(pic_sys_path); + let container = Arc::new(VfioContainer::new(kvm_device).unwrap()); + let vfio_device = Arc::new( + VfioDevice::new(sysfspath_eth1, container) + .map_err(|err| println!("{}", err)) + .unwrap(), + ); + + let count = match vfio_device.get_irq_info(vfio_index) { + Some(ref info) => info.count, + None => 0, + }; + + let rounting = Arc::new(KvmIrqRouting::new(vmfd.clone())); + + assert!(VfioMsiIrq::new( + BASE, + 33, + 32, + vmfd.clone(), + rounting.clone(), + vfio_device.clone(), + vfio_index + ) + .is_err()); + assert!(VfioMsiIrq::new( + 1100, + 1, + 32, + vmfd.clone(), + rounting.clone(), + vfio_device.clone(), + vfio_index + ) + .is_err()); + ( + Arc::new( + VfioMsiIrq::new( + BASE, + count, + 32, + vmfd.clone(), + rounting.clone(), + vfio_device.clone(), + vfio_index, + ) + .unwrap(), + ), + count, + ) + } + + fn vfio_msi_interrupt_group_opt(group: Arc, count: u32, index: u32) { + let mmio_base: u32 = 0xd000_0000; + let mut msi_fds: Vec = Vec::with_capacity(count as usize); + if index == VFIO_PCI_INTX_IRQ_INDEX { + 
msi_fds.push(InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})); + } else { + for i in 0..count { + let msi_source_config = MsiIrqSourceConfig { + high_addr: 0, + low_addr: mmio_base + i * 0x1000, + data: 0x1000, + }; + msi_fds.push(InterruptSourceConfig::MsiIrq(msi_source_config)); + } + } + assert!(group.enable(&msi_fds).is_ok()); + assert_eq!(group.len(), count); + assert_eq!(group.base(), BASE); + + for i in 0..count { + assert!(group.irqfd(i).unwrap().write(1).is_ok()); + assert!(group.trigger(i, 0x168).is_err()); + assert!(group.trigger(i, 0).is_ok()); + assert!(group.ack(i, 0x168).is_err()); + assert!(group.ack(i, 0).is_ok()); + + if index == VFIO_PCI_INTX_IRQ_INDEX { + assert!(group + .update( + 0, + &InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {}) + ) + .is_ok()); + } else { + let msi_source_config = MsiIrqSourceConfig { + high_addr: 0, + low_addr: mmio_base + i * 0x1000, + data: i + 0x1000, + }; + assert!(group + .update(i, &InterruptSourceConfig::MsiIrq(msi_source_config)) + .is_ok()); + } + } + assert!(group.trigger(33, 0x168).is_err()); + assert!(group.ack(33, 0x168).is_err()); + assert!(group.disable().is_ok()); + } + + #[test] + fn test_vfio_msi_interrupt_group_intx() { + let (group0, count) = vfio_msi_group_prepare( + VFIO_PCI_INTX_IRQ_INDEX, + "/sys/bus/pci/devices/0000:5c:00.0/", + ); + if count != 0 { + vfio_msi_interrupt_group_opt(group0, count, VFIO_PCI_INTX_IRQ_INDEX); + } + let (group1, count) = vfio_msi_group_prepare( + VFIO_PCI_INTX_IRQ_INDEX, + "/sys/bus/pci/devices/0000:5d:00.0/", + ); + if count != 0 { + vfio_msi_interrupt_group_opt(group1, count, VFIO_PCI_INTX_IRQ_INDEX); + } + } + + #[test] + fn test_vfio_msi_interrupt_group_msi() { + let (group0, count) = + vfio_msi_group_prepare(VFIO_PCI_MSI_IRQ_INDEX, "/sys/bus/pci/devices/0000:5c:00.0/"); + if count != 0 { + vfio_msi_interrupt_group_opt(group0, count, VFIO_PCI_MSI_IRQ_INDEX); + } + let (group1, count) = + vfio_msi_group_prepare(VFIO_PCI_MSI_IRQ_INDEX, 
"/sys/bus/pci/devices/0000:5d:00.0/"); + if count != 0 { + vfio_msi_interrupt_group_opt(group1, count, VFIO_PCI_MSI_IRQ_INDEX); + } + } + + #[test] + #[ignore] + fn test_vfio_msi_interrupt_group_msix() { + let (group0, count) = vfio_msi_group_prepare( + VFIO_PCI_MSIX_IRQ_INDEX, + "/sys/bus/pci/devices/0000:5c:00.0/", + ); + if count != 0 { + vfio_msi_interrupt_group_opt(group0, count, VFIO_PCI_MSIX_IRQ_INDEX); + } + let (group1, count) = vfio_msi_group_prepare( + VFIO_PCI_MSIX_IRQ_INDEX, + "/sys/bus/pci/devices/0000:5d:00.0/", + ); + if count != 0 { + vfio_msi_interrupt_group_opt(group1, count, VFIO_PCI_MSIX_IRQ_INDEX); + } + } +} diff --git a/src/interrupt/mod.rs b/src/interrupt/mod.rs index c870190..96e1d21 100644 --- a/src/interrupt/mod.rs +++ b/src/interrupt/mod.rs @@ -55,6 +55,9 @@ //! according to guest configuration information use std::sync::Arc; + +#[cfg(feature = "vfio-msi-irq")] +use vfio_ioctls::VfioDevice; use vmm_sys_util::eventfd::EventFd; /// Reuse std::io::Result to simplify interoperability among crates. @@ -64,7 +67,7 @@ pub type Result = std::io::Result; pub type InterruptIndex = u32; /// Type of interrupt source. -#[derive(Clone, Debug)] +#[derive(Clone)] pub enum InterruptSourceType { #[cfg(feature = "legacy-irq")] /// Legacy Pin-based Interrupt. @@ -74,6 +77,9 @@ pub enum InterruptSourceType { /// Message Signaled Interrupt (PCI MSI/PCI MSIx etc). /// Some non-PCI devices (like HPET on x86) make use of generic MSI in platform specific ways. MsiIrq, + #[cfg(feature = "vfio-msi-irq")] + /// Message Signalled Interrupt for PCI MSI/PCI MSIx based VFIO devices. + VfioMsiIrq(Arc, u32), } /// Configuration data for an interrupt source. diff --git a/src/lib.rs b/src/lib.rs index 7ace5fe..ce2b872 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,9 @@ //! rust-vmm device model. 
+#[cfg(feature = "kvm-vfio-msi-irq")] +extern crate vfio_ioctls; + use std::cmp::{Ord, PartialOrd}; use std::sync::Mutex; From f0c951e28fb86b0b45e8a445db345ac7cb65cae4 Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Wed, 19 Feb 2020 22:39:07 +0800 Subject: [PATCH 28/29] Support mask/unmask/get_pending_state Mask/unmask/get_pendign_state is needed to support PCI MSI/MSIx when enabling PCI device passthrough. Also document the overall design about the interrupt system. Signed-off-by: Liu Jiang --- src/interrupt/kvm/legacy_irq.rs | 52 +++++++++++++++++++++++++++++- src/interrupt/kvm/mod.rs | 55 ++++++++++++++++++++++++++++++++ src/interrupt/kvm/msi_generic.rs | 3 +- src/interrupt/kvm/msi_irq.rs | 49 ++++++++++++++++++++++++++++ src/interrupt/mod.rs | 5 +++ 5 files changed, 162 insertions(+), 2 deletions(-) diff --git a/src/interrupt/kvm/legacy_irq.rs b/src/interrupt/kvm/legacy_irq.rs index 688cc17..b7290cd 100644 --- a/src/interrupt/kvm/legacy_irq.rs +++ b/src/interrupt/kvm/legacy_irq.rs @@ -11,6 +11,7 @@ use super::*; use kvm_bindings::{ KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_IRQ_ROUTING_IRQCHIP, }; +use vmm_sys_util::eventfd::EFD_NONBLOCK; /// Maximum number of legacy interrupts supported. 
pub const MAX_LEGACY_IRQS: u32 = 24; @@ -40,7 +41,7 @@ impl LegacyIrq { Ok(LegacyIrq { base, vmfd, - irqfd: EventFd::new(0)?, + irqfd: EventFd::new(EFD_NONBLOCK)?, }) } @@ -155,6 +156,55 @@ impl InterruptSourceGroup for LegacyIrq { } self.irqfd.write(1) } + + fn mask(&self, index: InterruptIndex) -> Result<()> { + if index > 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let irqfd = &self.irqfd; + self.vmfd + .unregister_irqfd(irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn unmask(&self, index: InterruptIndex) -> Result<()> { + if index > 1 { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let irqfd = &self.irqfd; + self.vmfd + .register_irqfd(irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn get_pending_state(&self, index: InterruptIndex) -> bool { + if index > 1 { + return false; + } + + // Peak the EventFd.count by reading and writing back. + // The irqfd must be in NON-BLOCKING mode. + let irqfd = &self.irqfd; + match irqfd.read() { + Err(_) => false, + Ok(count) => { + if count != 0 { + if let Err(_) = irqfd.write(count) { + // Hope the caller will handle the pending state corrrectly, + // then no interrupt will be lost. + //panic!("really no way to recover here!!!!"); + } + } + count != 0 + } + } + } } #[cfg(test)] diff --git a/src/interrupt/kvm/mod.rs b/src/interrupt/kvm/mod.rs index b5a031a..c02c93d 100644 --- a/src/interrupt/kvm/mod.rs +++ b/src/interrupt/kvm/mod.rs @@ -6,6 +6,61 @@ //! When updaing KVM IRQ routing by ioctl(KVM_SET_GSI_ROUTING), all interrupts of the virtual //! machine must be updated all together. The [KvmIrqRouting](struct.KvmIrqRouting.html) //! structure is to maintain the global interrupt routing table. +//! +//! It deserves a good documentation about the way that KVM based vmms manages interrupts. +//! From the KVM hypervisor side, it provides three mechanism to support injecting interrupts into +//! guests: +//! 
1) Irqfd. When data is written to an irqfd, it triggers KVM to inject an interrupt into guest. +//! 2) Irq routing. Irq routing determines the way to inject an irq into guest. +//! 3) Signal MSI. Vmm can inject an MSI interrupt into guest by issuing KVM_SIGNAL_MSI ioctl. +//! +//! Most VMMs use irqfd + irq routing to support interrupt injecting, so we will focus on this mode. +//! The flow to enable interrupt injecting is: +//! 1) VMM creates an irqfd +//! 2) VMM invokes KVM_IRQFD to bind the irqfd to an interrupt source +//! 3) VMM invokes KVM_SET_GSI_ROUTING to configure the way to inject the interrupt into guest +//! 4) device backend driver writes to the irqfd +//! 5) an interurpt is injected into the guest +//! +//! So far so good, right? Let's move on to mask/unmask/get_pending_state. That's the real tough +//! part. To support mask/unmask/get_peding_state, we must have a way to break the interrupt +//! delivery chain and maintain the pending state. Let's see how it's implemented by each VMM. +//! - Firecracker. It's very simple, it doesn't support mask/unmask/get_pending_state at all. +//! - Cloud Hypervisor. It builds the interrupt delivery path as: +//! vhost-backend-driver -> EeventFd -> CLH -> Irqfd -> Irqrouting -> Guest OS +//! It also maintains a masked/pending pair for each interrupt. When masking an interrupt, it +//! sets the masked flag and remove IrqRouting for the interrupt. +//! The CLH design has two shortcomings: +//! - it's inefficient for the hot interrupt delivery path. +//! - it may lose in-flight interrupts after removing IRQ routing entry for an interrupt due irqfd +//! implementation details. Buy me a cup of coffee if you wants to knwo the detail. +//! - Qemu. Qemu has a smart design, which supports: +//! - A fast path: driver -> irqfd -> Irqrouting -> Guest OS +//! - A slow path: driver -> eventfd -> qemu -> irqfd -> Irqrouting -> Guest OS +//! When masking an interrupt, it switches from fast path to slow path and vice versa when +//! 
unmasking an interrupt. +//! - Dragonball V1. We doesn't support mask/unmask/get_pending_state at all, we have also enhanced +//! the Virtio MMIO spec, we could use the fast path: driver -> irqfd -> Irqrouting -> Guest OS. +//! - Dragonball V2. When enabling PCI device passthrough, mask/unmask/get_pending_state is a must +//! to support PCI MSI/MSIx. Unlike Qemu fast path/slow path design, Dragonball V2 implements +//! mask/unmask/get_pending_state with fast path only. It works as follow: +//! 1) When masking an interrupt, unbind the irqfd from the interrupt by KVM_IRQFD. After that, +//! all writes to the irqfd won't trigger injecting anymore, and irqfd maintains count for +//! following write operations. +//! 2) When unmasking an interrupt, bind the irqfd to the interrupt again by KVM_IRQFD. When +//! rebinding, an interrupt will be injected into guest if the irqfd has a non-zero count. +//! 3) When getting pending state, peek the count of the irqfd. But the irqfd doesn't support +//! peek, so simulate peek by reading and writing back the count read. +//! By this design, we use the irqfd count to maintain interrupt pending state, and auto-inject +//! pending interrupts when rebinding. So we don't need to maintain the pending status bit. +//! +//! Why Qemu needs a slow path but Dragonball V2 doesn't need slow path? +//! Qemu needs to support a broad ranges of guest OSes and all kinds of device drivers. And some +//! legacy device drivers mask/unmask interrupt when handling each interrupt. +//! For Dragonball, we don't expect guest device driver exhibits such behaviors, and treat +//! mask/unmask/get_pending_state as cold path. We optimize for the hot interrupt delivery path +//! and avoid the complexity to introduce a slow path. The penalty is that get_pending_state() +//! will be much more expensive. 
use std::collections::HashMap; use std::io::{Error, ErrorKind}; diff --git a/src/interrupt/kvm/msi_generic.rs b/src/interrupt/kvm/msi_generic.rs index f91a03b..116bd6e 100644 --- a/src/interrupt/kvm/msi_generic.rs +++ b/src/interrupt/kvm/msi_generic.rs @@ -4,6 +4,7 @@ //! Helper utilities for handling MSI interrupts. use kvm_bindings::{kvm_irq_routing_entry, KVM_IRQ_ROUTING_MSI}; +use vmm_sys_util::eventfd::EFD_NONBLOCK; use super::*; @@ -15,7 +16,7 @@ pub(super) struct MsiConfig { impl MsiConfig { pub(super) fn new() -> Self { MsiConfig { - irqfd: EventFd::new(0).unwrap(), + irqfd: EventFd::new(EFD_NONBLOCK).unwrap(), config: Mutex::new(Default::default()), } } diff --git a/src/interrupt/kvm/msi_irq.rs b/src/interrupt/kvm/msi_irq.rs index f4bc624..6283232 100644 --- a/src/interrupt/kvm/msi_irq.rs +++ b/src/interrupt/kvm/msi_irq.rs @@ -141,6 +141,55 @@ impl InterruptSourceGroup for MsiIrq { let msi_config = &self.msi_configs[index as usize]; msi_config.irqfd.write(1) } + + fn mask(&self, index: InterruptIndex) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let irqfd = &self.msi_configs[index as usize].irqfd; + self.vmfd + .unregister_irqfd(irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn unmask(&self, index: InterruptIndex) -> Result<()> { + if index >= self.count { + return Err(std::io::Error::from_raw_os_error(libc::EINVAL)); + } + + let irqfd = &self.msi_configs[index as usize].irqfd; + self.vmfd + .register_irqfd(irqfd, self.base + index) + .map_err(from_sys_util_errno)?; + + Ok(()) + } + + fn get_pending_state(&self, index: InterruptIndex) -> bool { + if index >= self.count { + return false; + } + + // Peak the EventFd.count by reading and writing back. + // The irqfd must be in NON-BLOCKING mode. 
+ let irqfd = &self.msi_configs[index as usize].irqfd; + match irqfd.read() { + Err(_) => false, + Ok(count) => { + if count != 0 { + if let Err(_) = irqfd.write(count) { + // Hope the caller will handle the pending state corrrectly, + // then no interrupt will be lost. + //panic!("really no way to recover here!!!!"); + } + } + count != 0 + } + } + } } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/src/interrupt/mod.rs b/src/interrupt/mod.rs index 96e1d21..da003c1 100644 --- a/src/interrupt/mod.rs +++ b/src/interrupt/mod.rs @@ -208,6 +208,11 @@ pub trait InterruptSourceGroup: Send + Sync { // To accommodate this, we can have a no-op here. Ok(()) } + + /// Check whether there's pending interrupt. + fn get_pending_state(&self, _index: InterruptIndex) -> bool { + false + } } #[cfg(feature = "kvm-irq")] From 5a7b0e193530bab43bda9201c3e033e7fa1aa2ac Mon Sep 17 00:00:00 2001 From: Liu Jiang Date: Sat, 9 Nov 2019 00:48:58 +0800 Subject: [PATCH 29/29] Add helper struct to manage device interrupt mode A device may support multiple interrupt modes. For example, a PCI device may support legacy, PCI MSI and PCI MSIx interrupts. So add struct DeviceInterruptManager to manage the device interupt working mode. This interrupt manager helps a device backend driver to manage its interrupts and provides interfaces to switch interrupt working modes. 
Signed-off-by: Liu Jiang --- Cargo.toml | 4 + src/interrupt/manager.rs | 644 +++++++++++++++++++++++++++++++++++++++ src/interrupt/mod.rs | 23 ++ 3 files changed, 671 insertions(+) create mode 100644 src/interrupt/manager.rs diff --git a/Cargo.toml b/Cargo.toml index 0e2c460..1ae791e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,14 @@ kvm-ioctls = { version = "~0", optional = true } vfio-ioctls = { git = "https://github.com/cloud-hypervisor/vfio-ioctls.git", branch = "dragonball", optional = true } vmm-sys-util = "~0" +[dev-dependencies] +byteorder = ">=1.2.1" + [features] legacy-irq = [] msi-irq = [] vfio-msi-irq = [] + kvm-irq = ["kvm-ioctls", "kvm-bindings"] kvm-msi-generic = ["msi-irq", "kvm-irq"] kvm-legacy-irq = ["legacy-irq", "kvm-irq"] diff --git a/src/interrupt/manager.rs b/src/interrupt/manager.rs new file mode 100644 index 0000000..73d42ea --- /dev/null +++ b/src/interrupt/manager.rs @@ -0,0 +1,644 @@ +// Copyright (C) 2019-2020 Alibaba Cloud. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +/// Interrupt manager to manage and switch device interrupt modes. +/// +/// A device may support multiple interrupt modes. For example, a PCI device may support legacy, +/// PCI MSI and PCI MSIx interrupts. This interrupt manager helps a device backend driver to manage +/// its interrupts and provides interfaces to switch interrupt working modes. +use std::io::{Error, Result}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::Arc; +use std::usize; + +#[cfg(feature = "legacy-irq")] +use super::LegacyIrqSourceConfig; +#[cfg(feature = "msi-irq")] +use super::MsiIrqSourceConfig; +use super::{InterruptManager, InterruptSourceConfig, InterruptSourceGroup, InterruptSourceType}; +use crate::resources::DeviceResources; + +#[cfg(feature = "legacy-irq")] +const LEGACY_CONFIGS: [InterruptSourceConfig; 1] = + [InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {})]; + +/// Device interrupt working modes. 
+#[derive(Copy, Clone, Debug, PartialEq)] +pub enum DeviceInterruptMode { + /// The device interrupt manager has been disabled. + Disabled = 0, + /// The device interrupt manager works in legacy irq mode. + LegacyIrq = 1, + /// The device interrupt manager works in generic MSI mode. + GenericMsiIrq = 2, + /// The device interrupt manager works in PCI MSI mode. + PciMsiIrq = 3, + /// The device interrupt manager works in PCI MSI-x mode. + PciMsixIrq = 4, +} + +/// A struct to manage interrupts and interrupt modes for a device. +/// +/// The interrupt manager may support multiple working mode. For example, an interrupt manager +/// for a PCI device may work in legacy mode, PCI MSI mode or PCI MSIx mode. Under certain +/// conditions, the interrupt manager may switch between interrupt working modes. To simplify +/// implementation, switching working mode is only supported at configuration stage and will be +/// disabled at runtime stage. The DeviceInterruptManager::enable() switches the interrupt manager +/// from configuration stage into runtime stage. And DeviceInterruptManager::reset() switches +/// from runtime stage back to initial configuration stage. +pub struct DeviceInterruptManager { + mode: DeviceInterruptMode, + activated: bool, + current_idx: usize, + mode2idx: [usize; 5], + intr_mgr: T, + intr_groups: Vec>>, + #[cfg(feature = "msi-irq")] + msi_config: Vec, +} + +impl DeviceInterruptManager { + /// Create an interrupt manager for a device. + /// + /// # Arguments + /// * `intr_mgr`: underline interrupt manager to allocate/free interrupt groups. + /// * `resources`: resources assigned to the device, including assigned interrupt resources. 
+ pub fn new(intr_mgr: T, resources: &DeviceResources) -> Result { + let mut mgr = DeviceInterruptManager { + mode: DeviceInterruptMode::Disabled, + activated: false, + current_idx: usize::MAX, + mode2idx: [usize::MAX; 5], + intr_mgr, + intr_groups: Vec::new(), + #[cfg(feature = "msi-irq")] + msi_config: Vec::new(), + }; + + #[cfg(feature = "legacy-irq")] + { + if let Some(irq) = resources.get_legacy_irq() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::LegacyIrq, irq, 1)?; + mgr.mode2idx[DeviceInterruptMode::LegacyIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + } + + #[cfg(feature = "msi-irq")] + { + if let Some(msi) = resources.get_generic_msi_irqs() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::MsiIrq, msi.0, msi.1)?; + mgr.resize_msi_config_space(group.len()); + mgr.mode2idx[DeviceInterruptMode::GenericMsiIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + + if let Some(msi) = resources.get_pci_msi_irqs() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::MsiIrq, msi.0, msi.1)?; + mgr.resize_msi_config_space(group.len()); + mgr.mode2idx[DeviceInterruptMode::PciMsiIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + + if let Some(msi) = resources.get_pci_msix_irqs() { + let group = mgr + .intr_mgr + .create_group(InterruptSourceType::MsiIrq, msi.0, msi.1)?; + mgr.resize_msi_config_space(group.len()); + mgr.mode2idx[DeviceInterruptMode::PciMsixIrq as usize] = mgr.intr_groups.len(); + mgr.intr_groups.push(group); + } + } + + Ok(mgr) + } + + /// Check whether the interrupt manager has been activated. + pub fn is_enabled(&self) -> bool { + self.activated + } + + /// Switch the interrupt manager from configuration stage into runtime stage. + /// + /// The working mode could only be changed at configuration stage, and all requests to change + /// working mode at runtime stage will be rejected. 
+ /// If the interrupt manager is still in DISABLED mode when DeviceInterruptManager::enable() + /// is called, it will be put into LEGACY mode if LEGACY mode is supported. + pub fn enable(&mut self) -> Result<()> { + if self.activated { + return Ok(()); + } + + // Enter Legacy mode by default if Legacy mode is supported. + if self.mode == DeviceInterruptMode::Disabled + && self.mode2idx[DeviceInterruptMode::LegacyIrq as usize] != usize::MAX + { + self.set_working_mode(DeviceInterruptMode::LegacyIrq)?; + } + if self.mode == DeviceInterruptMode::Disabled { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + self.intr_groups[self.current_idx].enable(self.get_configs(self.mode))?; + self.activated = true; + + Ok(()) + } + + /// Switch the interrupt manager from runtime stage back into initial configuration stage. + /// + /// Currently we doesn't track the usage of interrupt group object given out by `get_group()`, + /// so the the caller needs to take the responsibility to release all interrupt group object + /// reference before calling DeviceInterruptManager::reset(). + pub fn reset(&mut self) -> Result<()> { + if self.activated { + self.activated = false; + self.intr_groups[self.current_idx].disable()?; + } + self.set_working_mode(DeviceInterruptMode::Disabled)?; + + Ok(()) + } + + /// Get the current interrupt working mode. + pub fn get_working_mode(&mut self) -> DeviceInterruptMode { + self.mode + } + + /// Switch interrupt working mode. + /// + /// Currently switching working mode is only supported during device configuration stage and + /// will always return failure if called during device runtime stage. The device switches + /// from configuration stage to runtime stage by invoking `DeviceInterruptManager::enable()`. 
+ /// With this constraint, the device drivers may call `DeviceInterruptManager::get_group()` to + /// get the underline active interrupt group object, and directly calls the interrupt group + /// object's methods to trigger/acknowledge interrupts. + /// + /// This is a key design decision for optimizing performance. Though the DeviceInterruptManager + /// object itself is not multi-thread safe and must be protected from concurrent access by the + /// caller, the interrupt source group object is multi-thread safe and could be called + /// concurrently to trigger/acknowledge interrupts. This design may help to improve performance + /// for MSI interrupts. + /// + /// # Arguments + /// * `mode`: target working mode. + pub fn set_working_mode(&mut self, mode: DeviceInterruptMode) -> Result<()> { + // Can't switch mode agian once enabled. + if self.activated { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + if mode != self.mode { + // Supported state transitions: + // other state -> DISABLED + // - DISABLED -> other + // - non-legacy -> legacy + // - legacy -> non-legacy + if self.mode != DeviceInterruptMode::Disabled + && self.mode != DeviceInterruptMode::LegacyIrq + && mode != DeviceInterruptMode::LegacyIrq + && mode != DeviceInterruptMode::Disabled + { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + // Then enter new state + if mode != DeviceInterruptMode::Disabled { + self.reset_configs(mode); + self.current_idx = self.mode2idx[mode as usize]; + } + self.mode = mode; + } + + Ok(()) + } + + /// Get the underline interrupt source group object, so the device driver could concurrently + /// trigger/acknowledge interrupts by using the returned group object. + pub fn get_group(&self) -> Option>> { + if !self.activated || self.mode == DeviceInterruptMode::Disabled { + None + } else { + Some(self.intr_groups[self.current_idx].clone()) + } + } + + /// Reconfigure a specific interrupt in current working mode at configuration or runtime stage. 
+ /// + /// It's mainly used to reconfigure Generic MSI/PCI MSI/PCI MSIx interrupts. Actually legacy + /// interrupts don't support reconfiguration yet. + #[allow(unused_variables)] + pub fn update(&mut self, index: u32) -> Result<()> { + if !self.activated { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + + match self.mode { + #[cfg(feature = "msi-irq")] + DeviceInterruptMode::GenericMsiIrq + | DeviceInterruptMode::PciMsiIrq + | DeviceInterruptMode::PciMsixIrq => { + let group = &self.intr_groups[self.current_idx as usize]; + if index >= group.len() || index >= self.msi_config.len() as u32 { + return Err(Error::from_raw_os_error(libc::EINVAL)); + } + group.update(index, &self.msi_config[index as usize])?; + Ok(()) + } + _ => Err(Error::from_raw_os_error(libc::EINVAL)), + } + } + + fn get_configs(&self, mode: DeviceInterruptMode) -> &[InterruptSourceConfig] { + match mode { + #[cfg(feature = "legacy-irq")] + DeviceInterruptMode::LegacyIrq => &LEGACY_CONFIGS[..], + #[cfg(feature = "msi-irq")] + DeviceInterruptMode::GenericMsiIrq + | DeviceInterruptMode::PciMsiIrq + | DeviceInterruptMode::PciMsixIrq => { + let idx = self.mode2idx[mode as usize]; + let group_len = self.intr_groups[idx].len() as usize; + &self.msi_config[0..group_len] + } + _ => panic!("unhandled interrupt type in get_configs()"), + } + } + + fn reset_configs(&mut self, mode: DeviceInterruptMode) { + match mode { + #[cfg(feature = "msi-irq")] + DeviceInterruptMode::GenericMsiIrq + | DeviceInterruptMode::PciMsiIrq + | DeviceInterruptMode::PciMsixIrq => { + self.msi_config = vec![ + InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig::default()); + self.msi_config.len() + ]; + } + _ => {} + } + } +} + +#[cfg(feature = "msi-irq")] +impl DeviceInterruptManager { + /// Set the high address for a MSI message. 
+ #[allow(irrefutable_let_patterns)] + pub fn set_msi_high_address(&mut self, index: u32, data: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.high_addr = data; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + /// Set the low address for a MSI message. + #[allow(irrefutable_let_patterns)] + pub fn set_msi_low_address(&mut self, index: u32, data: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.low_addr = data; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + /// Set the data for a MSI message. + #[allow(irrefutable_let_patterns)] + pub fn set_msi_data(&mut self, index: u32, data: u32) -> Result<()> { + if (index as usize) < self.msi_config.len() { + if let InterruptSourceConfig::MsiIrq(ref mut msi) = self.msi_config[index as usize] { + msi.data = data; + return Ok(()); + } + } + Err(Error::from_raw_os_error(libc::EINVAL)) + } + + fn resize_msi_config_space(&mut self, size: u32) { + if self.msi_config.len() < size as usize { + self.msi_config = + vec![InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig::default()); size as usize]; + } + } +} + +/// Struct to implement a 32-bit interrupt status register. +pub struct InterruptStatusRegister32 { + status: AtomicU32, +} + +impl InterruptStatusRegister32 { + /// Create a status register instance. + pub fn new() -> Self { + InterruptStatusRegister32 { + status: AtomicU32::new(0), + } + } + + /// Read current value of the status register. + pub fn read(&self) -> u32 { + self.status.load(Ordering::SeqCst) + } + + /// Write value to the status register. + pub fn write(&self, value: u32) { + self.status.store(value, Ordering::SeqCst); + } + + /// Read current value and reset the status register to 0. 
+ pub fn read_and_clear(&self) -> u32 { + self.status.swap(0, Ordering::SeqCst) + } + + /// Set bits into `value`. + pub fn set_bits(&self, value: u32) { + self.status.fetch_or(value, Ordering::SeqCst); + } + + /// Clear bits present in `value`. + pub fn clear_bits(&self, value: u32) { + self.status.fetch_and(!value, Ordering::SeqCst); + } +} + +#[cfg(all(test, feature = "kvm-legacy-irq", feature = "kvm-msi-irq"))] +mod tests { + use super::*; + use crate::interrupt::KvmIrqManager; + use crate::resources::{DeviceResources, MsiIrqType, Resource}; + use kvm_ioctls::{Kvm, VmFd}; + use std::sync::Arc; + + fn create_vm_fd() -> VmFd { + let kvm = Kvm::new().unwrap(); + kvm.create_vm().unwrap() + } + + fn create_init_resources() -> DeviceResources { + let mut resources = DeviceResources::new(); + + resources.append(Resource::MmioAddressRange { + base: 0xd000_0000, + size: 0x10_0000, + }); + resources.append(Resource::LegacyIrq(0)); + resources.append(Resource::MsiIrq { + ty: MsiIrqType::GenericMsi, + base: 0x200, + size: 0x10, + }); + resources.append(Resource::MsiIrq { + ty: MsiIrqType::PciMsi, + base: 0x100, + size: 0x20, + }); + resources.append(Resource::MsiIrq { + ty: MsiIrqType::PciMsix, + base: 0x300, + size: 0x20, + }); + + resources + } + + fn create_interrupt_manager() -> DeviceInterruptManager> { + let vmfd = Arc::new(create_vm_fd()); + assert!(vmfd.create_irq_chip().is_ok()); + let intr_mgr = Arc::new(KvmIrqManager::new(vmfd.clone())); + + let resource = create_init_resources(); + assert!(intr_mgr.initialize().is_ok()); + DeviceInterruptManager::new(intr_mgr.clone(), &resource).unwrap() + } + + #[test] + fn test_create_device_interrupt_manager() { + let mut mgr = create_interrupt_manager(); + + assert_eq!(mgr.mode, DeviceInterruptMode::Disabled); + assert_eq!(mgr.activated, false); + assert_eq!(mgr.current_idx, usize::MAX); + assert_eq!(mgr.intr_groups.len(), 4); + assert_eq!(mgr.is_enabled(), false); + assert!(mgr.get_group().is_none()); + + // Enter legacy 
mode by default + mgr.enable().unwrap(); + assert_eq!(mgr.is_enabled(), true); + assert_eq!( + mgr.mode2idx[DeviceInterruptMode::LegacyIrq as usize], + mgr.current_idx + ); + assert!(mgr.get_group().is_some()); + + // Disable interrupt manager + mgr.reset().unwrap(); + assert_eq!(mgr.is_enabled(), false); + assert_eq!( + mgr.mode2idx[DeviceInterruptMode::LegacyIrq as usize], + mgr.current_idx + ); + assert_eq!(mgr.get_working_mode(), DeviceInterruptMode::Disabled); + assert!(mgr.get_group().is_none()); + } + + #[test] + fn test_device_interrupt_manager_switch_mode() { + let mut mgr = create_interrupt_manager(); + + // Can't switch working mode in enabled state. + mgr.enable().unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + mgr.reset().unwrap(); + + // Switch from LEGACY to PciMsi mode + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from LEGACY to PciMsix mode + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from LEGACY to GenericMsi mode + mgr.set_working_mode(DeviceInterruptMode::LegacyIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + 
mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + + // Switch from DISABLED to PciMsi mode + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from DISABLED to PciMsix mode + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap_err(); + + // Switch from DISABLED to GenericMsi mode + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .unwrap(); + mgr.set_working_mode(DeviceInterruptMode::PciMsiIrq) + .unwrap_err(); + mgr.set_working_mode(DeviceInterruptMode::PciMsixIrq) + .unwrap_err(); + + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + mgr.set_working_mode(DeviceInterruptMode::Disabled).unwrap(); + } + + #[test] + fn test_error() { + let mut interrupt_manager = create_interrupt_manager(); + + assert!(interrupt_manager.set_msi_data(512, 0).is_err()); + assert!(interrupt_manager.set_msi_data(0, 0).is_ok()); + assert!(interrupt_manager.set_msi_high_address(512, 0).is_err()); + assert!(interrupt_manager.set_msi_high_address(0, 0).is_ok()); + assert!(interrupt_manager.set_msi_low_address(512, 0).is_err()); + assert!(interrupt_manager.set_msi_low_address(0, 0).is_ok()); + + interrupt_manager.activated = true; + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::Disabled) + .is_err()); + 
assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::PciMsiIrq) + .is_err()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::PciMsixIrq) + .is_err()); + } + + #[test] + fn test_disable2legacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .is_ok()); + } + + #[test] + fn test_disable2nonlegacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .is_ok()); + } + + #[test] + fn test_legacy2nonlegacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .is_ok()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .is_ok()); + } + + #[test] + fn test_nonlegacy2legacy() { + let mut interrupt_manager = create_interrupt_manager(); + interrupt_manager.activated = false; + interrupt_manager.mode = DeviceInterruptMode::Disabled; + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .is_ok()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .is_ok()); + } + + #[test] + fn test_update() { + let mut interrupt_manager = create_interrupt_manager(); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::GenericMsiIrq) + .is_ok()); + assert!(interrupt_manager.enable().is_ok()); + 
assert!(interrupt_manager.update(0x10).is_err()); + assert!(interrupt_manager.update(0x01).is_ok()); + assert!(interrupt_manager.reset().is_ok()); + assert!(interrupt_manager + .set_working_mode(DeviceInterruptMode::LegacyIrq) + .is_ok()); + assert!(interrupt_manager.update(0x10).is_err()); + } + + #[test] + fn test_interrupt_status_register() { + let status = InterruptStatusRegister32::new(); + + assert_eq!(status.read(), 0); + status.write(0x13); + assert_eq!(status.read(), 0x13); + status.clear_bits(0x11); + assert_eq!(status.read(), 0x2); + status.set_bits(0x100); + assert_eq!(status.read_and_clear(), 0x102); + assert_eq!(status.read(), 0); + } +} diff --git a/src/interrupt/mod.rs b/src/interrupt/mod.rs index da003c1..28f9921 100644 --- a/src/interrupt/mod.rs +++ b/src/interrupt/mod.rs @@ -54,12 +54,17 @@ //! * the virtual device backend requests the interrupt manager to create an interrupt group //! according to guest configuration information +use std::io::Error; +use std::ops::Deref; use std::sync::Arc; #[cfg(feature = "vfio-msi-irq")] use vfio_ioctls::VfioDevice; use vmm_sys_util::eventfd::EventFd; +mod manager; +pub use manager::{DeviceInterruptManager, DeviceInterruptMode, InterruptStatusRegister32}; + /// Reuse std::io::Result to simplify interoperability among crates. pub type Result = std::io::Result; @@ -143,6 +148,24 @@ pub trait InterruptManager { fn destroy_group(&self, group: Arc>) -> Result<()>; } +impl InterruptManager for Arc { + fn create_group( + &self, + type_: InterruptSourceType, + base: u32, + count: u32, + ) -> std::result::Result>, Error> { + self.deref().create_group(type_, base, count) + } + + fn destroy_group( + &self, + group: Arc>, + ) -> std::result::Result<(), Error> { + self.deref().destroy_group(group) + } +} + /// Trait to manage a group of interrupt sources for a device. /// /// A device may support several types of interrupts, and each type of interrupt may contain one or