diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index acffa1c98..0faa544ba 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -19,10 +19,10 @@ use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag}; use propolis_client::support::nvme_serial_from_str; use propolis_client::types::{ BlobStorageBackend, Board, Chipset, ComponentV0, CrucibleStorageBackend, - I440Fx, InstanceEnsureRequest, InstanceInitializationMethod, - InstanceMetadata, InstanceSpecGetResponse, InstanceSpecV0, NvmeDisk, - QemuPvpanic, ReplacementComponent, SerialPort, SerialPortNumber, - VirtioDisk, + GuestHypervisorInterface, I440Fx, InstanceEnsureRequest, + InstanceInitializationMethod, InstanceMetadata, InstanceSpecGetResponse, + InstanceSpecV0, NvmeDisk, QemuPvpanic, ReplacementComponent, SerialPort, + SerialPortNumber, VirtioDisk, }; use propolis_client::{PciPath, SpecKey}; use propolis_config_toml::spec::SpecConfig; @@ -189,6 +189,10 @@ struct VmConfig { // cloud_init ISO file #[clap(long, action, conflicts_with = "spec")] cloud_init: Option, + + /// enable Hyper-V compatible enlightenments for this VM + #[clap(long, action)] + hyperv: bool, } fn add_component_to_spec( @@ -293,7 +297,11 @@ impl VmConfig { cpuid: None, cpus: self.vcpus, memory_mb: self.memory, - guest_hv_interface: None, + guest_hv_interface: if self.hyperv { + Some(GuestHypervisorInterface::HyperV { features: vec![] }) + } else { + None + }, }, components: Default::default(), }; diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 8c1a9b251..f210471f3 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -29,7 +29,9 @@ use std::sync::Arc; use oximeter::types::ProducerRegistry; use oximeter_instruments::kstat::KstatSampler; -use propolis::enlightenment::{bhyve::BhyveGuestInterface, Enlightenment}; +use propolis::enlightenment::{ + bhyve::BhyveGuestInterface, 
hyperv::HyperV, Enlightenment, +}; use propolis_api_types::{ instance_spec::components::board::GuestHypervisorInterface, InstanceEnsureResponse, InstanceMigrateInitiateResponse, @@ -391,9 +393,19 @@ async fn initialize_vm_objects( let vmm_log = log.new(slog::o!("component" => "vmm")); - let guest_hv_interface = match spec.board.guest_hv_interface { - GuestHypervisorInterface::Bhyve => Arc::new(BhyveGuestInterface), - }; + let (guest_hv_interface, guest_hv_lifecycle) = + match spec.board.guest_hv_interface { + GuestHypervisorInterface::Bhyve => { + let bhyve = Arc::new(BhyveGuestInterface); + let lifecycle = bhyve.clone(); + (bhyve as Arc, lifecycle.as_lifecycle()) + } + GuestHypervisorInterface::HyperV { .. } => { + let hyperv = Arc::new(HyperV::new(&vmm_log)); + let lifecycle = hyperv.clone(); + (hyperv as Arc, lifecycle.as_lifecycle()) + } + }; // Set up the 'shell' instance into which the rest of this routine will // add components. @@ -458,7 +470,7 @@ async fn initialize_vm_objects( let ramfb = init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?; - init.register_guest_hv_interface(guest_hv_interface.as_lifecycle()); + init.register_guest_hv_interface(guest_hv_lifecycle); init.initialize_cpus().await?; let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( &machine, diff --git a/crates/propolis-api-types/src/instance_spec/components/board.rs b/crates/propolis-api-types/src/instance_spec/components/board.rs index 9dd879090..6a8eff7cb 100644 --- a/crates/propolis-api-types/src/instance_spec/components/board.rs +++ b/crates/propolis-api-types/src/instance_spec/components/board.rs @@ -5,6 +5,8 @@ //! VM mainboard components. Every VM has a board, even if it has no other //! peripherals. +use std::collections::BTreeSet; + use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -92,6 +94,29 @@ pub struct CpuidEntry { pub edx: u32, } +/// Flags that enable "simple" Hyper-V enlightenments that require no +/// feature-specific configuration. 
+// +// NOTE: This enum's variants should never have any associated data (note that +// the type doesn't use serde's `tag` and `content` attributes). If a future +// enlightenment requires associated data, it should be put into a +// `HyperVExtendedFeatures` struct (or similar), and the `HyperV` variant of +// `GuestHypervisorInterface` should be extended to `Option`ally include that +// struct. +#[derive( + Clone, + Deserialize, + Serialize, + Debug, + JsonSchema, + Ord, + PartialOrd, + Eq, + PartialEq, +)] +#[serde(deny_unknown_fields)] +pub enum HyperVFeatureFlag {} + /// A hypervisor interface to expose to the guest. #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema, Default)] #[serde(deny_unknown_fields, tag = "type", content = "value")] @@ -100,6 +125,10 @@ pub enum GuestHypervisorInterface { /// leaf 0x4000_0000 and no additional leaves or features). #[default] Bhyve, + + /// Expose a Hyper-V-compatible hypervisor interface with the supplied + /// features enabled. + HyperV { features: BTreeSet }, } impl GuestHypervisorInterface { diff --git a/lib/propolis/src/common.rs b/lib/propolis/src/common.rs index e80d85090..7d118d611 100644 --- a/lib/propolis/src/common.rs +++ b/lib/propolis/src/common.rs @@ -467,7 +467,7 @@ impl RWOp<'_, '_> { } /// An address within a guest VM. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct GuestAddr(pub u64); impl GuestAddr { diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs new file mode 100644 index 000000000..ce8bf2e2b --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -0,0 +1,123 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Constant definitions and flags for Hyper-V emulations. 
These are drawn from +//! the Hyper-V TLFS version 6.0b (referred to as "TLFS" below). See the parent +//! module documentation for more details. +//! +//! Where possible, constants in this module (such as MSR identifiers) are given +//! names that match those used in the TLFS. + +use cpuid_utils::CpuidValues; + +/// Hyper-V-compatible hypervisors are required to support hypervisor CPUID +/// leaves up to 0x4000_0005. +pub(super) const HYPERV_MIN_REQUIRED_CPUID_LEAF: u32 = 0x40000005; + +/// CPUID leaf 0x4000_0000 contains hypervisor identifying information. eax +/// receives the highest valid CPUID leaf in the hypervisor range. ebx, ecx, and +/// edx receive a 12-byte vendor ID. +/// +/// In order to get both Linux and Windows guests to accept these +/// enlightenments, the ebx/ecx/edx ID here is set to "Microsoft Hv". Windows +/// guests will accept other vendor IDs (they look at leaf 0x4000_0001 eax to +/// identify the hypervisor interface instead of reading the vendor ID in leaf +/// 0), but Linux guests only consider the vendor ID. +const HYPERV_LEAF_0_VALUES: CpuidValues = CpuidValues { + eax: HYPERV_MIN_REQUIRED_CPUID_LEAF, + ebx: 0x7263694D, + ecx: 0x666F736F, + edx: 0x76482074, +}; + +/// Generates values for CPUID leaf 0x4000_0000, which contains hypervisor +/// identifying information. eax receives the value of `max_leaf`, the maximum +/// valid CPUID leaf in the hypervisor range; ebx, ecx, and edx contain an +/// appropriate vendor ID. +/// +/// `max_leaf` supplies the maximum valid CPUID leaf in the hypervisor range. +/// +/// # Panics +/// +/// Panics if `max_leaf` is less than [`HYPERV_MIN_REQUIRED_CPUID_LEAF`]. 
+pub(super) fn hyperv_leaf_0_values(max_leaf: u32) -> CpuidValues { + assert!( + max_leaf >= HYPERV_MIN_REQUIRED_CPUID_LEAF, + "requested max leaf {max_leaf:#x} less than minimum required" + ); + + CpuidValues { eax: max_leaf, ..HYPERV_LEAF_0_VALUES } +} + +/// Hyper-V leaf 0x4000_0001 contains an (ostensibly vendor-neutral) interface +/// identifier. eax receives "Hv#1"; the other three outputs are reserved. +pub(super) const HYPERV_LEAF_1_VALUES: CpuidValues = + CpuidValues { eax: 0x31237648, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0002 contains hypervisor version information. To avoid +/// having to reason about what it means to expose a specific hypervisor version +/// across a live migration between potentially different host and/or Propolis +/// versions, this information is always set to 0. +pub(super) const HYPERV_LEAF_2_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +bitflags::bitflags! { + /// Hyper-V leaf 0x4000_0003 eax returns synthetic MSR access rights. + /// Only the bits actually used by this enlightenment stack are enumerated + /// here. + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct HyperVLeaf3Eax: u32 { + const PARTITION_REFERENCE_COUNTER = 1 << 1; + const HYPERCALL = 1 << 5; + const VP_INDEX = 1 << 6; + const PARTITION_REFERENCE_TSC = 1 << 9; + + // Bits 14-31 of this register are reserved. + } +} + +impl Default for HyperVLeaf3Eax { + /// Grants access to the VP index and hypercall MSRs. This is the minimum + /// set of access rights that all Hyper-V-compatible hypervisors must grant. + fn default() -> Self { + HyperVLeaf3Eax::VP_INDEX | HyperVLeaf3Eax::HYPERCALL + } +} + +/// Hyper-V leaf 0x4000_0004 describes behavior that the guest OS should +/// implement for optimal performance. Propolis expresses no opinion about these +/// options, except that it indicates in ebx that the guest should never try to +/// notify the hypervisor about failed spinlock acquisitions. 
+pub(super) const HYPERV_LEAF_4_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0xFFFFFFFF, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0005 describes the hypervisor's CPU and interrupt +/// remapping limits. Hypervisors are allowed not to expose these limits by +/// publishing 0s to this leaf. +pub(super) const HYPERV_LEAF_5_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Allows the guest to report its type and version information. See TLFS +/// section 2.6 for details about this MSR's format. +/// +/// Guest OSes are required to identify themselves via this MSR before they can +/// set the enabled bit in [`HV_X64_MSR_HYPERCALL`] or make any hypercalls. +/// +/// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. +pub(super) const HV_X64_MSR_GUEST_OS_ID: u32 = 0x4000_0000; + +/// Specifies the guest physical address at which the guest would like to place +/// the hypercall page. See TLFS section 3.13 and the [`MsrHypercallValue`] +/// struct. +/// +/// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. +/// +/// [`MsrHypercallValue`]: super::hypercall::MsrHypercallValue +pub(super) const HV_X64_MSR_HYPERCALL: u32 = 0x4000_0001; + +/// Guests may read this register to obtain the index of the vCPU that read the +/// register. +/// +/// Read-only; requires the [`HyperVLeaf3Eax::VP_INDEX`] privilege. +pub(super) const HV_X64_MSR_VP_INDEX: u32 = 0x4000_0002; diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs new file mode 100644 index 000000000..215cbcc11 --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -0,0 +1,80 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Support for hypercalls and their related MSRs. 
+ +use crate::common::{GuestAddr, PAGE_MASK, PAGE_SIZE}; + +const LOCKED_BIT: u64 = 1; +const LOCKED_MASK: u64 = 1 << LOCKED_BIT; +const ENABLED_BIT: u64 = 0; +const ENABLED_MASK: u64 = 1 << ENABLED_BIT; + +/// Represents a value written to the [`HV_X64_MSR_HYPERCALL`] register. +/// +/// Writing to this register enables the hypercall page. The hypervisor +/// overwrites this page with an instruction sequence that the guest should +/// execute in order to issue a call to the hypervisor. See +/// [`HYPERCALL_INSTRUCTION_SEQUENCE`]. +/// +/// Bits 11:2 of this register are reserved. The TLFS specifies that the guest +/// "should ignore [them] on reads and preserve [them] on writes," but imposes +/// no particular penalties on guests that modify these bits. +/// +/// [`HV_X64_MSR_HYPERCALL`]: super::bits::HV_X64_MSR_HYPERCALL +#[derive(Clone, Copy, Default)] +pub(super) struct MsrHypercallValue(pub(super) u64); + +impl std::fmt::Debug for MsrHypercallValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MsrHypercallValue") + .field("raw", &format!("{:#x}", self.0)) + .field("gpa", &format!("{:#x}", self.gpa().0)) + .field("locked", &self.locked()) + .field("enabled", &self.enabled()) + .finish() + } +} + +impl MsrHypercallValue { + /// Returns the guest physical address at which the guest would like the + /// hypercall page to be placed. + pub fn gpa(&self) -> GuestAddr { + GuestAddr(self.0 & PAGE_MASK as u64) + } + + /// Returns whether the hypercall page location is locked. Once locked, the + /// value in `MSR_HYPERCALL` cannot change until the hypervisor resets the + /// guest. + pub fn locked(&self) -> bool { + (self.0 & LOCKED_MASK) != 0 + } + + /// Indicates whether the hypercall page is enabled. + pub fn enabled(&self) -> bool { + (self.0 & ENABLED_MASK) != 0 + } + + /// Clears this value's enabled bit. 
+ pub fn clear_enabled(&mut self) { + self.0 &= !ENABLED_MASK; + } +} + +/// The sequence of instructions to write to the hypercall page. This sequence +/// is `mov rax, 2; ret`, which returns a "not supported" status for all +/// hypercalls without actually requiring the guest to exit. +// +// If and when actual hypercall support is required, this should change to +// either `0f 01 c1` (VMCALL) or `0f 01 d9` (VMMCALL), depending on whether the +// host is VMX- or SVM-based. +const HYPERCALL_INSTRUCTION_SEQUENCE: [u8; 8] = + [0x48, 0xc7, 0xc0, 0x02, 0x00, 0x00, 0x00, 0xc3]; + +/// Yields a page-sized buffer containing the contents of the hypercall page. +pub(super) fn hypercall_page_contents() -> [u8; PAGE_SIZE] { + let mut page = [0u8; PAGE_SIZE]; + page[0..8].copy_from_slice(&HYPERCALL_INSTRUCTION_SEQUENCE); + page +} diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs new file mode 100644 index 000000000..a40fbfe73 --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -0,0 +1,324 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Support for Microsoft Hyper-V emulation. +//! +//! Windows guests and many Linux guests can interoperate with hypervisors that +//! implement the hypervisor described in Microsoft's Hypervisor Top-Level +//! Functional Specification (TLFS). The behavior in this module is based on +//! version 6.0b of the TLFS, which is available on GitHub: +//! https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/main/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf +//! +//! Microsoft also maintains a list of minimum requirements for any hypervisor +//! that intends to implement a Hyper-V-compatible interface: +//! 
https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/main/tlfs/Requirements%20for%20Implementing%20the%20Microsoft%20Hypervisor%20Interface.pdf + +use std::sync::Mutex; + +use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; + +use crate::{ + accessors::MemAccessor, + common::{GuestRegion, Lifecycle, VcpuId, PAGE_SIZE}, + enlightenment::{ + hyperv::{ + bits::*, + hypercall::{hypercall_page_contents, MsrHypercallValue}, + }, + AddCpuidError, + }, + migrate::{ + MigrateCtx, MigrateSingle, MigrateStateError, Migrator, PayloadOffer, + PayloadOutput, + }, + msr::{MsrId, RdmsrOutcome, WrmsrOutcome}, + vmm::SubMapping, +}; + +mod bits; +mod hypercall; + +#[usdt::provider(provider = "propolis")] +mod probes { + fn hyperv_wrmsr_guest_os_id(val: u64) {} + fn hyperv_wrmsr_hypercall(val: u64, gpa: u64, locked: bool, enabled: bool) { + } + fn hyperv_wrmsr_hypercall_bad_gpa(gpa: u64) {} +} + +const TYPE_NAME: &str = "guest-hyperv-interface"; + +#[derive(Debug, Default)] +struct Inner { + /// The last value stored in the [`bits::HV_X64_MSR_GUEST_OS_ID`] MSR. + msr_guest_os_id_value: u64, + + /// The last value stored in the [`bits::HV_X64_MSR_HYPERCALL`] MSR. + msr_hypercall_value: MsrHypercallValue, +} + +pub struct HyperV { + #[allow(dead_code)] + log: slog::Logger, + inner: Mutex, + acc_mem: MemAccessor, +} + +impl HyperV { + /// Creates a new Hyper-V enlightenment stack. + pub fn new(log: &slog::Logger) -> Self { + let acc_mem = MemAccessor::new_orphan(); + let log = log.new(slog::o!("component" => "hyperv")); + Self { log, inner: Mutex::new(Inner::default()), acc_mem } + } + + /// Handles a write to the HV_X64_MSR_GUEST_OS_ID register. + fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { + probes::hyperv_wrmsr_guest_os_id!(|| value); + let mut inner = self.inner.lock().unwrap(); + + // TLFS section 3.13 says that the hypercall page "becomes disabled" if + // the guest OS ID register is cleared after the hypercall register is + // set. 
It also specifies that attempts to set the Enabled bit in that + // register will be ignored if the guest OS ID is zeroed, so handle this + // case by clearing the hypercall MSR's Enabled bit but otherwise + // leaving the hypercall page untouched (as would happen if the guest + // manually cleared this bit). + if value == 0 { + inner.msr_hypercall_value.clear_enabled(); + } + + inner.msr_guest_os_id_value = value; + WrmsrOutcome::Handled + } + + /// Handles a write to the HV_X64_MSR_HYPERCALL register. See TLFS section + /// 3.13 and [`MsrHypercallValue`]. + fn handle_wrmsr_hypercall(&self, value: u64) -> WrmsrOutcome { + let mut new = MsrHypercallValue(value); + probes::hyperv_wrmsr_hypercall!(|| ( + value, + new.gpa().0, + new.locked(), + new.enabled() + )); + + let mut inner = self.inner.lock().unwrap(); + let old = inner.msr_hypercall_value; + + // This MSR is immutable once the Locked bit is set. + if old.locked() { + return WrmsrOutcome::Handled; + } + + // If this MSR is written when no guest OS ID is set, the Enabled bit is + // cleared and the write succeeds. + if inner.msr_guest_os_id_value == 0 { + new.clear_enabled(); + } + + // If the Enabled bit is not set, there's nothing to try to expose to + // the guest. + if !new.enabled() { + inner.msr_hypercall_value = new; + return WrmsrOutcome::Handled; + } + + let memctx = self + .acc_mem + .access() + .expect("guest memory is always accessible during wrmsr"); + + let region = GuestRegion(new.gpa(), PAGE_SIZE); + + // Mapping will fail if the requested GPA is out of the guest's physical + // address range. The TLFS specifies that this should raise #GP. + let Some(mapping) = memctx.writable_region(&region) else { + probes::hyperv_wrmsr_hypercall_bad_gpa!(|| new.gpa().0); + return WrmsrOutcome::GpException; + }; + + // Write the hypercall instruction sequence to the requested GPA. 
+ // + // TODO: TLFS section 5.2.1 specifies that when an overlay is removed, + // "the underlying GPA page is 'uncovered', and an existing mapping + // becomes accessible to the guest." Empirically, at least some other + // Hv#1 implementations don't appear to follow this rule, and most + // common guest OSes don't rely on being able to disable or remove the + // hypercall page. Nevertheless, Propolis should eventually follow this + // rule. + write_overlay_page(&mapping, &hypercall_page_contents()); + + inner.msr_hypercall_value = new; + WrmsrOutcome::Handled + } +} + +impl super::Enlightenment for HyperV { + fn add_cpuid(&self, cpuid: &mut CpuidSet) -> Result<(), AddCpuidError> { + let mut to_add = CpuidSet::new(cpuid.vendor()); + + let mut add_to_set = |id, val| { + to_add + .insert(id, val) + .expect("Hyper-V CPUID values don't conflict"); + }; + + add_to_set(CpuidIdent::leaf(0x4000_0001), HYPERV_LEAF_1_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0002), HYPERV_LEAF_2_VALUES); + add_to_set( + CpuidIdent::leaf(0x4000_0003), + CpuidValues { + eax: HyperVLeaf3Eax::default().bits(), + ..Default::default() + }, + ); + + add_to_set(CpuidIdent::leaf(0x4000_0004), HYPERV_LEAF_4_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0005), HYPERV_LEAF_5_VALUES); + + // Set the maximum available CPUID leaf to the smallest value required + // to expose all of the enlightenment's features. + // + // WARNING: In at least some versions of propolis-server, the CPUID + // configuration generated by this enlightenment is not part of the + // instance description that the migration source sends to its target. + // Instead, the source sends the target its *enlightenment + // configuration* and assumes that the target will produce the same + // CPUID settings the source produced. 
This includes the maximum + // available enlightenment leaf: it should not be set to the maximum + // leaf this version of Propolis knows about, but to the maximum leaf + // required by the features enabled in this enlightenment stack. + add_to_set( + CpuidIdent::leaf(0x4000_0000), + bits::hyperv_leaf_0_values(0x4000_0005), + ); + + super::add_cpuid(cpuid, to_add) + } + + fn rdmsr(&self, vcpu: VcpuId, msr: MsrId) -> RdmsrOutcome { + match msr.0 { + HV_X64_MSR_GUEST_OS_ID => RdmsrOutcome::Handled( + self.inner.lock().unwrap().msr_guest_os_id_value, + ), + HV_X64_MSR_HYPERCALL => RdmsrOutcome::Handled( + self.inner.lock().unwrap().msr_hypercall_value.0, + ), + HV_X64_MSR_VP_INDEX => { + let id: u32 = vcpu.into(); + RdmsrOutcome::Handled(id as u64) + } + _ => RdmsrOutcome::NotHandled, + } + } + + fn wrmsr(&self, _vcpu: VcpuId, msr: MsrId, value: u64) -> WrmsrOutcome { + match msr.0 { + HV_X64_MSR_GUEST_OS_ID => self.handle_wrmsr_guest_os_id(value), + HV_X64_MSR_HYPERCALL => self.handle_wrmsr_hypercall(value), + HV_X64_MSR_VP_INDEX => WrmsrOutcome::GpException, + _ => WrmsrOutcome::NotHandled, + } + } + + fn attach(&self, mem_acc: &MemAccessor) { + mem_acc.adopt(&self.acc_mem, Some(TYPE_NAME.to_owned())); + } +} + +fn write_overlay_page(mapping: &SubMapping<'_>, contents: &[u8; PAGE_SIZE]) { + let written = mapping + .write_bytes(contents) + .expect("overlay pages are always writable"); + + assert_eq!(written, PAGE_SIZE, "overlay pages can be written completely"); +} + +impl Lifecycle for HyperV { + fn type_name(&self) -> &'static str { + TYPE_NAME + } + + fn reset(&self) { + let mut inner = self.inner.lock().unwrap(); + *inner = Inner::default(); + } + + fn migrate(&'_ self) -> Migrator<'_> { + Migrator::Single(self) + } +} + +impl MigrateSingle for HyperV { + fn export( + &self, + _ctx: &MigrateCtx, + ) -> Result { + let inner = self.inner.lock().unwrap(); + Ok(migrate::HyperVEnlightenmentV1 { + msr_guest_os_id: inner.msr_guest_os_id_value, + msr_hypercall: 
inner.msr_hypercall_value.0, + } + .into()) + } + + fn import( + &self, + mut offer: PayloadOffer, + ctx: &MigrateCtx, + ) -> Result<(), MigrateStateError> { + let data: migrate::HyperVEnlightenmentV1 = offer.parse()?; + + // A well-behaved source should ensure that the hypercall MSR value is + // within the guest's PA range and that its Enabled bit agrees with the + // value of the guest OS ID MSR. But this data was received over the + // wire, so for safety's sake, verify it all and return a migration + // error if anything is inconsistent. + let hypercall_msr = MsrHypercallValue(data.msr_hypercall); + if hypercall_msr.enabled() { + if data.msr_guest_os_id == 0 { + return Err(MigrateStateError::ImportFailed( + "hypercall MSR enabled but guest OS ID MSR is 0" + .to_string(), + )); + } + + let Some(mapping) = ctx + .mem + .writable_region(&GuestRegion(hypercall_msr.gpa(), PAGE_SIZE)) + else { + return Err(MigrateStateError::ImportFailed(format!( + "couldn't map hypercall page for MSR value \ + {hypercall_msr:?}" + ))); + }; + + write_overlay_page(&mapping, &hypercall_page_contents()); + } + + let mut inner = self.inner.lock().unwrap(); + inner.msr_guest_os_id_value = data.msr_guest_os_id; + inner.msr_hypercall_value = hypercall_msr; + Ok(()) + } +} + +mod migrate { + use serde::{Deserialize, Serialize}; + + use crate::migrate::{Schema, SchemaId}; + + #[derive(Debug, Serialize, Deserialize)] + pub struct HyperVEnlightenmentV1 { + pub(super) msr_guest_os_id: u64, + pub(super) msr_hypercall: u64, + } + + impl Schema<'_> for HyperVEnlightenmentV1 { + fn id() -> SchemaId { + (super::TYPE_NAME, 1) + } + } +} diff --git a/lib/propolis/src/enlightenment/mod.rs b/lib/propolis/src/enlightenment/mod.rs index 86c4ec12e..bcf5bc55d 100644 --- a/lib/propolis/src/enlightenment/mod.rs +++ b/lib/propolis/src/enlightenment/mod.rs @@ -67,6 +67,7 @@ use crate::{ }; pub mod bhyve; +pub mod hyperv; /// Functionality provided by all enlightenment interfaces. 
pub trait Enlightenment: Lifecycle + Send + Sync { diff --git a/lib/propolis/src/msr.rs b/lib/propolis/src/msr.rs index a5651f63c..98d691fcb 100644 --- a/lib/propolis/src/msr.rs +++ b/lib/propolis/src/msr.rs @@ -10,7 +10,7 @@ pub struct MsrId(pub u32); /// An outcome resulting from a request to emulate the RDMSR instruction. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum RdmsrOutcome { /// This RDMSR was not handled. The caller must decide how to dispose of it. NotHandled, @@ -25,7 +25,7 @@ pub enum RdmsrOutcome { } /// An outcome resulting from a request to emulate the WRMSR instruction. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum WrmsrOutcome { /// This WRMSR was not handled. The caller must decide how to dispose of it. NotHandled, diff --git a/lib/propolis/src/vmm/mem.rs b/lib/propolis/src/vmm/mem.rs index 370c0a973..31257963c 100644 --- a/lib/propolis/src/vmm/mem.rs +++ b/lib/propolis/src/vmm/mem.rs @@ -625,7 +625,7 @@ impl SubMapping<'_> { /// If `buf` is larger than the SubMapping, the write will be truncated to /// length of the SubMapping. /// - /// Returns the number of bytes read. + /// Returns the number of bytes written. 
pub fn write_bytes(&self, buf: &[u8]) -> Result { let write_len = usize::min(buf.len(), self.len); self.write_many(&buf[..write_len])?; diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index f9441b94b..3cebdb179 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1051,9 +1051,46 @@ "type" ], "additionalProperties": false + }, + { + "description": "Expose a Hyper-V-compatible hypervisor interface with the supplied features enabled.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "HyperV" + ] + }, + "value": { + "type": "object", + "properties": { + "features": { + "type": "array", + "items": { + "$ref": "#/components/schemas/HyperVFeatureFlag" + }, + "uniqueItems": true + } + }, + "required": [ + "features" + ], + "additionalProperties": false + } + }, + "required": [ + "type", + "value" + ], + "additionalProperties": false } ] }, + "HyperVFeatureFlag": { + "description": "Flags that enable \"simple\" Hyper-V enlightenments that require no feature-specific configuration.", + "type": "string" + }, "I440Fx": { "description": "An Intel 440FX-compatible chipset.", "type": "object", diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index a97d2b142..cb586fd8a 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -10,9 +10,9 @@ use propolis_client::{ support::nvme_serial_from_str, types::{ Board, BootOrderEntry, BootSettings, Chipset, ComponentV0, Cpuid, - CpuidEntry, CpuidVendor, InstanceMetadata, InstanceSpecV0, - MigrationFailureInjector, NvmeDisk, SerialPort, SerialPortNumber, - VirtioDisk, + CpuidEntry, CpuidVendor, GuestHypervisorInterface, InstanceMetadata, + InstanceSpecV0, MigrationFailureInjector, NvmeDisk, SerialPort, + SerialPortNumber, VirtioDisk, }, PciPath, SpecKey, }; @@ -56,6 +56,7 @@ pub struct VmConfig<'dr> { boot_order: Option>, disks: Vec>, migration_failure: 
Option, + guest_hv_interface: Option, } impl<'dr> VmConfig<'dr> { @@ -75,6 +76,7 @@ impl<'dr> VmConfig<'dr> { boot_order: None, disks: Vec::new(), migration_failure: None, + guest_hv_interface: None, }; config.boot_disk( @@ -112,6 +114,14 @@ impl<'dr> VmConfig<'dr> { self } + pub fn guest_hv_interface( + &mut self, + interface: GuestHypervisorInterface, + ) -> &mut Self { + self.guest_hv_interface = Some(interface); + self + } + pub fn fail_migration_exports(&mut self, exports: u32) -> &mut Self { let injector = self.migration_failure.get_or_insert(MigrationFailureInjector { @@ -211,6 +221,7 @@ impl<'dr> VmConfig<'dr> { boot_order, disks, migration_failure, + guest_hv_interface, } = self; let bootrom_path = framework @@ -288,7 +299,7 @@ impl<'dr> VmConfig<'dr> { cpuid_utils::CpuidVendor::Intel => CpuidVendor::Intel, }, }), - guest_hv_interface: None, + guest_hv_interface: guest_hv_interface.clone(), }, components: Default::default(), }; diff --git a/phd-tests/tests/src/hyperv.rs b/phd-tests/tests/src/hyperv.rs new file mode 100644 index 000000000..050b0f75d --- /dev/null +++ b/phd-tests/tests/src/hyperv.rs @@ -0,0 +1,82 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use phd_framework::{artifacts, lifecycle::Action, TestVm}; +use phd_testcase::*; +use tracing::warn; + +/// Attempts to see if the guest has detected Hyper-V support. This is +/// best-effort, since not all PHD guest images contain in-box tools that +/// display the current hypervisor vendor. +/// +/// NOTE: If the guest lacks a facility to check the hypervisor vendor, this +/// routine logs a warning but does not return a "Skipped" result. This allows +/// the smoke tests to return a Pass result to show that they exercised VM +/// startup and shutdown with Hyper-V emulation enabled. 
+async fn guest_detect_hyperv(vm: &TestVm) -> anyhow::Result<()> { + if vm.guest_os_kind().is_linux() { + // Many Linux distros come with systemd installed out of the box. On + // these distros, it's easiest to use `systemd-detect-virt` to determine + // whether the guest thinks it's running on a Hyper-V-compatible + // hypervisor. (Whether any actual enlightenments are enabled is another + // story, but those can often be detected by other means.) + let out = vm.run_shell_command("systemd-detect-virt").await?; + if out.contains("systemd-detect-virt: not found") { + warn!( + "guest doesn't support systemd-detect-virt, can't verify it \ + detected Hyper-V support" + ); + } else { + assert_eq!(out, "microsoft"); + } + } else if vm.guest_os_kind().is_windows() { + // Windows is good about giving signals that it's running in a Hyper-V + // *root partition*, but offers no clear signal as to whether it has + // detected a Hyper-V host when it's running as a non-root guest. (There + // are methods for detecting whether Windows is running as a guest, but + // these don't identify the detected hypervisor type.) 
+ warn!("running on Windows, can't verify it detected Hyper-V support"); + } + + Ok(()) +} + +#[phd_testcase] +async fn hyperv_smoke_test(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("hyperv_smoke_test"); + cfg.guest_hv_interface( + propolis_client::types::GuestHypervisorInterface::HyperV { + features: vec![], + }, + ); + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + guest_detect_hyperv(&vm).await?; +} + +#[phd_testcase] +async fn hyperv_migration_smoke_test(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("hyperv_migration_smoke_test"); + cfg.guest_hv_interface( + propolis_client::types::GuestHypervisorInterface::HyperV { + features: vec![], + }, + ); + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + ctx.lifecycle_test( + vm, + &[Action::MigrateToPropolis(artifacts::DEFAULT_PROPOLIS_ARTIFACT)], + |target: &TestVm| { + Box::pin(async { + guest_detect_hyperv(target).await.unwrap(); + }) + }, + ) + .await?; +} diff --git a/phd-tests/tests/src/lib.rs b/phd-tests/tests/src/lib.rs index b76f0bec1..c894ae8bd 100644 --- a/phd-tests/tests/src/lib.rs +++ b/phd-tests/tests/src/lib.rs @@ -10,6 +10,7 @@ mod crucible; mod disk; mod framework; mod hw; +mod hyperv; mod migrate; mod server_state_machine; mod smoke;