From d6be4199d91724d88c6438696290e776774d8be7 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Jan 2025 00:40:37 +0000 Subject: [PATCH 01/20] lib: look ma, I'm Hyper-V --- lib/propolis/src/common.rs | 2 +- lib/propolis/src/enlightenment/hyperv/bits.rs | 122 ++++++++++ .../src/enlightenment/hyperv/hypercall.rs | 60 +++++ lib/propolis/src/enlightenment/hyperv/mod.rs | 219 ++++++++++++++++++ .../src/enlightenment/hyperv/overlay.rs | 133 +++++++++++ lib/propolis/src/enlightenment/mod.rs | 1 + lib/propolis/src/vmm/mem.rs | 8 +- 7 files changed, 543 insertions(+), 2 deletions(-) create mode 100644 lib/propolis/src/enlightenment/hyperv/bits.rs create mode 100644 lib/propolis/src/enlightenment/hyperv/hypercall.rs create mode 100644 lib/propolis/src/enlightenment/hyperv/mod.rs create mode 100644 lib/propolis/src/enlightenment/hyperv/overlay.rs diff --git a/lib/propolis/src/common.rs b/lib/propolis/src/common.rs index e80d85090..7d118d611 100644 --- a/lib/propolis/src/common.rs +++ b/lib/propolis/src/common.rs @@ -467,7 +467,7 @@ impl RWOp<'_, '_> { } /// An address within a guest VM. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct GuestAddr(pub u64); impl GuestAddr { diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs new file mode 100644 index 000000000..133270dff --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -0,0 +1,122 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Constant definitions and flags for Hyper-V emulations. These are drawn from +//! the Hyper-V TLFS version 6.0b (referred to as "TLFS" below). See the parent +//! module documentation for more details. + +use cpuid_utils::CpuidValues; + +/// The maximum hypervisor CPUID leaf supported by the Hyper-V emulation stack. +/// This must be at least 0x4000_0005 to comply with the Hyper-V spec, but to +/// minimize future complexity, Propolis advertises up to the maximum specified +/// leaf of 0x4000_000A. +const HYPERV_MAX_CPUID_LEAF: u32 = 0x4000000A; + +/// CPUID leaf 0x4000_0000 contains hypervisor identifying information. eax +/// receives the highest valid CPUID leaf in the hypervisor range. ebx, ecx, and +/// edx receive a 12-byte vendor ID. +/// +/// In order to get both Linux and Windows guests to accept these +/// enlightenments, the ebx/ecx/edx ID here is set to "Microsoft Hv". Windows +/// guests will accept other vendor IDs (they look at leaf 1 eax to identify the +/// hypervisor interface instead of reading the vendor ID in leaf 0), but Linux +/// guests only consider the vendor ID. +pub(super) const HYPERV_LEAF_0_VALUES: CpuidValues = CpuidValues { + eax: HYPERV_MAX_CPUID_LEAF, + ebx: 0x7263694D, + ecx: 0x666F736F, + edx: 0x76482074, +}; + +/// Hyper-V leaf 0x4000_0001 contains an (ostensibly vendor-neutral) interface +/// identifier. eax receives "Hv#1"; the other three outputs are reserved. +pub(super) const HYPERV_LEAF_1_VALUES: CpuidValues = + CpuidValues { eax: 0x31237648, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0002 contains hypervisor version information. To avoid +/// having to reason about what it means to expose a specific hypervisor version +/// across a live migration between potentially different host and/or Propolis +/// versions, this information is always set to 0. +pub(super) const HYPERV_LEAF_2_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +bitflags::bitflags! { + /// Hyper-V leaf 0x4000_0003 eax returns synthetic MSR access rights. + /// Only the bits actually enabled by this enlightenment stack are + /// enumerated here. + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct HyperVLeaf3Eax: u32 { + const PARTITION_REFERENCE_COUNTER = 1 << 1; + const HYPERCALL = 1 << 5; + const VP_INDEX = 1 << 6; + const PARTITION_REFERENCE_TSC = 1 << 9; + + // Bits 14-31 of this register are reserved. + } +} + +/// Hyper-V leaf 0x4000_0004 describes behavior that the guest OS should +/// implement for optimal performance. Propolis expresses no opinion about these +/// options, except that it indicates in ebx that the guest should never try to +/// notify the hypervisor about failed spinlock acquisitions. +pub(super) const HYPERV_LEAF_4_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0xFFFFFFFF, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0005 describes the hypervisor's CPU and interrupt +/// remapping limits. Hypervisors are allowed not to expose these limits by +/// publishing 0s to this leaf. +pub(super) const HYPERV_LEAF_5_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0006 advertises that the host OS is aware of and may be +/// making use of assorted hardware features. +pub(super) const HYPERV_LEAF_6_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0007 advertises CPU management features that are +/// available to a Hyper-V root partition. Since Propolis guests are by +/// definition never running as a Hyper-V root partition, these values are 0. +pub(super) const HYPERV_LEAF_7_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0008 describes the hypervisor's support for emulation of +/// shared virtual memory. +pub(super) const HYPERV_LEAF_8_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_0009 appears to describe the access rights afforded to +/// an L1 hypervisor running on top of an L0 Hyper-V instance. +pub(super) const HYPERV_LEAF_9_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Hyper-V leaf 0x4000_000A appears to describe the virtualization +/// optimizations available to an L1 hypervisor running on top of an L0 Hyper-V +/// instance. +pub(super) const HYPERV_LEAF_A_VALUES: CpuidValues = + CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; + +/// Allows the guest to report its type and version information. See TLFS +/// section 2.6 for details about this MSR's format. +/// +/// Guest OSes are (theoretically) required to identify themselves by writing to +/// this MSR before they try to enable the hypercall code page. +/// +/// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. +pub(super) const MSR_GUEST_OS_ID: u32 = 0x4000_0000; + +/// Specifies the guest physical address at which the guest would like to place +/// the hypercall page. See TLFS section 3.13 and the [`MsrHypercalLValue`] +/// struct. +/// +/// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. +/// +/// [`MsrHypercallValue`]: super::hypercall::MsrHypercallValue +pub(super) const MSR_HYPERCALL: u32 = 0x4000_0001; + +/// Guests may read this register to obtain the index of the vCPU that read the +/// register. +/// +/// Read-only; requires the [`HyperVLeaf3Eax::VP_INDEX`] privilege. +pub(super) const MSR_VP_INDEX: u32 = 0x4000_0002; diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs new file mode 100644 index 000000000..76471e6b1 --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -0,0 +1,60 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Support for hypercalls and their related MSRs. + +use crate::common::{GuestAddr, PAGE_SHIFT, PAGE_SIZE}; + +/// Represents a value written to the [`MSR_HYPERCALL`] register. +/// +/// Writing to this register enables the hypercall page. The hypervisor overlays +/// this page with an instruction sequence that the guest should execute in +/// order to issue a call to the hypervisor. See +/// [`HYPERCALL_INSTRUCTION_SEQUENCE`]. +/// +/// [`MSR_HYPERCALL`]: super::bits::MSR_HYPERCALL +#[derive(Clone, Copy, Debug, Default)] +pub(super) struct MsrHypercallValue(pub(super) u64); + +impl MsrHypercallValue { + /// Returns the guest physical page number at which the guest would like the + /// hypercall page to be placed. + pub fn gpfn(&self) -> u64 { + self.0 >> 12 + } + + /// Returns the guest physical address at which the guest would like the + /// hypercall page to be placed. + pub fn gpa(&self) -> GuestAddr { + GuestAddr(self.gpfn() << PAGE_SHIFT) + } + + /// Returns whether the hypercall page location is locked. Once locked, the + /// value in `MSR_HYPERCALL` cannot change until the system is reset. + pub fn locked(&self) -> bool { + (self.0 & 2) != 0 + } + + /// Indicates whether the hypercall page is enabled. + pub fn enabled(&self) -> bool { + (self.0 & 1) != 0 + } +} + +/// The sequence of instructions to write to the hypercall page. This sequence +/// is `mov rax, 2; ret`, which returns a "not supported" status for all +/// hypercalls without actually requiring the guest to exit. +// +// If and when actual hypercall support is required, this should change to +// either `0f 01 c1` (VMCALL) or `0f 01 d9` (VMMCALL), depending on whether the +// host is VMX- or SVM-based. +const HYPERCALL_INSTRUCTION_SEQUENCE: [u8; 8] = + [0x48, 0xc7, 0xc0, 0x02, 0x00, 0x00, 0x00, 0xc3]; + +/// Yields a page-sized buffer containing the contents of the hypercall page. +pub(super) fn hypercall_page_contents() -> [u8; PAGE_SIZE] { + let mut page = [0u8; PAGE_SIZE]; + page[0..8].copy_from_slice(&HYPERCALL_INSTRUCTION_SEQUENCE); + page +} diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs new file mode 100644 index 000000000..22d5c7754 --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -0,0 +1,219 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Support for Microsoft Hyper-V emulation. +//! +//! Windows guests and many Linux guests can interoperate with hypervisors that +//! implement the hypervisor described in Microsoft's Hypervisor Top-Level +//! Functional Specification (TLFS). The behavior in this module is based on +//! version 6.0b of the TLFS, which is available on GitHub: +//! https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/main/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf +//! +//! Microsoft also maintains a list of minimum requirements for any hypervisor +//! that intends to implement a Hyper-V-compatible interface: +//! https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/main/tlfs/Requirements%20for%20Implementing%20the%20Microsoft%20Hypervisor%20Interface.pdf + +use std::sync::{Arc, Mutex}; + +use bits::*; +use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; +use hypercall::{hypercall_page_contents, MsrHypercallValue}; +use overlay::OverlayPage; +use slog::{info, warn}; + +use crate::{ + accessors::MemAccessor, + common::{Lifecycle, VcpuId}, + enlightenment::AddCpuidError, + migrate::Migrator, + msr::{MsrId, RdmsrOutcome, WrmsrOutcome}, +}; + +mod bits; +mod hypercall; +mod overlay; + +const TYPE_NAME: &str = "guest-hyperv-interface"; + +#[derive(Default)] +struct Inner { + /// The last value stored in the [`bits::MSR_GUEST_OS_ID`] MSR. + msr_guest_os_id_value: u64, + + /// The last value stored in the [`bits::MSR_HYPERCALL`] MSR. + msr_hypercall_value: MsrHypercallValue, + + /// The previous contents of the guest physical page that has been overlaid + /// with the hypercall page. + hypercall_overlay: Option, +} + +impl std::fmt::Debug for Inner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Inner") + .field("msr_guest_os_id_value", &self.msr_guest_os_id_value) + .field("msr_hypercall_value", &self.msr_hypercall_value) + .field( + "old_hypercall_page_contents.is_some()", + &self.hypercall_overlay.is_some(), + ) + .finish() + } +} + +pub struct HyperV { + log: slog::Logger, + inner: Mutex, + acc_mem: MemAccessor, +} + +impl HyperV { + pub fn new(log: slog::Logger) -> Arc { + let acc_mem = MemAccessor::new_orphan(); + Arc::new(Self { log, inner: Mutex::new(Inner::default()), acc_mem }) + } + + fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { + let mut inner = self.inner.lock().unwrap(); + + // TODO(gjc): TLFS section 3.13 says that if a guest clears the guest OS ID + // register, the hypercall page "become[s] disabled." It's unclear + // whether this means just that the Enabled bit in the relevant register + // is cleared or whether it also means that the hypercall page overlay + // is removed. + if value == 0 { + warn!(&self.log, "guest cleared HV_X64_MSR_GUEST_OS_ID"); + } + + inner.msr_guest_os_id_value = value; + WrmsrOutcome::Handled + } + + fn handle_wrmsr_hypercall(&self, value: u64) -> WrmsrOutcome { + let mut inner = self.inner.lock().unwrap(); + + // If the previous value of the hypercall register has the lock bit set, + // ignore future modifications. + let old = inner.msr_hypercall_value; + if old.locked() { + return WrmsrOutcome::Handled; + } + + let new = MsrHypercallValue(value); + if new.enabled() { + match inner.hypercall_overlay.as_mut() { + Some(overlay) => { + if overlay.move_to(new.gpa()) { + info!(&self.log, "moved hypercall page"; + "guest_pfn" => new.gpfn()); + + WrmsrOutcome::Handled + } else { + WrmsrOutcome::GpException + } + } + None => { + let overlay = OverlayPage::create( + self.acc_mem.child(None), + new.gpa(), + &hypercall_page_contents(), + ); + + if overlay.is_some() { + info!(&self.log, "enabled hypercall page"; + "guest_pfn" => new.gpfn()); + + inner.hypercall_overlay = overlay; + WrmsrOutcome::Handled + } else { + WrmsrOutcome::GpException + } + } + } + } else { + inner.hypercall_overlay.take(); + WrmsrOutcome::Handled + } + } +} + +impl super::Enlightenment for HyperV { + fn add_cpuid(&self, cpuid: &mut CpuidSet) -> Result<(), AddCpuidError> { + let mut to_add = CpuidSet::new(cpuid.vendor()); + + let mut add_to_set = |id, val| { + to_add + .insert(id, val) + .expect("Hyper-V CPUID values don't conflict"); + }; + + add_to_set(CpuidIdent::leaf(0x4000_0000), HYPERV_LEAF_0_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0001), HYPERV_LEAF_1_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0002), HYPERV_LEAF_2_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0004), HYPERV_LEAF_4_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0005), HYPERV_LEAF_5_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0006), HYPERV_LEAF_6_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0007), HYPERV_LEAF_7_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0008), HYPERV_LEAF_8_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0009), HYPERV_LEAF_9_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_000A), HYPERV_LEAF_A_VALUES); + + let leaf_3_eax = HyperVLeaf3Eax::HYPERCALL | HyperVLeaf3Eax::VP_INDEX; + + add_to_set( + CpuidIdent::leaf(0x4000_0003), + CpuidValues { eax: leaf_3_eax.bits(), ..Default::default() }, + ); + + super::add_cpuid(cpuid, to_add) + } + + fn rdmsr(&self, vcpu: VcpuId, msr: MsrId) -> RdmsrOutcome { + match msr.0 { + MSR_GUEST_OS_ID => RdmsrOutcome::Handled( + self.inner.lock().unwrap().msr_guest_os_id_value, + ), + MSR_HYPERCALL => RdmsrOutcome::Handled( + self.inner.lock().unwrap().msr_hypercall_value.0, + ), + MSR_VP_INDEX => { + let id: u32 = vcpu.into(); + RdmsrOutcome::Handled(id as u64) + } + _ => RdmsrOutcome::NotHandled, + } + } + + fn wrmsr(&self, _vcpu: VcpuId, msr: MsrId, value: u64) -> WrmsrOutcome { + match msr.0 { + MSR_GUEST_OS_ID => self.handle_wrmsr_guest_os_id(value), + MSR_HYPERCALL => self.handle_wrmsr_hypercall(value), + MSR_VP_INDEX => WrmsrOutcome::GpException, + _ => WrmsrOutcome::NotHandled, + } + } + + fn attach(&self, mem_acc: &MemAccessor) { + mem_acc.adopt(&self.acc_mem, Some(TYPE_NAME.to_owned())); + } +} + +impl Lifecycle for HyperV { + fn type_name(&self) -> &'static str { + TYPE_NAME + } + + fn reset(&self) { + let mut inner = self.inner.lock().unwrap(); + + // The contents of guest memory are going to be completely reinitialized + // anyway, so there's no need to manually remove any overlay pages. + *inner = Inner::default(); + } + + // TODO: Migration support. + fn migrate(&'_ self) -> Migrator<'_> { + Migrator::NonMigratable + } +} diff --git a/lib/propolis/src/enlightenment/hyperv/overlay.rs b/lib/propolis/src/enlightenment/hyperv/overlay.rs new file mode 100644 index 000000000..0ec8d15ea --- /dev/null +++ b/lib/propolis/src/enlightenment/hyperv/overlay.rs @@ -0,0 +1,133 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Helpers for dealing with overlays onto guest physical pages. + +use crate::{ + accessors::MemAccessor, + common::{GuestAddr, GuestRegion, PAGE_SIZE}, +}; + +pub(super) struct OverlayPage { + mem_acc: MemAccessor, + addr: GuestAddr, + old_contents: Box<[u8; PAGE_SIZE]>, +} + +impl OverlayPage { + pub(super) fn create( + mem_acc: MemAccessor, + addr: GuestAddr, + contents: &[u8; PAGE_SIZE], + ) -> Option { + let mem_ctx = mem_acc + .access() + .expect("overlay pages can always access guest memory"); + + let region = GuestRegion(addr, PAGE_SIZE); + let mapping = mem_ctx.read_write_region(®ion)?; + + let mut old_contents = Box::new([0u8; PAGE_SIZE]); + let bytes_read = + mapping.read_bytes(old_contents.as_mut_slice()).ok()?; + if bytes_read != PAGE_SIZE { + return None; + } + + let bytes_written = mapping + .write_bytes(contents) + .expect("mapping is at least PAGE_SIZE and is writable"); + + assert_eq!(bytes_written, PAGE_SIZE); + + Some(Self { mem_acc, addr, old_contents }) + } + + pub(super) fn move_to(&mut self, new_addr: GuestAddr) -> bool { + // Do nothing if the page isn't actually moving. + if new_addr == self.addr { + return true; + } + + let mem_ctx = self + .mem_acc + .access() + .expect("overlay pages can always access guest memory"); + + // Map the new overlay page. This is the only fallible operation in this + // routine, since the old overlay had a valid mapping. + let new_region = GuestRegion(self.addr, PAGE_SIZE); + let Some(new_mapping) = mem_ctx.read_write_region(&new_region) else { + return false; + }; + + // Back up the contents of the new overlay page. + let mut new_saved = [0u8; PAGE_SIZE]; + assert_eq!( + new_mapping + .read_bytes(new_saved.as_mut_slice()) + .expect("new overlay page is readable"), + PAGE_SIZE + ); + + // Read the overlaid data from the old mapping and write it to the new + // page. + let old_region = GuestRegion(self.addr, PAGE_SIZE); + let old_mapping = mem_ctx + .read_write_region(&old_region) + .expect("old overlay page is readable and writable"); + + let mut to_move = [0u8; PAGE_SIZE]; + assert_eq!( + old_mapping + .read_bytes(to_move.as_mut_slice()) + .expect("old overlay page is readable"), + PAGE_SIZE + ); + + assert_eq!( + new_mapping + .write_bytes(to_move.as_slice()) + .expect("new overlay page is writable"), + PAGE_SIZE + ); + + // Restore the data from the old mapping to the old page. + assert_eq!( + old_mapping + .write_bytes(self.old_contents.as_slice()) + .expect("old overlay page is writable"), + PAGE_SIZE + ); + + // Save the new mapping's previous data so it can be restored later. + self.old_contents.copy_from_slice(&to_move); + + true + } + + fn remove(&mut self) { + let mem_ctx = self + .mem_acc + .access() + .expect("overlay pages can always access guest memory"); + + let region = GuestRegion(self.addr, PAGE_SIZE); + let mapping = mem_ctx + .read_write_region(®ion) + .expect("overlay page was writable before"); + + let bytes_written = mapping + .write_bytes(self.old_contents.as_slice()) + .expect("overlay region is writable and page-sized"); + + assert_eq!(bytes_written, PAGE_SIZE); + } +} + +impl Drop for OverlayPage { + fn drop(&mut self) { + self.remove(); + } +} diff --git a/lib/propolis/src/enlightenment/mod.rs b/lib/propolis/src/enlightenment/mod.rs index 86c4ec12e..bcf5bc55d 100644 --- a/lib/propolis/src/enlightenment/mod.rs +++ b/lib/propolis/src/enlightenment/mod.rs @@ -67,6 +67,7 @@ use crate::{ }; pub mod bhyve; +pub mod hyperv; /// Functionality provided by all enlightenment interfaces. pub trait Enlightenment: Lifecycle + Send + Sync { diff --git a/lib/propolis/src/vmm/mem.rs b/lib/propolis/src/vmm/mem.rs index 370c0a973..4b024e415 100644 --- a/lib/propolis/src/vmm/mem.rs +++ b/lib/propolis/src/vmm/mem.rs @@ -625,7 +625,7 @@ impl SubMapping<'_> { /// If `buf` is larger than the SubMapping, the write will be truncated to /// length of the SubMapping. /// - /// Returns the number of bytes read. + /// Returns the number of bytes written. pub fn write_bytes(&self, buf: &[u8]) -> Result { let write_len = usize::min(buf.len(), self.len); self.write_many(&buf[..write_len])?; @@ -910,6 +910,12 @@ impl MemCtx { let mapping = self.region_covered(region.0, region.1, Prot::READ)?; Some(mapping) } + pub fn read_write_region( + &self, + region: &GuestRegion, + ) -> Option { + self.region_covered(region.0, region.1, Prot::RW) + } /// Like `direct_writable_region`, but looks up the region by name. pub fn direct_writable_region_by_name( From 76719ef58b41bef14a111e49b66522c184a9a4b4 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Jan 2025 19:16:00 +0000 Subject: [PATCH 02/20] server/api: hook up Hyper-V enlightenment option --- bin/propolis-server/src/lib/vm/ensure.rs | 24 +++++++++++++++---- .../src/instance_spec/components/board.rs | 8 +++++++ lib/propolis/src/enlightenment/hyperv/mod.rs | 6 ++--- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 8c1a9b251..6c4e89b3d 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -29,7 +29,9 @@ use std::sync::Arc; use oximeter::types::ProducerRegistry; use oximeter_instruments::kstat::KstatSampler; -use propolis::enlightenment::{bhyve::BhyveGuestInterface, Enlightenment}; +use propolis::enlightenment::{ + bhyve::BhyveGuestInterface, hyperv::HyperV, Enlightenment, +}; use propolis_api_types::{ instance_spec::components::board::GuestHypervisorInterface, InstanceEnsureResponse, InstanceMigrateInitiateResponse, @@ -391,9 +393,21 @@ async fn initialize_vm_objects( let vmm_log = log.new(slog::o!("component" => "vmm")); - let guest_hv_interface = match spec.board.guest_hv_interface { - GuestHypervisorInterface::Bhyve => Arc::new(BhyveGuestInterface), - }; + let (guest_hv_interface, guest_hv_lifecycle) = + match spec.board.guest_hv_interface { + GuestHypervisorInterface::Bhyve => { + let bhyve = Arc::new(BhyveGuestInterface); + let lifecycle = bhyve.clone(); + (bhyve as Arc, lifecycle.as_lifecycle()) + } + GuestHypervisorInterface::HyperV { .. } => { + let hyperv = Arc::new(HyperV::new( + vmm_log.new(slog::o!("component" => "guest-hv-interface")), + )); + let lifecycle = hyperv.clone(); + (hyperv as Arc, lifecycle.as_lifecycle()) + } + }; // Set up the 'shell' instance into which the rest of this routine will // add components. @@ -458,7 +472,7 @@ async fn initialize_vm_objects( let ramfb = init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?; - init.register_guest_hv_interface(guest_hv_interface.as_lifecycle()); + init.register_guest_hv_interface(guest_hv_lifecycle); init.initialize_cpus().await?; let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( &machine, diff --git a/crates/propolis-api-types/src/instance_spec/components/board.rs b/crates/propolis-api-types/src/instance_spec/components/board.rs index 9dd879090..9613599cb 100644 --- a/crates/propolis-api-types/src/instance_spec/components/board.rs +++ b/crates/propolis-api-types/src/instance_spec/components/board.rs @@ -92,6 +92,10 @@ pub struct CpuidEntry { pub edx: u32, } +#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] +#[serde(deny_unknown_fields, tag = "type", content = "value")] +pub enum HyperVFeature {} + /// A hypervisor interface to expose to the guest. #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema, Default)] #[serde(deny_unknown_fields, tag = "type", content = "value")] @@ -100,6 +104,10 @@ pub enum GuestHypervisorInterface { /// leaf 0x4000_0000 and no additional leaves or features). #[default] Bhyve, + + /// Expose a Hyper-V-compatible hypervisor interface with the supplied + /// features enabled. + HyperV { features: Vec }, } impl GuestHypervisorInterface { diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 22d5c7754..40c9f8cc8 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -14,7 +14,7 @@ //! that intends to implement a Hyper-V-compatible interface: //! https://github.com/MicrosoftDocs/Virtualization-Documentation/blob/main/tlfs/Requirements%20for%20Implementing%20the%20Microsoft%20Hypervisor%20Interface.pdf -use std::sync::{Arc, Mutex}; +use std::sync::Mutex; use bits::*; use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; @@ -69,9 +69,9 @@ pub struct HyperV { } impl HyperV { - pub fn new(log: slog::Logger) -> Arc { + pub fn new(log: slog::Logger) -> Self { let acc_mem = MemAccessor::new_orphan(); - Arc::new(Self { log, inner: Mutex::new(Inner::default()), acc_mem }) + Self { log, inner: Mutex::new(Inner::default()), acc_mem } } fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { From a8eef7078eb3363531ed3cb64982bf135e6675d7 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Jan 2025 20:53:17 +0000 Subject: [PATCH 03/20] OpenAPI update --- openapi/propolis-server.json | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index f9441b94b..c73807ef3 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1051,9 +1051,44 @@ "type" ], "additionalProperties": false + }, + { + "description": "Expose a Hyper-V-compatible hypervisor interface with the supplied features enabled.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "HyperV" + ] + }, + "value": { + "type": "object", + "properties": { + "features": { + "type": "array", + "items": { + "$ref": "#/components/schemas/HyperVFeature" + } + } + }, + "required": [ + "features" + ], + "additionalProperties": false + } + }, + "required": [ + "type", + "value" + ], + "additionalProperties": false } ] }, + "HyperVFeature": { + "oneOf": [] + }, "I440Fx": { "description": "An Intel 440FX-compatible chipset.", "type": "object", From 6012c0a82b21a73e9b1bbcd197c7214872da5260 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Jan 2025 20:53:17 +0000 Subject: [PATCH 04/20] phd: Hyper-V smoke test --- phd-tests/framework/src/test_vm/config.rs | 19 +++++++++++++++---- phd-tests/tests/src/hyperv.rs | 22 ++++++++++++++++++++++ phd-tests/tests/src/lib.rs | 1 + 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 phd-tests/tests/src/hyperv.rs diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index a97d2b142..cb586fd8a 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -10,9 +10,9 @@ use propolis_client::{ support::nvme_serial_from_str, types::{ Board, BootOrderEntry, BootSettings, Chipset, ComponentV0, Cpuid, - CpuidEntry, CpuidVendor, InstanceMetadata, InstanceSpecV0, - MigrationFailureInjector, NvmeDisk, SerialPort, SerialPortNumber, - VirtioDisk, + CpuidEntry, CpuidVendor, GuestHypervisorInterface, InstanceMetadata, + InstanceSpecV0, MigrationFailureInjector, NvmeDisk, SerialPort, + SerialPortNumber, VirtioDisk, }, PciPath, SpecKey, }; @@ -56,6 +56,7 @@ pub struct VmConfig<'dr> { boot_order: Option>, disks: Vec>, migration_failure: Option, + guest_hv_interface: Option, } impl<'dr> VmConfig<'dr> { @@ -75,6 +76,7 @@ impl<'dr> VmConfig<'dr> { boot_order: None, disks: Vec::new(), migration_failure: None, + guest_hv_interface: None, }; config.boot_disk( @@ -112,6 +114,14 @@ impl<'dr> VmConfig<'dr> { self } + pub fn guest_hv_interface( + &mut self, + interface: GuestHypervisorInterface, + ) -> &mut Self { + self.guest_hv_interface = Some(interface); + self + } + pub fn fail_migration_exports(&mut self, exports: u32) -> &mut Self { let injector = self.migration_failure.get_or_insert(MigrationFailureInjector { @@ -211,6 +221,7 @@ impl<'dr> VmConfig<'dr> { boot_order, disks, migration_failure, + guest_hv_interface, } = self; let bootrom_path = framework @@ -288,7 +299,7 @@ impl<'dr> VmConfig<'dr> { cpuid_utils::CpuidVendor::Intel => CpuidVendor::Intel, }, }), - guest_hv_interface: None, + guest_hv_interface: guest_hv_interface.clone(), }, components: Default::default(), }; diff --git a/phd-tests/tests/src/hyperv.rs b/phd-tests/tests/src/hyperv.rs new file mode 100644 index 000000000..eda2cff1e --- /dev/null +++ b/phd-tests/tests/src/hyperv.rs @@ -0,0 +1,22 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use phd_testcase::*; +use tracing::info; + +#[phd_testcase] +async fn hyperv_smoke_test(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("hyperv_smoke_test"); + cfg.guest_hv_interface( + propolis_client::types::GuestHypervisorInterface::HyperV { + features: vec![], + }, + ); + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + let out = vm.run_shell_command("systemd-detect-virt").await?; + info!(out, "detected hypervisor"); +} diff --git a/phd-tests/tests/src/lib.rs b/phd-tests/tests/src/lib.rs index b76f0bec1..c894ae8bd 100644 --- a/phd-tests/tests/src/lib.rs +++ b/phd-tests/tests/src/lib.rs @@ -10,6 +10,7 @@ mod crucible; mod disk; mod framework; mod hw; +mod hyperv; mod migrate; mod server_state_machine; mod smoke; From 694de1b6b027ba1ea1208d16543c9828802a523c Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Jan 2025 22:00:41 +0000 Subject: [PATCH 05/20] phd: improve test & fix a bug it found --- lib/propolis/src/enlightenment/hyperv/mod.rs | 10 +++++++ phd-tests/tests/src/hyperv.rs | 31 ++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 40c9f8cc8..17a607793 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -212,6 +212,16 @@ impl Lifecycle for HyperV { *inner = Inner::default(); } + fn halt(&self) { + let mut inner = self.inner.lock().unwrap(); + + // The overlay page module assumes that it always has access to guest + // memory, which is not the case if overlays are being dropped because + // the VM has completely shut down. Manually remove any overlays before + // that happens. + inner.hypercall_overlay.take(); + } + // TODO: Migration support. fn migrate(&'_ self) -> Migrator<'_> { Migrator::NonMigratable diff --git a/phd-tests/tests/src/hyperv.rs b/phd-tests/tests/src/hyperv.rs index eda2cff1e..eed5ecb86 100644 --- a/phd-tests/tests/src/hyperv.rs +++ b/phd-tests/tests/src/hyperv.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use phd_testcase::*; -use tracing::info; +use tracing::warn; #[phd_testcase] async fn hyperv_smoke_test(ctx: &Framework) { @@ -17,6 +17,31 @@ async fn hyperv_smoke_test(ctx: &Framework) { vm.launch().await?; vm.wait_to_boot().await?; - let out = vm.run_shell_command("systemd-detect-virt").await?; - info!(out, "detected hypervisor"); + // Make a best-effort attempt to detect that Hyper-V is actually present in + // the guest. It's valuable to run this test to completion regardless since + // it exercises Propolis shutdown while the Hyper-V enlightenment stack is + // active. + if vm.guest_os_kind().is_linux() { + // Many Linux distros come with systemd installed out of the box. On + // these distros, it's easiest to use `systemd-detect-virt` to determine + // whether the guest thinks it's running on a Hyper-V-compatible + // hypervisor. (Whether any actual enlightenments are enabled is another + // story, but those can often be detected by other means.) + let out = vm.run_shell_command("systemd-detect-virt").await?; + if out.contains("systemd-detect-virt: not found") { + warn!( + "guest doesn't support systemd-detect-virt, can't verify it \ + detected Hyper-V support" + ); + } else { + assert_eq!(out, "microsoft"); + } + } else if vm.guest_os_kind().is_windows() { + // Windows is good about giving signals that it's running in a Hyper-V + // *root partition*, but offers no clear signal as to whether it has + // detected a Hyper-V host when it's running as a non-root guest. (There + // are methods for detecting whether Windows is running as a guest, but + // these don't identify the detected hypervisor type.) + warn!("running on Windows, can't verify it detected Hyper-V support"); + } } From 67bb549ed6d9e39e80d2b175a98a2048ec93e9ba Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 31 Jan 2025 03:54:05 +0000 Subject: [PATCH 06/20] lib: remove complicated overlay page type; improve names --- lib/propolis/src/enlightenment/hyperv/bits.rs | 6 +- .../src/enlightenment/hyperv/hypercall.rs | 5 + lib/propolis/src/enlightenment/hyperv/mod.rs | 148 +++++++++--------- .../src/enlightenment/hyperv/overlay.rs | 133 ---------------- lib/propolis/src/msr.rs | 4 +- 5 files changed, 80 insertions(+), 216 deletions(-) delete mode 100644 lib/propolis/src/enlightenment/hyperv/overlay.rs diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs index 133270dff..c01316b6d 100644 --- a/lib/propolis/src/enlightenment/hyperv/bits.rs +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -104,7 +104,7 @@ pub(super) const HYPERV_LEAF_A_VALUES: CpuidValues = /// this MSR before they try to enable the hypercall code page. /// /// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. -pub(super) const MSR_GUEST_OS_ID: u32 = 0x4000_0000; +pub(super) const HV_X64_MSR_GUEST_OS_ID: u32 = 0x4000_0000; /// Specifies the guest physical address at which the guest would like to place /// the hypercall page. See TLFS section 3.13 and the [`MsrHypercalLValue`] @@ -113,10 +113,10 @@ pub(super) const MSR_GUEST_OS_ID: u32 = 0x4000_0000; /// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. /// /// [`MsrHypercallValue`]: super::hypercall::MsrHypercallValue -pub(super) const MSR_HYPERCALL: u32 = 0x4000_0001; +pub(super) const HV_X64_MSR_HYPERCALL: u32 = 0x4000_0001; /// Guests may read this register to obtain the index of the vCPU that read the /// register. /// /// Read-only; requires the [`HyperVLeaf3Eax::VP_INDEX`] privilege. -pub(super) const MSR_VP_INDEX: u32 = 0x4000_0002; +pub(super) const HV_X64_MSR_VP_INDEX: u32 = 0x4000_0002; diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs index 76471e6b1..a6cb42c9a 100644 --- a/lib/propolis/src/enlightenment/hyperv/hypercall.rs +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -40,6 +40,11 @@ impl MsrHypercallValue { pub fn enabled(&self) -> bool { (self.0 & 1) != 0 } + + /// Clears this value's enabled bit. + pub fn clear_enabled(&mut self) { + self.0 &= !1; + } } /// The sequence of instructions to write to the hypercall page. This sequence diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 17a607793..71ffe2582 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -19,47 +19,29 @@ use std::sync::Mutex; use bits::*; use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; use hypercall::{hypercall_page_contents, MsrHypercallValue}; -use overlay::OverlayPage; -use slog::{info, warn}; +use slog::info; use crate::{ accessors::MemAccessor, - common::{Lifecycle, VcpuId}, + common::{GuestRegion, Lifecycle, VcpuId, PAGE_SIZE}, enlightenment::AddCpuidError, migrate::Migrator, msr::{MsrId, RdmsrOutcome, WrmsrOutcome}, + vmm::SubMapping, }; mod bits; mod hypercall; -mod overlay; const TYPE_NAME: &str = "guest-hyperv-interface"; -#[derive(Default)] +#[derive(Debug, Default)] struct Inner { /// The last value stored in the [`bits::MSR_GUEST_OS_ID`] MSR. msr_guest_os_id_value: u64, /// The last value stored in the [`bits::MSR_HYPERCALL`] MSR. msr_hypercall_value: MsrHypercallValue, - - /// The previous contents of the guest physical page that has been overlaid - /// with the hypercall page. - hypercall_overlay: Option, -} - -impl std::fmt::Debug for Inner { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Inner") - .field("msr_guest_os_id_value", &self.msr_guest_os_id_value) - .field("msr_hypercall_value", &self.msr_hypercall_value) - .field( - "old_hypercall_page_contents.is_some()", - &self.hypercall_overlay.is_some(), - ) - .finish() - } } pub struct HyperV { @@ -77,64 +59,76 @@ impl HyperV { fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { let mut inner = self.inner.lock().unwrap(); - // TODO(gjc): TLFS section 3.13 says that if a guest clears the guest OS ID - // register, the hypercall page "become[s] disabled." It's unclear - // whether this means just that the Enabled bit in the relevant register - // is cleared or whether it also means that the hypercall page overlay - // is removed. + // TLFS section 3.13 specifies that if the guest OS ID register is + // cleared, then the hypercall page immediately "becomes disabled." The + // exact semantics of "becoming disabled" are unclear, but in context it + // seems most reasonable to read this as "the Enabled bit is cleared and + // the hypercall overlay is removed." if value == 0 { - warn!(&self.log, "guest cleared HV_X64_MSR_GUEST_OS_ID"); + info!(&self.log, "guest cleared HV_X64_MSR_GUEST_OS_ID"); + inner.msr_hypercall_value.clear_enabled(); } inner.msr_guest_os_id_value = value; WrmsrOutcome::Handled } + /// Handles a write to the HV_X64_MSR_HYPERCALL register. See TLFS section + /// 3.13. fn handle_wrmsr_hypercall(&self, value: u64) -> WrmsrOutcome { let mut inner = self.inner.lock().unwrap(); - - // If the previous value of the hypercall register has the lock bit set, - // ignore future modifications. let old = inner.msr_hypercall_value; + + // TLFS section 3.13 says that this MSR is "immutable" once the locked + // bit is set. if old.locked() { return WrmsrOutcome::Handled; } - let new = MsrHypercallValue(value); - if new.enabled() { - match inner.hypercall_overlay.as_mut() { - Some(overlay) => { - if overlay.move_to(new.gpa()) { - info!(&self.log, "moved hypercall page"; - "guest_pfn" => new.gpfn()); - - WrmsrOutcome::Handled - } else { - WrmsrOutcome::GpException - } - } - None => { - let overlay = OverlayPage::create( - self.acc_mem.child(None), - new.gpa(), - &hypercall_page_contents(), - ); - - if overlay.is_some() { - info!(&self.log, "enabled hypercall page"; - "guest_pfn" => new.gpfn()); - - inner.hypercall_overlay = overlay; - WrmsrOutcome::Handled - } else { - WrmsrOutcome::GpException - } - } + // If this MSR is written when no guest OS ID is set, the Enabled bit is + // cleared and the write succeeds. + let mut new = MsrHypercallValue(value); + if inner.msr_guest_os_id_value == 0 { + new.clear_enabled(); + } + + // If the Enabled bit is set, expose the hypercall instruction sequence + // at the requested GPA. The TLFS specifies that this raises #GP if the + // selected physical address is outside the bounds of the guest's + // physical address space. + // + // The TLFS describes this page as an "overlay" that "covers whatever + // else is mapped to the GPA range." The spec is ambiguous as to whether + // this means that the page's previous contents must be restored if the + // page is disabled or its address later changes. Empirically, most + // common guest OSes don't appear to move the page after creating it, + // and at least some other hypervisors don't bother with saving and + // restoring the old page's contents. So, for simplicity, simply write + // the hypercall instruction sequence to the requested page and call it + // a day. + let outcome = if new.enabled() { + let memctx = self + .acc_mem + .access() + .expect("guest memory is always accessible during wrmsr"); + + let region = GuestRegion(new.gpa(), PAGE_SIZE); + if let Some(mapping) = memctx.writable_region(®ion) { + write_overlay_page(&mapping, &hypercall_page_contents()); + WrmsrOutcome::Handled + } else { + WrmsrOutcome::GpException } } else { - inner.hypercall_overlay.take(); WrmsrOutcome::Handled + }; + + // Commit the new MSR value if the write was handled. + if outcome == WrmsrOutcome::Handled { + inner.msr_hypercall_value = new; } + + outcome } } @@ -171,13 +165,13 @@ impl super::Enlightenment for HyperV { fn rdmsr(&self, vcpu: VcpuId, msr: MsrId) -> RdmsrOutcome { match msr.0 { - MSR_GUEST_OS_ID => RdmsrOutcome::Handled( + HV_X64_MSR_GUEST_OS_ID => RdmsrOutcome::Handled( self.inner.lock().unwrap().msr_guest_os_id_value, ), - MSR_HYPERCALL => RdmsrOutcome::Handled( + HV_X64_MSR_HYPERCALL => RdmsrOutcome::Handled( self.inner.lock().unwrap().msr_hypercall_value.0, ), - MSR_VP_INDEX => { + HV_X64_MSR_VP_INDEX => { let id: u32 = vcpu.into(); RdmsrOutcome::Handled(id as u64) } @@ -187,9 +181,9 @@ impl super::Enlightenment for HyperV { fn wrmsr(&self, _vcpu: VcpuId, msr: MsrId, value: u64) -> WrmsrOutcome { match msr.0 { - MSR_GUEST_OS_ID => self.handle_wrmsr_guest_os_id(value), - MSR_HYPERCALL => self.handle_wrmsr_hypercall(value), - MSR_VP_INDEX => WrmsrOutcome::GpException, + HV_X64_MSR_GUEST_OS_ID => self.handle_wrmsr_guest_os_id(value), + HV_X64_MSR_HYPERCALL => self.handle_wrmsr_hypercall(value), + HV_X64_MSR_VP_INDEX => WrmsrOutcome::GpException, _ => WrmsrOutcome::NotHandled, } } @@ -199,6 +193,14 @@ impl super::Enlightenment for HyperV { } } +fn write_overlay_page(mapping: &SubMapping<'_>, contents: &[u8; PAGE_SIZE]) { + let written = mapping + .write_bytes(contents) + .expect("overlay pages are always writable"); + + assert_eq!(written, PAGE_SIZE, "overlay pages can be written completely"); +} + impl Lifecycle for HyperV { fn type_name(&self) -> &'static str { TYPE_NAME @@ -212,16 +214,6 @@ impl Lifecycle for HyperV { *inner = Inner::default(); } - fn halt(&self) { - let mut inner = self.inner.lock().unwrap(); - - // The overlay page module assumes that it always has access to guest - // memory, which is not the case if overlays are being dropped because - // the VM has completely shut down. Manually remove any overlays before - // that happens. - inner.hypercall_overlay.take(); - } - // TODO: Migration support. fn migrate(&'_ self) -> Migrator<'_> { Migrator::NonMigratable diff --git a/lib/propolis/src/enlightenment/hyperv/overlay.rs b/lib/propolis/src/enlightenment/hyperv/overlay.rs deleted file mode 100644 index 0ec8d15ea..000000000 --- a/lib/propolis/src/enlightenment/hyperv/overlay.rs +++ /dev/null @@ -1,133 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Helpers for dealing with overlays onto guest physical pages. - -use crate::{ - accessors::MemAccessor, - common::{GuestAddr, GuestRegion, PAGE_SIZE}, -}; - -pub(super) struct OverlayPage { - mem_acc: MemAccessor, - addr: GuestAddr, - old_contents: Box<[u8; PAGE_SIZE]>, -} - -impl OverlayPage { - pub(super) fn create( - mem_acc: MemAccessor, - addr: GuestAddr, - contents: &[u8; PAGE_SIZE], - ) -> Option { - let mem_ctx = mem_acc - .access() - .expect("overlay pages can always access guest memory"); - - let region = GuestRegion(addr, PAGE_SIZE); - let mapping = mem_ctx.read_write_region(®ion)?; - - let mut old_contents = Box::new([0u8; PAGE_SIZE]); - let bytes_read = - mapping.read_bytes(old_contents.as_mut_slice()).ok()?; - if bytes_read != PAGE_SIZE { - return None; - } - - let bytes_written = mapping - .write_bytes(contents) - .expect("mapping is at least PAGE_SIZE and is writable"); - - assert_eq!(bytes_written, PAGE_SIZE); - - Some(Self { mem_acc, addr, old_contents }) - } - - pub(super) fn move_to(&mut self, new_addr: GuestAddr) -> bool { - // Do nothing if the page isn't actually moving. - if new_addr == self.addr { - return true; - } - - let mem_ctx = self - .mem_acc - .access() - .expect("overlay pages can always access guest memory"); - - // Map the new overlay page. This is the only fallible operation in this - // routine, since the old overlay had a valid mapping. - let new_region = GuestRegion(self.addr, PAGE_SIZE); - let Some(new_mapping) = mem_ctx.read_write_region(&new_region) else { - return false; - }; - - // Back up the contents of the new overlay page. - let mut new_saved = [0u8; PAGE_SIZE]; - assert_eq!( - new_mapping - .read_bytes(new_saved.as_mut_slice()) - .expect("new overlay page is readable"), - PAGE_SIZE - ); - - // Read the overlaid data from the old mapping and write it to the new - // page. - let old_region = GuestRegion(self.addr, PAGE_SIZE); - let old_mapping = mem_ctx - .read_write_region(&old_region) - .expect("old overlay page is readable and writable"); - - let mut to_move = [0u8; PAGE_SIZE]; - assert_eq!( - old_mapping - .read_bytes(to_move.as_mut_slice()) - .expect("old overlay page is readable"), - PAGE_SIZE - ); - - assert_eq!( - new_mapping - .write_bytes(to_move.as_slice()) - .expect("new overlay page is writable"), - PAGE_SIZE - ); - - // Restore the data from the old mapping to the old page. - assert_eq!( - old_mapping - .write_bytes(self.old_contents.as_slice()) - .expect("old overlay page is writable"), - PAGE_SIZE - ); - - // Save the new mapping's previous data so it can be restored later. - self.old_contents.copy_from_slice(&to_move); - - true - } - - fn remove(&mut self) { - let mem_ctx = self - .mem_acc - .access() - .expect("overlay pages can always access guest memory"); - - let region = GuestRegion(self.addr, PAGE_SIZE); - let mapping = mem_ctx - .read_write_region(®ion) - .expect("overlay page was writable before"); - - let bytes_written = mapping - .write_bytes(self.old_contents.as_slice()) - .expect("overlay region is writable and page-sized"); - - assert_eq!(bytes_written, PAGE_SIZE); - } -} - -impl Drop for OverlayPage { - fn drop(&mut self) { - self.remove(); - } -} diff --git a/lib/propolis/src/msr.rs b/lib/propolis/src/msr.rs index a5651f63c..98d691fcb 100644 --- a/lib/propolis/src/msr.rs +++ b/lib/propolis/src/msr.rs @@ -10,7 +10,7 @@ pub struct MsrId(pub u32); /// An outcome resulting from a request to emulate the RDMSR instruction. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum RdmsrOutcome { /// This RDMSR was not handled. The caller must decide how to dispose of it. NotHandled, @@ -25,7 +25,7 @@ pub enum RdmsrOutcome { } /// An outcome resulting from a request to emulate the WRMSR instruction. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum WrmsrOutcome { /// This WRMSR was not handled. The caller must decide how to dispose of it. NotHandled, From 41d90342fbd6d15adebf5b1166c519385a5f192b Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 4 Feb 2025 17:31:07 +0000 Subject: [PATCH 07/20] lib: hyper-v migration support --- .../src/enlightenment/hyperv/hypercall.rs | 4 +- lib/propolis/src/enlightenment/hyperv/mod.rs | 88 +++++++++++++++++-- phd-tests/tests/src/hyperv.rs | 62 +++++++++---- 3 files changed, 127 insertions(+), 27 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs index a6cb42c9a..056816b5c 100644 --- a/lib/propolis/src/enlightenment/hyperv/hypercall.rs +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -6,14 +6,14 @@ use crate::common::{GuestAddr, PAGE_SHIFT, PAGE_SIZE}; -/// Represents a value written to the [`MSR_HYPERCALL`] register. +/// Represents a value written to the [`HV_X64_MSR_HYPERCALL`] register. /// /// Writing to this register enables the hypercall page. The hypervisor overlays /// this page with an instruction sequence that the guest should execute in /// order to issue a call to the hypervisor. See /// [`HYPERCALL_INSTRUCTION_SEQUENCE`]. /// -/// [`MSR_HYPERCALL`]: super::bits::MSR_HYPERCALL +/// [`HV_X64_MSR_HYPERCALL`]: super::bits::HV_X64_MSR_HYPERCALL #[derive(Clone, Copy, Debug, Default)] pub(super) struct MsrHypercallValue(pub(super) u64); diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 71ffe2582..87b969e7a 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -25,7 +25,10 @@ use crate::{ accessors::MemAccessor, common::{GuestRegion, Lifecycle, VcpuId, PAGE_SIZE}, enlightenment::AddCpuidError, - migrate::Migrator, + migrate::{ + MigrateCtx, MigrateSingle, MigrateStateError, Migrator, PayloadOffer, + PayloadOutput, + }, msr::{MsrId, RdmsrOutcome, WrmsrOutcome}, vmm::SubMapping, }; @@ -59,11 +62,13 @@ impl HyperV { fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { let mut inner = self.inner.lock().unwrap(); - // TLFS section 3.13 specifies that if the guest OS ID register is - // cleared, then the hypercall page immediately "becomes disabled." The - // exact semantics of "becoming disabled" are unclear, but in context it - // seems most reasonable to read this as "the Enabled bit is cleared and - // the hypercall overlay is removed." + // TLFS section 3.13 says that the hypercall page "becomes disabled" if + // the guest OS ID register is cleared after the hypercall register is + // set. It also specifies that attempts to set the Enabled bit in that + // register will be ignored if the guest OS ID is zeroed, so handle this + // case by clearing the hypercall MSR's Enabled bit but otherwise + // leaving the hypercall page untouched (as would happen if the guest + // manually cleared this bit). if value == 0 { info!(&self.log, "guest cleared HV_X64_MSR_GUEST_OS_ID"); inner.msr_hypercall_value.clear_enabled(); @@ -79,7 +84,7 @@ impl HyperV { let mut inner = self.inner.lock().unwrap(); let old = inner.msr_hypercall_value; - // TLFS section 3.13 says that this MSR is "immutable" once the locked + // TLFS section 3.13 says that this MSR is immutable once the locked // bit is set. if old.locked() { return WrmsrOutcome::Handled; @@ -214,8 +219,73 @@ impl Lifecycle for HyperV { *inner = Inner::default(); } - // TODO: Migration support. fn migrate(&'_ self) -> Migrator<'_> { - Migrator::NonMigratable + Migrator::Single(self) + } +} + +impl MigrateSingle for HyperV { + fn export( + &self, + _ctx: &MigrateCtx, + ) -> Result { + let inner = self.inner.lock().unwrap(); + Ok(migrate::HyperVEnlightenmentV1 { + msr_guest_os_id: inner.msr_guest_os_id_value, + msr_hypercall: inner.msr_hypercall_value.0, + } + .into()) + } + + fn import( + &self, + mut offer: PayloadOffer, + ctx: &MigrateCtx, + ) -> Result<(), MigrateStateError> { + let data: migrate::HyperVEnlightenmentV1 = offer.parse()?; + + let hypercall_msr = MsrHypercallValue(data.msr_hypercall); + if hypercall_msr.enabled() { + if data.msr_guest_os_id == 0 { + return Err(MigrateStateError::ImportFailed( + "hypercall MSR enabled but guest OS ID MSR is 0" + .to_string(), + )); + } + + let Some(mapping) = ctx + .mem + .writable_region(&GuestRegion(hypercall_msr.gpa(), PAGE_SIZE)) + else { + return Err(MigrateStateError::ImportFailed( + "couldn't map GPA in hypercall MSR".to_string(), + )); + }; + + write_overlay_page(&mapping, &hypercall_page_contents()); + } + + let mut inner = self.inner.lock().unwrap(); + inner.msr_guest_os_id_value = data.msr_guest_os_id; + inner.msr_hypercall_value = hypercall_msr; + Ok(()) + } +} + +mod migrate { + use serde::{Deserialize, Serialize}; + + use crate::migrate::{Schema, SchemaId}; + + #[derive(Debug, Serialize, Deserialize)] + pub struct HyperVEnlightenmentV1 { + pub(super) msr_guest_os_id: u64, + pub(super) msr_hypercall: u64, + } + + impl Schema<'_> for HyperVEnlightenmentV1 { + fn id() -> SchemaId { + (super::TYPE_NAME, 1) + } } } diff --git a/phd-tests/tests/src/hyperv.rs b/phd-tests/tests/src/hyperv.rs index eed5ecb86..7ca5211da 100644 --- a/phd-tests/tests/src/hyperv.rs +++ b/phd-tests/tests/src/hyperv.rs @@ -2,25 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use phd_framework::{artifacts, lifecycle::Action, TestVm}; use phd_testcase::*; use tracing::warn; -#[phd_testcase] -async fn hyperv_smoke_test(ctx: &Framework) { - let mut cfg = ctx.vm_config_builder("hyperv_smoke_test"); - cfg.guest_hv_interface( - propolis_client::types::GuestHypervisorInterface::HyperV { - features: vec![], - }, - ); - let mut vm = ctx.spawn_vm(&cfg, None).await?; - vm.launch().await?; - vm.wait_to_boot().await?; - - // Make a best-effort attempt to detect that Hyper-V is actually present in - // the guest. It's valuable to run this test to completion regardless since - // it exercises Propolis shutdown while the Hyper-V enlightenment stack is - // active. +/// Attempts to see if the guest has detected Hyper-V support. This is +/// best-effort, since not all PHD guest images contain in-box tools that +/// display the current hypervisor vendor. +async fn guest_detect_hyperv(vm: &TestVm) -> anyhow::Result<()> { if vm.guest_os_kind().is_linux() { // Many Linux distros come with systemd installed out of the box. On // these distros, it's easiest to use `systemd-detect-virt` to determine @@ -44,4 +33,45 @@ async fn hyperv_smoke_test(ctx: &Framework) { // these don't identify the detected hypervisor type.) warn!("running on Windows, can't verify it detected Hyper-V support"); } + + Ok(()) +} + +#[phd_testcase] +async fn hyperv_smoke_test(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("hyperv_smoke_test"); + cfg.guest_hv_interface( + propolis_client::types::GuestHypervisorInterface::HyperV { + features: vec![], + }, + ); + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + guest_detect_hyperv(&vm).await?; +} + +#[phd_testcase] +async fn hyperv_migration_smoke_test(ctx: &Framework) { + let mut cfg = ctx.vm_config_builder("hyperv_migration_smoke_test"); + cfg.guest_hv_interface( + propolis_client::types::GuestHypervisorInterface::HyperV { + features: vec![], + }, + ); + let mut vm = ctx.spawn_vm(&cfg, None).await?; + vm.launch().await?; + vm.wait_to_boot().await?; + + ctx.lifecycle_test( + vm, + &[Action::MigrateToPropolis(artifacts::DEFAULT_PROPOLIS_ARTIFACT)], + |target: &TestVm| { + Box::pin(async { + guest_detect_hyperv(target).await.unwrap(); + }) + }, + ) + .await?; } From f6ff55a688f623a753b5fe6743f61287d42b2464 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 4 Feb 2025 18:35:44 +0000 Subject: [PATCH 08/20] cli: add hyper-v switch --- bin/propolis-cli/src/main.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index acffa1c98..0faa544ba 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -19,10 +19,10 @@ use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag}; use propolis_client::support::nvme_serial_from_str; use propolis_client::types::{ BlobStorageBackend, Board, Chipset, ComponentV0, CrucibleStorageBackend, - I440Fx, InstanceEnsureRequest, InstanceInitializationMethod, - InstanceMetadata, InstanceSpecGetResponse, InstanceSpecV0, NvmeDisk, - QemuPvpanic, ReplacementComponent, SerialPort, SerialPortNumber, - VirtioDisk, + GuestHypervisorInterface, I440Fx, InstanceEnsureRequest, + InstanceInitializationMethod, InstanceMetadata, InstanceSpecGetResponse, + InstanceSpecV0, NvmeDisk, QemuPvpanic, ReplacementComponent, SerialPort, + SerialPortNumber, VirtioDisk, }; use propolis_client::{PciPath, SpecKey}; use propolis_config_toml::spec::SpecConfig; @@ -189,6 +189,10 @@ struct VmConfig { // cloud_init ISO file #[clap(long, action, conflicts_with = "spec")] cloud_init: Option, + + /// enable Hyper-V compatible enlightenments for this VM + #[clap(long, action)] + hyperv: bool, } fn add_component_to_spec( @@ -293,7 +297,11 @@ impl VmConfig { cpuid: None, cpus: self.vcpus, memory_mb: self.memory, - guest_hv_interface: None, + guest_hv_interface: if self.hyperv { + Some(GuestHypervisorInterface::HyperV { features: vec![] }) + } else { + None + }, }, components: Default::default(), }; From be57c4b90fff89ae3b9c2bf491cbea3284dd0f3f Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 4 Feb 2025 21:24:29 +0000 Subject: [PATCH 09/20] lib: hyper-v cleanup --- bin/propolis-server/src/lib/vm/ensure.rs | 4 +- lib/propolis/src/enlightenment/hyperv/bits.rs | 26 +++++++---- lib/propolis/src/enlightenment/hyperv/mod.rs | 46 ++++++++++++++----- 3 files changed, 53 insertions(+), 23 deletions(-) diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 6c4e89b3d..f210471f3 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -401,9 +401,7 @@ async fn initialize_vm_objects( (bhyve as Arc, lifecycle.as_lifecycle()) } GuestHypervisorInterface::HyperV { .. } => { - let hyperv = Arc::new(HyperV::new( - vmm_log.new(slog::o!("component" => "guest-hv-interface")), - )); + let hyperv = Arc::new(HyperV::new(&vmm_log)); let lifecycle = hyperv.clone(); (hyperv as Arc, lifecycle.as_lifecycle()) } diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs index c01316b6d..b79429d56 100644 --- a/lib/propolis/src/enlightenment/hyperv/bits.rs +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -9,9 +9,11 @@ use cpuid_utils::CpuidValues; /// The maximum hypervisor CPUID leaf supported by the Hyper-V emulation stack. -/// This must be at least 0x4000_0005 to comply with the Hyper-V spec, but to -/// minimize future complexity, Propolis advertises up to the maximum specified -/// leaf of 0x4000_000A. +/// This must be at least 0x4000_0005 to comply with the Hyper-V spec. To avoid +/// having to reason about what this value should be if Propolis ever exposes +/// features from a leaf greater than this, always expose the maximum leaf +/// defined in the TLFS, even if the entries for those leaves advertise no +/// features. const HYPERV_MAX_CPUID_LEAF: u32 = 0x4000000A; /// CPUID leaf 0x4000_0000 contains hypervisor identifying information. eax @@ -20,9 +22,9 @@ const HYPERV_MAX_CPUID_LEAF: u32 = 0x4000000A; /// /// In order to get both Linux and Windows guests to accept these /// enlightenments, the ebx/ecx/edx ID here is set to "Microsoft Hv". Windows -/// guests will accept other vendor IDs (they look at leaf 1 eax to identify the -/// hypervisor interface instead of reading the vendor ID in leaf 0), but Linux -/// guests only consider the vendor ID. +/// guests will accept other vendor IDs (they look at leaf 0x4000_0001 eax to +/// identify the hypervisor interface instead of reading the vendor ID in leaf +/// 0), but Linux guests only consider the vendor ID. pub(super) const HYPERV_LEAF_0_VALUES: CpuidValues = CpuidValues { eax: HYPERV_MAX_CPUID_LEAF, ebx: 0x7263694D, @@ -44,8 +46,8 @@ pub(super) const HYPERV_LEAF_2_VALUES: CpuidValues = bitflags::bitflags! { /// Hyper-V leaf 0x4000_0003 eax returns synthetic MSR access rights. - /// Only the bits actually enabled by this enlightenment stack are - /// enumerated here. + /// Only the bits actually used by this enlightenment stack are enumerated + /// here. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct HyperVLeaf3Eax: u32 { const PARTITION_REFERENCE_COUNTER = 1 << 1; @@ -57,6 +59,14 @@ bitflags::bitflags! { } } +impl Default for HyperVLeaf3Eax { + /// Grants access to the VP index and hypercall MSRs. This is the minimum + /// set of access rights that all Hyper-V-compatible hypervisors must grant. + fn default() -> Self { + HyperVLeaf3Eax::VP_INDEX | HyperVLeaf3Eax::HYPERCALL + } +} + /// Hyper-V leaf 0x4000_0004 describes behavior that the guest OS should /// implement for optimal performance. Propolis expresses no opinion about these /// options, except that it indicates in ebx that the guest should never try to diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 87b969e7a..61a31774f 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -40,10 +40,10 @@ const TYPE_NAME: &str = "guest-hyperv-interface"; #[derive(Debug, Default)] struct Inner { - /// The last value stored in the [`bits::MSR_GUEST_OS_ID`] MSR. + /// The last value stored in the [`bits::HV_X64_MSR_GUEST_OS_ID`] MSR. msr_guest_os_id_value: u64, - /// The last value stored in the [`bits::MSR_HYPERCALL`] MSR. + /// The last value stored in the [`bits::HV_X64_MSR_HYPERCALL`] MSR. msr_hypercall_value: MsrHypercallValue, } @@ -54,11 +54,14 @@ pub struct HyperV { } impl HyperV { - pub fn new(log: slog::Logger) -> Self { + /// Creates a new Hyper-V enlightenment stack. + pub fn new(log: &slog::Logger) -> Self { let acc_mem = MemAccessor::new_orphan(); + let log = log.new(slog::o!("component" => "hyperv")); Self { log, inner: Mutex::new(Inner::default()), acc_mem } } + /// Handles a write to the HV_X64_MSR_GUEST_OS_ID register. fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { let mut inner = self.inner.lock().unwrap(); @@ -72,6 +75,12 @@ impl HyperV { if value == 0 { info!(&self.log, "guest cleared HV_X64_MSR_GUEST_OS_ID"); inner.msr_hypercall_value.clear_enabled(); + } else { + info!( + &self.log, + "guest set HV_X64_MSR_GUEST_OS_ID"; + "value" => value + ); } inner.msr_guest_os_id_value = value; @@ -84,8 +93,7 @@ impl HyperV { let mut inner = self.inner.lock().unwrap(); let old = inner.msr_hypercall_value; - // TLFS section 3.13 says that this MSR is immutable once the locked - // bit is set. + // Per the spec, this MSR is immutable once the Locked bit is set. if old.locked() { return WrmsrOutcome::Handled; } @@ -106,7 +114,7 @@ impl HyperV { // else is mapped to the GPA range." The spec is ambiguous as to whether // this means that the page's previous contents must be restored if the // page is disabled or its address later changes. Empirically, most - // common guest OSes don't appear to move the page after creating it, + // guest OSes don't appear to move the page after creating it, // and at least some other hypervisors don't bother with saving and // restoring the old page's contents. So, for simplicity, simply write // the hypercall instruction sequence to the requested page and call it @@ -120,8 +128,19 @@ impl HyperV { let region = GuestRegion(new.gpa(), PAGE_SIZE); if let Some(mapping) = memctx.writable_region(®ion) { write_overlay_page(&mapping, &hypercall_page_contents()); + info!( + &self.log, + "established hypercall page at GPA {:#x}", + new.gpa().0 + ); + WrmsrOutcome::Handled } else { + info!( + &self.log, + "guest requested illegal hypercall GPA {:#x}", + new.gpa().0 + ); WrmsrOutcome::GpException } } else { @@ -158,11 +177,12 @@ impl super::Enlightenment for HyperV { add_to_set(CpuidIdent::leaf(0x4000_0009), HYPERV_LEAF_9_VALUES); add_to_set(CpuidIdent::leaf(0x4000_000A), HYPERV_LEAF_A_VALUES); - let leaf_3_eax = HyperVLeaf3Eax::HYPERCALL | HyperVLeaf3Eax::VP_INDEX; - add_to_set( CpuidIdent::leaf(0x4000_0003), - CpuidValues { eax: leaf_3_eax.bits(), ..Default::default() }, + CpuidValues { + eax: HyperVLeaf3Eax::default().bits(), + ..Default::default() + }, ); super::add_cpuid(cpuid, to_add) @@ -213,9 +233,6 @@ impl Lifecycle for HyperV { fn reset(&self) { let mut inner = self.inner.lock().unwrap(); - - // The contents of guest memory are going to be completely reinitialized - // anyway, so there's no need to manually remove any overlay pages. *inner = Inner::default(); } @@ -244,6 +261,11 @@ impl MigrateSingle for HyperV { ) -> Result<(), MigrateStateError> { let data: migrate::HyperVEnlightenmentV1 = offer.parse()?; + // A well-behaved source should ensure that the hypercall MSR value is + // within the guest's PA range and that its Enabled bit agrees with the + // value of the guest OS ID MSR. But this data was received over the + // wire, so for safety's sake, verify it all and return a migration + // error if anything is inconsistent. let hypercall_msr = MsrHypercallValue(data.msr_hypercall); if hypercall_msr.enabled() { if data.msr_guest_os_id == 0 { From 2385b959b0524947fa5742d2c3c352625605cac3 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 20:25:06 +0000 Subject: [PATCH 10/20] api: make simple hyper-v features into a BTreeSet --- .../src/instance_spec/components/board.rs | 29 ++++++++++++++++--- openapi/propolis-server.json | 10 ++++--- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/crates/propolis-api-types/src/instance_spec/components/board.rs b/crates/propolis-api-types/src/instance_spec/components/board.rs index 9613599cb..6a8eff7cb 100644 --- a/crates/propolis-api-types/src/instance_spec/components/board.rs +++ b/crates/propolis-api-types/src/instance_spec/components/board.rs @@ -5,6 +5,8 @@ //! VM mainboard components. Every VM has a board, even if it has no other //! peripherals. +use std::collections::BTreeSet; + use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -92,9 +94,28 @@ pub struct CpuidEntry { pub edx: u32, } -#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] -#[serde(deny_unknown_fields, tag = "type", content = "value")] -pub enum HyperVFeature {} +/// Flags that enable "simple" Hyper-V enlightenments that require no +/// feature-specific configuration. +// +// NOTE: This enum's variants should never have any associated data (note that +// the type doesn't use serde's `tag` and `content` attributes). If a future +// enlightenment requires associated data, it should be put into a +// `HyperVExtendedFeatures` struct (or similar), and the `HyperV` variant of +// `GuestHypervisorInterface` should be extended to `Option`ally include that +// struct. +#[derive( + Clone, + Deserialize, + Serialize, + Debug, + JsonSchema, + Ord, + PartialOrd, + Eq, + PartialEq, +)] +#[serde(deny_unknown_fields)] +pub enum HyperVFeatureFlag {} /// A hypervisor interface to expose to the guest. #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema, Default)] @@ -107,7 +128,7 @@ pub enum GuestHypervisorInterface { /// Expose a Hyper-V-compatible hypervisor interface with the supplied /// features enabled. - HyperV { features: Vec }, + HyperV { features: BTreeSet }, } impl GuestHypervisorInterface { diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index c73807ef3..3cebdb179 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1068,8 +1068,9 @@ "features": { "type": "array", "items": { - "$ref": "#/components/schemas/HyperVFeature" - } + "$ref": "#/components/schemas/HyperVFeatureFlag" + }, + "uniqueItems": true } }, "required": [ @@ -1086,8 +1087,9 @@ } ] }, - "HyperVFeature": { - "oneOf": [] + "HyperVFeatureFlag": { + "description": "Flags that enable \"simple\" Hyper-V enlightenments that require no feature-specific configuration.", + "type": "string" }, "I440Fx": { "description": "An Intel 440FX-compatible chipset.", From 51dae9a7616d57fc9f045da4778051ba1f0da7a7 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 21:11:42 +0000 Subject: [PATCH 11/20] lib: remove unused Hyper-V leaves, add warning about CPUID config --- lib/propolis/src/enlightenment/hyperv/bits.rs | 60 ++++++++----------- lib/propolis/src/enlightenment/hyperv/mod.rs | 29 ++++++--- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs index b79429d56..0a4f7a463 100644 --- a/lib/propolis/src/enlightenment/hyperv/bits.rs +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -8,13 +8,9 @@ use cpuid_utils::CpuidValues; -/// The maximum hypervisor CPUID leaf supported by the Hyper-V emulation stack. -/// This must be at least 0x4000_0005 to comply with the Hyper-V spec. To avoid -/// having to reason about what this value should be if Propolis ever exposes -/// features from a leaf greater than this, always expose the maximum leaf -/// defined in the TLFS, even if the entries for those leaves advertise no -/// features. -const HYPERV_MAX_CPUID_LEAF: u32 = 0x4000000A; +/// Hyper-V-compatible hypervisors are required to support hypervisor CPUID +/// leaves up to 0x4000_0005. +pub(super) const HYPERV_MIN_REQUIRED_CPUID_LEAF: u32 = 0x40000005; /// CPUID leaf 0x4000_0000 contains hypervisor identifying information. eax /// receives the highest valid CPUID leaf in the hypervisor range. ebx, ecx, and @@ -25,13 +21,32 @@ const HYPERV_MAX_CPUID_LEAF: u32 = 0x4000000A; /// guests will accept other vendor IDs (they look at leaf 0x4000_0001 eax to /// identify the hypervisor interface instead of reading the vendor ID in leaf /// 0), but Linux guests only consider the vendor ID. -pub(super) const HYPERV_LEAF_0_VALUES: CpuidValues = CpuidValues { - eax: HYPERV_MAX_CPUID_LEAF, +const HYPERV_LEAF_0_VALUES: CpuidValues = CpuidValues { + eax: HYPERV_MIN_REQUIRED_CPUID_LEAF, ebx: 0x7263694D, ecx: 0x666F736F, edx: 0x76482074, }; +/// Generates values for CPUID leaf 0x4000_0000, which contains hypervisor +/// identifying information. eax receives the value of `max_leaf`, the maximum +/// valid CPUID leaf in the hypervisor range; ebx, ecx, and edx contain an +/// appropriate vendor ID. +/// +/// `max_leaf` supplies the maximum valid CPUID leaf in the hypervisor range. +/// +/// # Panics +/// +/// Panics if `max_leaf` is less than [`HYPERV_MIN_REQUIRED_CPUID_LEAF`]. +pub(super) fn hyperv_leaf_0_values(max_leaf: u32) -> CpuidValues { + assert!( + max_leaf >= HYPERV_MIN_REQUIRED_CPUID_LEAF, + "requested max leaf {max_leaf:#x} less than minimum required" + ); + + CpuidValues { eax: max_leaf, ..HYPERV_LEAF_0_VALUES } +} + /// Hyper-V leaf 0x4000_0001 contains an (ostensibly vendor-neutral) interface /// identifier. eax receives "Hv#1"; the other three outputs are reserved. pub(super) const HYPERV_LEAF_1_VALUES: CpuidValues = @@ -80,33 +95,6 @@ pub(super) const HYPERV_LEAF_4_VALUES: CpuidValues = pub(super) const HYPERV_LEAF_5_VALUES: CpuidValues = CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; -/// Hyper-V leaf 0x4000_0006 advertises that the host OS is aware of and may be -/// making use of assorted hardware features. -pub(super) const HYPERV_LEAF_6_VALUES: CpuidValues = - CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; - -/// Hyper-V leaf 0x4000_0007 advertises CPU management features that are -/// available to a Hyper-V root partition. Since Propolis guests are by -/// definition never running as a Hyper-V root partition, these values are 0. -pub(super) const HYPERV_LEAF_7_VALUES: CpuidValues = - CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; - -/// Hyper-V leaf 0x4000_0008 describes the hypervisor's support for emulation of -/// shared virtual memory. -pub(super) const HYPERV_LEAF_8_VALUES: CpuidValues = - CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; - -/// Hyper-V leaf 0x4000_0009 appears to describe the access rights afforded to -/// an L1 hypervisor running on top of an L0 Hyper-V instance. -pub(super) const HYPERV_LEAF_9_VALUES: CpuidValues = - CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; - -/// Hyper-V leaf 0x4000_000A appears to describe the virtualization -/// optimizations available to an L1 hypervisor running on top of an L0 Hyper-V -/// instance. -pub(super) const HYPERV_LEAF_A_VALUES: CpuidValues = - CpuidValues { eax: 0, ebx: 0, ecx: 0, edx: 0 }; - /// Allows the guest to report its type and version information. See TLFS /// section 2.6 for details about this MSR's format. /// diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 61a31774f..52be548b5 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -166,17 +166,8 @@ impl super::Enlightenment for HyperV { .expect("Hyper-V CPUID values don't conflict"); }; - add_to_set(CpuidIdent::leaf(0x4000_0000), HYPERV_LEAF_0_VALUES); add_to_set(CpuidIdent::leaf(0x4000_0001), HYPERV_LEAF_1_VALUES); add_to_set(CpuidIdent::leaf(0x4000_0002), HYPERV_LEAF_2_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_0004), HYPERV_LEAF_4_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_0005), HYPERV_LEAF_5_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_0006), HYPERV_LEAF_6_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_0007), HYPERV_LEAF_7_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_0008), HYPERV_LEAF_8_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_0009), HYPERV_LEAF_9_VALUES); - add_to_set(CpuidIdent::leaf(0x4000_000A), HYPERV_LEAF_A_VALUES); - add_to_set( CpuidIdent::leaf(0x4000_0003), CpuidValues { @@ -185,6 +176,26 @@ impl super::Enlightenment for HyperV { }, ); + add_to_set(CpuidIdent::leaf(0x4000_0004), HYPERV_LEAF_4_VALUES); + add_to_set(CpuidIdent::leaf(0x4000_0005), HYPERV_LEAF_5_VALUES); + + // Set the maximum available CPUID leaf to the smallest value required + // to expose all of the enlightenment's features. + // + // WARNING: In at least some versions of propolis-server, the CPUID + // configuration generated by this enlightenment is not part of the + // instance description that the migration source sends to its target. + // Instead, the source sends the target its *enlightenment + // configuration* and assumes that the target will produce the same + // CPUID settings the source produced. This includes the maximum + // available enlightenment leaf: it should not be set to the maximum + // leaf this version of Propolis knows about, but to the maximum leaf + // required by the features enabled in this enlightenment stack. + add_to_set( + CpuidIdent::leaf(0x4000_0000), + bits::hyperv_leaf_0_values(0x4000_0005), + ); + super::add_cpuid(cpuid, to_add) } From a60983eea47634037ff9007a15c87d6caf646c36 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 21:50:40 +0000 Subject: [PATCH 12/20] lib: improve description of guest OS ID MSR --- lib/propolis/src/enlightenment/hyperv/bits.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs index 0a4f7a463..042d83148 100644 --- a/lib/propolis/src/enlightenment/hyperv/bits.rs +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -98,8 +98,8 @@ pub(super) const HYPERV_LEAF_5_VALUES: CpuidValues = /// Allows the guest to report its type and version information. See TLFS /// section 2.6 for details about this MSR's format. /// -/// Guest OSes are (theoretically) required to identify themselves by writing to -/// this MSR before they try to enable the hypercall code page. +/// Guest OSes are required to identify themselves via this MSR before they can +/// set the enabled bit in [`HV_X64_MSR_HYPERCALL`] or make any hypercalls. /// /// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege. pub(super) const HV_X64_MSR_GUEST_OS_ID: u32 = 0x4000_0000; From 6b0da7ac4f98d7b77502f036b24264dedb076bd1 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 21:59:11 +0000 Subject: [PATCH 13/20] lib: explain use of TLFS nomenclature --- lib/propolis/src/enlightenment/hyperv/bits.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/propolis/src/enlightenment/hyperv/bits.rs b/lib/propolis/src/enlightenment/hyperv/bits.rs index 042d83148..ce8bf2e2b 100644 --- a/lib/propolis/src/enlightenment/hyperv/bits.rs +++ b/lib/propolis/src/enlightenment/hyperv/bits.rs @@ -5,6 +5,9 @@ //! Constant definitions and flags for Hyper-V emulations. These are drawn from //! the Hyper-V TLFS version 6.0b (referred to as "TLFS" below). See the parent //! module documentation for more details. +//! +//! Where possible, constants in this module (such as MSR identifiers) are given +//! names that match those used in the TLFS. use cpuid_utils::CpuidValues; From 0b3275a816776752a34a37bda5401a761962c9b8 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 22:00:33 +0000 Subject: [PATCH 14/20] lib: hypercall.rs cleanup --- .../src/enlightenment/hyperv/hypercall.rs | 34 +++++++++++++------ lib/propolis/src/enlightenment/hyperv/mod.rs | 2 +- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs index 056816b5c..6f9db34a2 100644 --- a/lib/propolis/src/enlightenment/hyperv/hypercall.rs +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -6,6 +6,12 @@ use crate::common::{GuestAddr, PAGE_SHIFT, PAGE_SIZE}; +const LOCKED_BIT: u64 = 1; +const LOCKED_MASK: u64 = 1 << LOCKED_BIT; +const ENABLED_BIT: u64 = 0; +const ENABLED_MASK: u64 = 1 << ENABLED_BIT; +const GPA_MASK: u64 = !((1 << PAGE_SHIFT) - 1); + /// Represents a value written to the [`HV_X64_MSR_HYPERCALL`] register. /// /// Writing to this register enables the hypercall page. The hypervisor overlays @@ -14,36 +20,42 @@ use crate::common::{GuestAddr, PAGE_SHIFT, PAGE_SIZE}; /// [`HYPERCALL_INSTRUCTION_SEQUENCE`]. /// /// [`HV_X64_MSR_HYPERCALL`]: super::bits::HV_X64_MSR_HYPERCALL -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Copy, Default)] pub(super) struct MsrHypercallValue(pub(super) u64); -impl MsrHypercallValue { - /// Returns the guest physical page number at which the guest would like the - /// hypercall page to be placed. - pub fn gpfn(&self) -> u64 { - self.0 >> 12 +impl std::fmt::Debug for MsrHypercallValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MsrHypercallValue") + .field("raw", &format!("{:#x}", self.0)) + .field("gpa", &format!("{:#x}", self.gpa().0)) + .field("locked", &self.locked()) + .field("enabled", &self.enabled()) + .finish() } +} +impl MsrHypercallValue { /// Returns the guest physical address at which the guest would like the /// hypercall page to be placed. pub fn gpa(&self) -> GuestAddr { - GuestAddr(self.gpfn() << PAGE_SHIFT) + GuestAddr(self.0 & GPA_MASK) } /// Returns whether the hypercall page location is locked. Once locked, the - /// value in `MSR_HYPERCALL` cannot change until the system is reset. + /// value in `MSR_HYPERCALL` cannot change until the hypervisor resets the + /// guest. pub fn locked(&self) -> bool { - (self.0 & 2) != 0 + (self.0 & LOCKED_MASK) != 0 } /// Indicates whether the hypercall page is enabled. pub fn enabled(&self) -> bool { - (self.0 & 1) != 0 + (self.0 & ENABLED_MASK) != 0 } /// Clears this value's enabled bit. pub fn clear_enabled(&mut self) { - self.0 &= !1; + self.0 &= !ENABLED_MASK; } } diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 52be548b5..bc89a226e 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -79,7 +79,7 @@ impl HyperV { info!( &self.log, "guest set HV_X64_MSR_GUEST_OS_ID"; - "value" => value + "value" => ?value ); } From d0b5fe660e68e826ac3f2860cf4aace8f6816c8c Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 22:24:25 +0000 Subject: [PATCH 15/20] lib: clean up use directives --- lib/propolis/src/enlightenment/hyperv/mod.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index bc89a226e..540014d7c 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -16,15 +16,19 @@ use std::sync::Mutex; -use bits::*; use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; -use hypercall::{hypercall_page_contents, MsrHypercallValue}; use slog::info; use crate::{ accessors::MemAccessor, common::{GuestRegion, Lifecycle, VcpuId, PAGE_SIZE}, - enlightenment::AddCpuidError, + enlightenment::{ + hyperv::{ + bits::*, + hypercall::{hypercall_page_contents, MsrHypercallValue}, + }, + AddCpuidError, + }, migrate::{ MigrateCtx, MigrateSingle, MigrateStateError, Migrator, PayloadOffer, PayloadOutput, From 5bf277f7162608a2bc73c34a87f1f941c47e59ae Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 22:29:34 +0000 Subject: [PATCH 16/20] lib: assorted PR feedback --- .../src/enlightenment/hyperv/hypercall.rs | 10 +- lib/propolis/src/enlightenment/hyperv/mod.rs | 121 +++++++++--------- 2 files changed, 69 insertions(+), 62 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs index 6f9db34a2..d6418b6d8 100644 --- a/lib/propolis/src/enlightenment/hyperv/hypercall.rs +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -14,11 +14,15 @@ const GPA_MASK: u64 = !((1 << PAGE_SHIFT) - 1); /// Represents a value written to the [`HV_X64_MSR_HYPERCALL`] register. /// -/// Writing to this register enables the hypercall page. The hypervisor overlays -/// this page with an instruction sequence that the guest should execute in -/// order to issue a call to the hypervisor. See +/// Writing to this register enables the hypercall page. The hypervisor +/// overwrites this page with an instruction sequence that the guest should +/// execute in order to issue a call to the hypervisor. See /// [`HYPERCALL_INSTRUCTION_SEQUENCE`]. /// +/// Bits 11:2 of this register are reserved. The TLFS specifies that the guest +/// "should ignore [them] on reads and preserve [them] on writes," but imposes +/// no particular penalties on guests that modify these bits. +/// /// [`HV_X64_MSR_HYPERCALL`]: super::bits::HV_X64_MSR_HYPERCALL #[derive(Clone, Copy, Default)] pub(super) struct MsrHypercallValue(pub(super) u64); diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 540014d7c..82b9df472 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -17,7 +17,6 @@ use std::sync::Mutex; use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; -use slog::info; use crate::{ accessors::MemAccessor, @@ -40,6 +39,14 @@ use crate::{ mod bits; mod hypercall; +#[usdt::provider(provider = "propolis")] +mod probes { + fn hyperv_wrmsr_guest_os_id(val: u64) {} + fn hyperv_wrmsr_hypercall(val: u64, gpa: u64, locked: bool, enabled: bool) { + } + fn hyperv_wrmsr_hypercall_bad_gpa(gpa: u64) {} +} + const TYPE_NAME: &str = "guest-hyperv-interface"; #[derive(Debug, Default)] @@ -52,6 +59,7 @@ struct Inner { } pub struct HyperV { + #[allow(dead_code)] log: slog::Logger, inner: Mutex, acc_mem: MemAccessor, @@ -67,6 +75,7 @@ impl HyperV { /// Handles a write to the HV_X64_MSR_GUEST_OS_ID register. fn handle_wrmsr_guest_os_id(&self, value: u64) -> WrmsrOutcome { + probes::hyperv_wrmsr_guest_os_id!(|| value); let mut inner = self.inner.lock().unwrap(); // TLFS section 3.13 says that the hypercall page "becomes disabled" if @@ -77,14 +86,7 @@ impl HyperV { // leaving the hypercall page untouched (as would happen if the guest // manually cleared this bit). if value == 0 { - info!(&self.log, "guest cleared HV_X64_MSR_GUEST_OS_ID"); inner.msr_hypercall_value.clear_enabled(); - } else { - info!( - &self.log, - "guest set HV_X64_MSR_GUEST_OS_ID"; - "value" => ?value - ); } inner.msr_guest_os_id_value = value; @@ -92,71 +94,71 @@ impl HyperV { } /// Handles a write to the HV_X64_MSR_HYPERCALL register. See TLFS section - /// 3.13. + /// 3.13 and [`MsrHypercallValue`]. fn handle_wrmsr_hypercall(&self, value: u64) -> WrmsrOutcome { + let mut new = MsrHypercallValue(value); + probes::hyperv_wrmsr_hypercall!(|| ( + value, + new.gpa().0, + new.locked(), + new.enabled() + )); + let mut inner = self.inner.lock().unwrap(); let old = inner.msr_hypercall_value; - // Per the spec, this MSR is immutable once the Locked bit is set. + // This MSR is immutable once the Locked bit is set. if old.locked() { return WrmsrOutcome::Handled; } // If this MSR is written when no guest OS ID is set, the Enabled bit is // cleared and the write succeeds. - let mut new = MsrHypercallValue(value); if inner.msr_guest_os_id_value == 0 { new.clear_enabled(); } - // If the Enabled bit is set, expose the hypercall instruction sequence - // at the requested GPA. The TLFS specifies that this raises #GP if the - // selected physical address is outside the bounds of the guest's - // physical address space. - // - // The TLFS describes this page as an "overlay" that "covers whatever - // else is mapped to the GPA range." The spec is ambiguous as to whether - // this means that the page's previous contents must be restored if the - // page is disabled or its address later changes. Empirically, most - // guest OSes don't appear to move the page after creating it, - // and at least some other hypervisors don't bother with saving and - // restoring the old page's contents. So, for simplicity, simply write - // the hypercall instruction sequence to the requested page and call it - // a day. - let outcome = if new.enabled() { - let memctx = self - .acc_mem - .access() - .expect("guest memory is always accessible during wrmsr"); - - let region = GuestRegion(new.gpa(), PAGE_SIZE); - if let Some(mapping) = memctx.writable_region(®ion) { - write_overlay_page(&mapping, &hypercall_page_contents()); - info!( - &self.log, - "established hypercall page at GPA {:#x}", - new.gpa().0 - ); - - WrmsrOutcome::Handled - } else { - info!( - &self.log, - "guest requested illegal hypercall GPA {:#x}", - new.gpa().0 - ); - WrmsrOutcome::GpException - } - } else { - WrmsrOutcome::Handled - }; - - // Commit the new MSR value if the write was handled. - if outcome == WrmsrOutcome::Handled { + // If the Enabled bit is not set, there's nothing to try to expose to + // the guest. + if !new.enabled() { inner.msr_hypercall_value = new; + return WrmsrOutcome::Handled; } - outcome + let memctx = self + .acc_mem + .access() + .expect("guest memory is always accessible during wrmsr"); + + let region = GuestRegion(new.gpa(), PAGE_SIZE); + + // Mapping will fail if the requested GPA is out of the guest's physical + // address range. The TLFS specifies that this should raise #GP. + let Some(mapping) = memctx.writable_region(®ion) else { + probes::hyperv_wrmsr_hypercall_bad_gpa!(|| new.gpa().0); + return WrmsrOutcome::GpException; + }; + + // Write the hypercall instruction sequence to the requested GPA. + // + // Note that the previous contents of this page are not saved. The + // TLFS is somewhat ambiguous as to whether this is required: it + // describes the hypercall page as an "overlay" that "covers whatever + // else is mapped to the GPA range", but does not explicitly state that + // the page's previous contents should be restored if the hypercall page + // is moved or disabled. + // + // Experiments show that at least some other hypervisors that implement + // the Hyper-V interface don't save and restore the hypercall page's + // contents. Doing likewise simplifies this code and, perhaps more + // valuably, saves the enlightenment stack from having to save and + // restore saved overlay pages during live migration. This can, however, + // be changed in the future if a guest turns out to require the "restore + // the old value" semantics. + write_overlay_page(&mapping, &hypercall_page_contents()); + + inner.msr_hypercall_value = new; + WrmsrOutcome::Handled } } @@ -294,9 +296,10 @@ impl MigrateSingle for HyperV { .mem .writable_region(&GuestRegion(hypercall_msr.gpa(), PAGE_SIZE)) else { - return Err(MigrateStateError::ImportFailed( - "couldn't map GPA in hypercall MSR".to_string(), - )); + return Err(MigrateStateError::ImportFailed(format!( + "couldn't map hypercall page for MSR value \ + {hypercall_msr:?}" + ))); }; write_overlay_page(&mapping, &hypercall_page_contents()); From c79ad3ee9702d0a26869069e64995f8b3148332a Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 23:07:03 +0000 Subject: [PATCH 17/20] lib: oops, overlay pages do require restoration --- lib/propolis/src/enlightenment/hyperv/mod.rs | 21 +++++++------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/mod.rs b/lib/propolis/src/enlightenment/hyperv/mod.rs index 82b9df472..a40fbfe73 100644 --- a/lib/propolis/src/enlightenment/hyperv/mod.rs +++ b/lib/propolis/src/enlightenment/hyperv/mod.rs @@ -141,20 +141,13 @@ impl HyperV { // Write the hypercall instruction sequence to the requested GPA. // - // Note that the previous contents of this page are not saved. The - // TLFS is somewhat ambiguous as to whether this is required: it - // describes the hypercall page as an "overlay" that "covers whatever - // else is mapped to the GPA range", but does not explicitly state that - // the page's previous contents should be restored if the hypercall page - // is moved or disabled. - // - // Experiments show that at least some other hypervisors that implement - // the Hyper-V interface don't save and restore the hypercall page's - // contents. Doing likewise simplifies this code and, perhaps more - // valuably, saves the enlightenment stack from having to save and - // restore saved overlay pages during live migration. This can, however, - // be changed in the future if a guest turns out to require the "restore - // the old value" semantics. + // TODO: TLFS section 5.2.1 specifies that when an overlay is removed, + // "the underlying GPA page is 'uncovered', and an existing mapping + // becomes accessible to the guest." Empirically, at least some other + // Hv#1 implementations don't appear to follow this rule, and most + // common guest OSes don't rely on being able to disable or remove the + // hypercall page. Nevertheless, Propolis should eventually follow this + // rule. write_overlay_page(&mapping, &hypercall_page_contents()); inner.msr_hypercall_value = new; From 12db242e212594228eed27a72c8d2dac3e962767 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 23:17:47 +0000 Subject: [PATCH 18/20] lib: remove dead code --- lib/propolis/src/vmm/mem.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/propolis/src/vmm/mem.rs b/lib/propolis/src/vmm/mem.rs index 4b024e415..31257963c 100644 --- a/lib/propolis/src/vmm/mem.rs +++ b/lib/propolis/src/vmm/mem.rs @@ -910,12 +910,6 @@ impl MemCtx { let mapping = self.region_covered(region.0, region.1, Prot::READ)?; Some(mapping) } - pub fn read_write_region( - &self, - region: &GuestRegion, - ) -> Option { - self.region_covered(region.0, region.1, Prot::RW) - } /// Like `direct_writable_region`, but looks up the region by name. pub fn direct_writable_region_by_name( From c721e11e21d8d7c3e8957c4ee7d76e47dae93529 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 5 Feb 2025 23:29:06 +0000 Subject: [PATCH 19/20] phd: add comment about warning instead of skipping --- phd-tests/tests/src/hyperv.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/phd-tests/tests/src/hyperv.rs b/phd-tests/tests/src/hyperv.rs index 7ca5211da..050b0f75d 100644 --- a/phd-tests/tests/src/hyperv.rs +++ b/phd-tests/tests/src/hyperv.rs @@ -9,6 +9,11 @@ use tracing::warn; /// Attempts to see if the guest has detected Hyper-V support. This is /// best-effort, since not all PHD guest images contain in-box tools that /// display the current hypervisor vendor. +/// +/// NOTE: If the guest lacks a facility to check the hypervisor vendor, this +/// routine logs a warning but does not return a "Skipped" result. This allows +/// the smoke tests to return a Pass result to show that they exercised VM +/// startup and shutdown with Hyper-V emulation enabled. async fn guest_detect_hyperv(vm: &TestVm) -> anyhow::Result<()> { if vm.guest_os_kind().is_linux() { // Many Linux distros come with systemd installed out of the box. On From dffcd6b55308a7227156f6f26f68e466bc1feea2 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Mon, 10 Feb 2025 17:09:25 +0000 Subject: [PATCH 20/20] lib: don't reinvent PAGE_MASK --- lib/propolis/src/enlightenment/hyperv/hypercall.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/propolis/src/enlightenment/hyperv/hypercall.rs b/lib/propolis/src/enlightenment/hyperv/hypercall.rs index d6418b6d8..215cbcc11 100644 --- a/lib/propolis/src/enlightenment/hyperv/hypercall.rs +++ b/lib/propolis/src/enlightenment/hyperv/hypercall.rs @@ -4,13 +4,12 @@ //! Support for hypercalls and their related MSRs. -use crate::common::{GuestAddr, PAGE_SHIFT, PAGE_SIZE}; +use crate::common::{GuestAddr, PAGE_MASK, PAGE_SIZE}; const LOCKED_BIT: u64 = 1; const LOCKED_MASK: u64 = 1 << LOCKED_BIT; const ENABLED_BIT: u64 = 0; const ENABLED_MASK: u64 = 1 << ENABLED_BIT; -const GPA_MASK: u64 = !((1 << PAGE_SHIFT) - 1); /// Represents a value written to the [`HV_X64_MSR_HYPERCALL`] register. /// @@ -42,7 +41,7 @@ impl MsrHypercallValue { /// Returns the guest physical address at which the guest would like the /// hypercall page to be placed. pub fn gpa(&self) -> GuestAddr { - GuestAddr(self.0 & GPA_MASK) + GuestAddr(self.0 & PAGE_MASK as u64) } /// Returns whether the hypercall page location is locked. Once locked, the