Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lib: emulate Hyper-V enlightenment stack #849

Merged
merged 20 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions bin/propolis-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag};
use propolis_client::support::nvme_serial_from_str;
use propolis_client::types::{
BlobStorageBackend, Board, Chipset, ComponentV0, CrucibleStorageBackend,
I440Fx, InstanceEnsureRequest, InstanceInitializationMethod,
InstanceMetadata, InstanceSpecGetResponse, InstanceSpecV0, NvmeDisk,
QemuPvpanic, ReplacementComponent, SerialPort, SerialPortNumber,
VirtioDisk,
GuestHypervisorInterface, I440Fx, InstanceEnsureRequest,
InstanceInitializationMethod, InstanceMetadata, InstanceSpecGetResponse,
InstanceSpecV0, NvmeDisk, QemuPvpanic, ReplacementComponent, SerialPort,
SerialPortNumber, VirtioDisk,
};
use propolis_client::{PciPath, SpecKey};
use propolis_config_toml::spec::SpecConfig;
Expand Down Expand Up @@ -189,6 +189,10 @@ struct VmConfig {
// cloud_init ISO file
#[clap(long, action, conflicts_with = "spec")]
cloud_init: Option<PathBuf>,

/// enable Hyper-V compatible enlightenments for this VM
#[clap(long, action)]
hyperv: bool,
}

fn add_component_to_spec(
Expand Down Expand Up @@ -293,7 +297,11 @@ impl VmConfig {
cpuid: None,
cpus: self.vcpus,
memory_mb: self.memory,
guest_hv_interface: None,
guest_hv_interface: if self.hyperv {
Some(GuestHypervisorInterface::HyperV { features: vec![] })
} else {
None
},
},
components: Default::default(),
};
Expand Down
22 changes: 17 additions & 5 deletions bin/propolis-server/src/lib/vm/ensure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use std::sync::Arc;

use oximeter::types::ProducerRegistry;
use oximeter_instruments::kstat::KstatSampler;
use propolis::enlightenment::{bhyve::BhyveGuestInterface, Enlightenment};
use propolis::enlightenment::{
bhyve::BhyveGuestInterface, hyperv::HyperV, Enlightenment,
};
use propolis_api_types::{
instance_spec::components::board::GuestHypervisorInterface,
InstanceEnsureResponse, InstanceMigrateInitiateResponse,
Expand Down Expand Up @@ -391,9 +393,19 @@ async fn initialize_vm_objects(

let vmm_log = log.new(slog::o!("component" => "vmm"));

let guest_hv_interface = match spec.board.guest_hv_interface {
GuestHypervisorInterface::Bhyve => Arc::new(BhyveGuestInterface),
};
let (guest_hv_interface, guest_hv_lifecycle) =
match spec.board.guest_hv_interface {
GuestHypervisorInterface::Bhyve => {
let bhyve = Arc::new(BhyveGuestInterface);
let lifecycle = bhyve.clone();
(bhyve as Arc<dyn Enlightenment>, lifecycle.as_lifecycle())
}
GuestHypervisorInterface::HyperV { .. } => {
let hyperv = Arc::new(HyperV::new(&vmm_log));
let lifecycle = hyperv.clone();
(hyperv as Arc<dyn Enlightenment>, lifecycle.as_lifecycle())
}
};

// Set up the 'shell' instance into which the rest of this routine will
// add components.
Expand Down Expand Up @@ -458,7 +470,7 @@ async fn initialize_vm_objects(
let ramfb =
init.initialize_fwcfg(spec.board.cpus, &options.bootrom_version)?;

init.register_guest_hv_interface(guest_hv_interface.as_lifecycle());
init.register_guest_hv_interface(guest_hv_lifecycle);
init.initialize_cpus().await?;
let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new(
&machine,
Expand Down
29 changes: 29 additions & 0 deletions crates/propolis-api-types/src/instance_spec/components/board.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
//! VM mainboard components. Every VM has a board, even if it has no other
//! peripherals.

use std::collections::BTreeSet;

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -92,6 +94,29 @@ pub struct CpuidEntry {
pub edx: u32,
}

/// Flags that enable "simple" Hyper-V enlightenments that require no
/// feature-specific configuration.
///
/// No flags are defined yet: the enum currently has no variants, so the only
/// feature set that can be expressed with this type is the empty one.
//
// NOTE: This enum's variants should never have any associated data (note that
// the type doesn't use serde's `tag` and `content` attributes). If a future
// enlightenment requires associated data, it should be put into a
// `HyperVExtendedFeatures` struct (or similar), and the `HyperV` variant of
// `GuestHypervisorInterface` should be extended to `Option`ally include that
// struct.
#[derive(
Clone,
Deserialize,
Serialize,
Debug,
JsonSchema,
Ord,
PartialOrd,
Eq,
PartialEq,
)]
#[serde(deny_unknown_fields)]
pub enum HyperVFeatureFlag {}

/// A hypervisor interface to expose to the guest.
#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema, Default)]
#[serde(deny_unknown_fields, tag = "type", content = "value")]
Expand All @@ -100,6 +125,10 @@ pub enum GuestHypervisorInterface {
/// leaf 0x4000_0000 and no additional leaves or features).
#[default]
Bhyve,

/// Expose a Hyper-V-compatible hypervisor interface with the supplied
/// features enabled.
HyperV { features: BTreeSet<HyperVFeatureFlag> },
}

impl GuestHypervisorInterface {
Expand Down
2 changes: 1 addition & 1 deletion lib/propolis/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ impl RWOp<'_, '_> {
}

/// An address within a guest VM.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct GuestAddr(pub u64);

impl GuestAddr {
Expand Down
123 changes: 123 additions & 0 deletions lib/propolis/src/enlightenment/hyperv/bits.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Constant definitions and flags for Hyper-V emulations. These are drawn from
//! the Hyper-V TLFS version 6.0b (referred to as "TLFS" below). See the parent
//! module documentation for more details.
//!
//! Where possible, constants in this module (such as MSR identifiers) are given
//! names that match those used in the TLFS.

use cpuid_utils::CpuidValues;

/// The highest hypervisor CPUID leaf (0x4000_0005) that every
/// Hyper-V-compatible hypervisor is required to support. All hypervisor
/// leaves up to and including this one must be present.
pub(super) const HYPERV_MIN_REQUIRED_CPUID_LEAF: u32 = 0x4000_0005;

/// Template for CPUID leaf 0x4000_0000, the hypervisor identification leaf.
/// eax reports the highest valid CPUID leaf in the hypervisor range; ebx, ecx,
/// and edx together hold a 12-byte ASCII vendor ID, four bytes per register in
/// little-endian order.
///
/// The vendor ID is "Microsoft Hv" so that both Linux and Windows guests
/// accept these enlightenments. Windows guests tolerate other vendor IDs
/// (they identify the hypervisor interface from leaf 0x4000_0001 eax rather
/// than from the vendor ID in leaf 0), but Linux guests only consider the
/// vendor ID.
const HYPERV_LEAF_0_VALUES: CpuidValues = CpuidValues {
    eax: HYPERV_MIN_REQUIRED_CPUID_LEAF,
    ebx: u32::from_le_bytes(*b"Micr"),
    ecx: u32::from_le_bytes(*b"osof"),
    edx: u32::from_le_bytes(*b"t Hv"),
};

/// Builds the register values for CPUID leaf 0x4000_0000 (hypervisor
/// identification) with a caller-selected maximum leaf.
///
/// eax of the result is `max_leaf`, the highest valid CPUID leaf in the
/// hypervisor range. ebx, ecx, and edx are copied unchanged from
/// [`HYPERV_LEAF_0_VALUES`] and so carry its vendor ID.
///
/// # Panics
///
/// Panics if `max_leaf` is less than [`HYPERV_MIN_REQUIRED_CPUID_LEAF`].
pub(super) fn hyperv_leaf_0_values(max_leaf: u32) -> CpuidValues {
    assert!(
        HYPERV_MIN_REQUIRED_CPUID_LEAF <= max_leaf,
        "requested max leaf {max_leaf:#x} less than minimum required"
    );

    // Keep the vendor ID registers from the template; only eax varies.
    let CpuidValues { ebx, ecx, edx, .. } = HYPERV_LEAF_0_VALUES;
    CpuidValues { eax: max_leaf, ebx, ecx, edx }
}

/// Values for Hyper-V leaf 0x4000_0001, which carries an (ostensibly
/// vendor-neutral) interface identifier. eax holds the ASCII bytes "Hv#1" in
/// little-endian order; ebx, ecx, and edx are reserved and left at 0.
pub(super) const HYPERV_LEAF_1_VALUES: CpuidValues = CpuidValues {
    eax: u32::from_le_bytes(*b"Hv#1"),
    ebx: 0,
    ecx: 0,
    edx: 0,
};

/// Values for Hyper-V leaf 0x4000_0002, which carries hypervisor version
/// information. Every register is 0: this avoids having to reason about what
/// it means to expose a specific hypervisor version across a live migration
/// between potentially different host and/or Propolis versions.
pub(super) const HYPERV_LEAF_2_VALUES: CpuidValues = CpuidValues {
    eax: 0,
    ebx: 0,
    ecx: 0,
    edx: 0,
};

bitflags::bitflags! {
/// Hyper-V leaf 0x4000_0003 eax returns synthetic MSR access rights.
/// Only the bits actually used by this enlightenment stack are enumerated
/// here.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct HyperVLeaf3Eax: u32 {
// NOTE(review): presumably gates the partition reference counter MSR;
// no MSR in this module requires it yet. Confirm against the TLFS.
const PARTITION_REFERENCE_COUNTER = 1 << 1;
// Required to access `HV_X64_MSR_GUEST_OS_ID` and `HV_X64_MSR_HYPERCALL`.
const HYPERCALL = 1 << 5;
// Required to read `HV_X64_MSR_VP_INDEX`.
const VP_INDEX = 1 << 6;
// NOTE(review): presumably gates the partition reference TSC page MSR;
// no MSR in this module requires it yet. Confirm against the TLFS.
const PARTITION_REFERENCE_TSC = 1 << 9;

// Bits 14-31 of this register are reserved.
}
}

impl Default for HyperVLeaf3Eax {
/// Grants access to the VP index and hypercall MSRs. This is the minimum
/// set of access rights that all Hyper-V-compatible hypervisors must grant.
fn default() -> Self {
HyperVLeaf3Eax::VP_INDEX | HyperVLeaf3Eax::HYPERCALL
}
}

/// Values for Hyper-V leaf 0x4000_0004, which describes behavior that the
/// guest OS should implement for optimal performance. Propolis expresses no
/// opinion about these options (eax, ecx, and edx are 0), except that ebx is
/// all ones to indicate that the guest should never try to notify the
/// hypervisor about failed spinlock acquisitions.
pub(super) const HYPERV_LEAF_4_VALUES: CpuidValues = CpuidValues {
    eax: 0,
    ebx: u32::MAX,
    ecx: 0,
    edx: 0,
};

/// Values for Hyper-V leaf 0x4000_0005, which describes the hypervisor's CPU
/// and interrupt remapping limits. Hypervisors are allowed not to expose these
/// limits by publishing 0s to this leaf, which is what these values do.
pub(super) const HYPERV_LEAF_5_VALUES: CpuidValues = CpuidValues {
    eax: 0,
    ebx: 0,
    ecx: 0,
    edx: 0,
};

/// Allows the guest to report its type and version information. See TLFS
/// section 2.6 for details about this MSR's format.
///
/// Guest OSes are required to identify themselves via this MSR before they can
/// set the enabled bit in [`HV_X64_MSR_HYPERCALL`] or make any hypercalls.
///
/// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege.
pub(super) const HV_X64_MSR_GUEST_OS_ID: u32 = 0x4000_0000;

/// Specifies the guest physical address at which the guest would like to place
/// the hypercall page. See TLFS section 3.13 and the [`MsrHypercallValue`]
/// struct.
///
/// Read-write; requires the [`HyperVLeaf3Eax::HYPERCALL`] privilege.
///
/// [`MsrHypercallValue`]: super::hypercall::MsrHypercallValue
pub(super) const HV_X64_MSR_HYPERCALL: u32 = 0x4000_0001;

/// Guests may read this register to obtain the index of the vCPU that read the
/// register.
///
/// Read-only; requires the [`HyperVLeaf3Eax::VP_INDEX`] privilege.
pub(super) const HV_X64_MSR_VP_INDEX: u32 = 0x4000_0002;
80 changes: 80 additions & 0 deletions lib/propolis/src/enlightenment/hyperv/hypercall.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Support for hypercalls and their related MSRs.

use crate::common::{GuestAddr, PAGE_MASK, PAGE_SIZE};

/// Position of the "enabled" flag in the hypercall MSR value.
const ENABLED_BIT: u64 = 0;
/// Mask that selects the "enabled" flag in the hypercall MSR value.
const ENABLED_MASK: u64 = 1 << ENABLED_BIT;
/// Position of the "locked" flag in the hypercall MSR value.
const LOCKED_BIT: u64 = 1;
/// Mask that selects the "locked" flag in the hypercall MSR value.
const LOCKED_MASK: u64 = 1 << LOCKED_BIT;

/// Represents a value written to the [`HV_X64_MSR_HYPERCALL`] register.
///
/// Writing to this register enables the hypercall page. The hypervisor
/// overwrites this page with an instruction sequence that the guest should
/// execute in order to issue a call to the hypervisor. See
/// [`HYPERCALL_INSTRUCTION_SEQUENCE`].
///
/// Layout of the wrapped `u64`, as decoded by this type's accessors:
///
/// - bit 0: the hypercall page is enabled (see [`Self::enabled`]);
/// - bit 1: the hypercall page location is locked (see [`Self::locked`]);
/// - the bits selected by `PAGE_MASK`: the guest physical address of the
///   hypercall page (see [`Self::gpa`]).
///
/// Bits 11:2 of this register are reserved. The TLFS specifies that the guest
/// "should ignore [them] on reads and preserve [them] on writes," but imposes
/// no particular penalties on guests that modify these bits.
///
/// The `Default` value is 0: not enabled, not locked.
///
/// [`HV_X64_MSR_HYPERCALL`]: super::bits::HV_X64_MSR_HYPERCALL
#[derive(Clone, Copy, Default)]
pub(super) struct MsrHypercallValue(pub(super) u64);

impl std::fmt::Debug for MsrHypercallValue {
    /// Formats the raw register value alongside its decoded fields. The raw
    /// value and the page address are rendered as hexadecimal strings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let raw_hex = format!("{:#x}", self.0);
        let gpa_hex = format!("{:#x}", self.gpa().0);

        let mut out = f.debug_struct("MsrHypercallValue");
        out.field("raw", &raw_hex);
        out.field("gpa", &gpa_hex);
        out.field("locked", &self.locked());
        out.field("enabled", &self.enabled());
        out.finish()
    }
}

impl MsrHypercallValue {
/// Returns the guest physical address at which the guest would like the
/// hypercall page to be placed.
pub fn gpa(&self) -> GuestAddr {
GuestAddr(self.0 & PAGE_MASK as u64)
}

/// Returns whether the hypercall page location is locked. Once locked, the
/// value in `MSR_HYPERCALL` cannot change until the hypervisor resets the
/// guest.
pub fn locked(&self) -> bool {
(self.0 & LOCKED_MASK) != 0
}

/// Indicates whether the hypercall page is enabled.
pub fn enabled(&self) -> bool {
(self.0 & ENABLED_MASK) != 0
}

/// Clears this value's enabled bit.
pub fn clear_enabled(&mut self) {
self.0 &= !ENABLED_MASK;
}
}

/// Machine code placed at the start of the hypercall page. It encodes
/// `mov rax, 2; ret`, which returns a "not supported" status for all
/// hypercalls without actually requiring the guest to exit.
//
// If and when actual hypercall support is required, this should change to
// either `0f 01 c1` (VMCALL) or `0f 01 d9` (VMMCALL), depending on whether the
// host is VMX- or SVM-based.
const HYPERCALL_INSTRUCTION_SEQUENCE: [u8; 8] = [
    // mov rax, 2
    0x48, 0xc7, 0xc0, 0x02, 0x00, 0x00, 0x00,
    // ret
    0xc3,
];

/// Yields a page-sized buffer containing the contents of the hypercall page:
/// [`HYPERCALL_INSTRUCTION_SEQUENCE`] at offset 0, with every remaining byte
/// set to zero.
pub(super) fn hypercall_page_contents() -> [u8; PAGE_SIZE] {
    let mut page = [0u8; PAGE_SIZE];

    // Take the destination length from the sequence itself rather than
    // repeating its size as a literal: `copy_from_slice` panics when the two
    // lengths differ, so a hard-coded bound would turn any future change to
    // the sequence into a runtime panic.
    page[..HYPERCALL_INSTRUCTION_SEQUENCE.len()]
        .copy_from_slice(&HYPERCALL_INSTRUCTION_SEQUENCE);
    page
}
Loading
Loading