Skip to content

Commit

Permalink
TDX: Add a separate usermode-only APIC page for VTL 1 (#775)
Browse files Browse the repository at this point in the history
And rename VTL0's APIC page accessors to make things clearer.

This does not actually register the new page with the cpu yet, but
that's not needed to maintain VTL 0 functionality.

Part 1 of #819, fixes #746
  • Loading branch information
smalis-msft authored Feb 21, 2025
1 parent 2b60249 commit 326b7a2
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 43 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2645,6 +2645,7 @@ dependencies = [
"memory_range",
"nix 0.26.4",
"open_enum",
"page_pool_alloc",
"pal",
"parking_lot",
"safe_intrinsics",
Expand Down
1 change: 1 addition & 0 deletions openhcl/hcl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ sidecar_client.workspace = true
tdcall = { workspace = true, features = ["tracing"] }
x86defs.workspace = true
inspect.workspace = true
page_pool_alloc.workspace = true

parking_lot.workspace = true
signal-hook.workspace = true
Expand Down
45 changes: 34 additions & 11 deletions openhcl/hcl/src/ioctl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ use hvdef::HV_PAGE_SIZE;
use hvdef::HV_PARTITION_ID_SELF;
use hvdef::HV_VP_INDEX_SELF;
use memory_range::MemoryRange;
use page_pool_alloc::PagePoolAllocator;
use page_pool_alloc::PagePoolHandle;
use pal::unix::pthread::*;
use parking_lot::Mutex;
use private::BackingPrivate;
Expand Down Expand Up @@ -155,6 +157,10 @@ pub enum Error {
supported: IsolationType,
requested: IsolationType,
},
#[error("private page pool allocator missing, required for requested isolation type")]
MissingPrivateMemory,
#[error("failed to allocate pages for vp")]
AllocVp(#[source] page_pool_alloc::Error),
}

/// Error for IOCTL errors specifically.
Expand Down Expand Up @@ -1538,9 +1544,9 @@ enum BackingState {
Snp {
vmsa: VtlArray<MappedPage<SevVmsa>, 2>,
},
// TODO GUEST_VSM: vtl 1 vp state
Tdx {
apic_page: MappedPage<ApicPage>,
vtl0_apic_page: MappedPage<ApicPage>,
vtl1_apic_page: PagePoolHandle,
},
}

Expand All @@ -1556,6 +1562,7 @@ impl HclVp {
vp: u32,
map_reg_page: bool,
isolation_type: IsolationType,
private_pool: Option<&PagePoolAllocator>,
) -> Result<Self, Error> {
let fd = &hcl.mshv_vtl.file;
let run: MappedPage<hcl_run> =
Expand Down Expand Up @@ -1587,8 +1594,12 @@ impl HclVp {
}
}
IsolationType::Tdx => BackingState::Tdx {
apic_page: MappedPage::new(fd, MSHV_APIC_PAGE_OFFSET | vp as i64)
vtl0_apic_page: MappedPage::new(fd, MSHV_APIC_PAGE_OFFSET | vp as i64)
.map_err(|e| Error::MmapVp(e, Some(Vtl::Vtl0)))?,
vtl1_apic_page: private_pool
.ok_or(Error::MissingPrivateMemory)?
.alloc(1.try_into().unwrap(), "tdx-vtl1-apic-page".to_owned())
.map_err(Error::AllocVp)?,
},
};

Expand Down Expand Up @@ -1813,8 +1824,8 @@ impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
}
}

/// Gets the proxied interrupt request bitmap from the hypervisor.
pub fn proxy_irr(&mut self) -> Option<[u32; 8]> {
/// Gets the proxied interrupt request bitmap for VTL 0 from the hypervisor.
pub fn proxy_irr_vtl0(&mut self) -> Option<[u32; 8]> {
// SAFETY: the `scan_proxy_irr` and `proxy_irr` fields of the run page
// are concurrently updated by the kernel on multiple processors. They
// are accessed atomically everywhere.
Expand All @@ -1836,8 +1847,8 @@ impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
}
}

/// Update the `proxy_irr_blocked` in run page
pub fn update_proxy_irr_filter(&mut self, irr_filter: &[u32; 8]) {
/// Update the `proxy_irr_blocked` for VTL 0 in the run page
pub fn update_proxy_irr_filter_vtl0(&mut self, irr_filter: &[u32; 8]) {
// SAFETY: `proxy_irr_blocked` is accessed by current VP only, but could
// be concurrently accessed by kernel too, hence accessing as Atomic
let proxy_irr_blocked = unsafe {
Expand All @@ -1853,11 +1864,11 @@ impl<'a, T: Backing<'a>> ProcessorRunner<'a, T> {
}
}

/// Gets the proxy_irr_exit bitmask. This mask ensures that
/// Gets the proxy_irr_exit bitmask for VTL 0. This mask ensures that
/// the masked interrupts always exit to user-space, and cannot
/// be injected in the kernel. Interrupts matching this condition
/// will be left on the proxy_irr field.
pub fn proxy_irr_exit_mut(&mut self) -> &mut [u32; 8] {
pub fn proxy_irr_exit_mut_vtl0(&mut self) -> &mut [u32; 8] {
// SAFETY: The `proxy_irr_exit` field of the run page will not be concurrently updated.
unsafe { &mut (*self.run.get()).proxy_irr_exit }
}
Expand Down Expand Up @@ -2212,9 +2223,21 @@ impl Hcl {
}

/// Adds `vp_count` VPs.
pub fn add_vps(&mut self, vp_count: u32) -> Result<(), Error> {
pub fn add_vps(
&mut self,
vp_count: u32,
private_pool: Option<&PagePoolAllocator>,
) -> Result<(), Error> {
self.vps = (0..vp_count)
.map(|vp| HclVp::new(self, vp, self.supports_register_page, self.isolation))
.map(|vp| {
HclVp::new(
self,
vp,
self.supports_register_page,
self.isolation,
private_pool,
)
})
.collect::<Result<_, _>>()?;

Ok(())
Expand Down
32 changes: 21 additions & 11 deletions openhcl/hcl/src/ioctl/tdx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::protocol::tdx_vp_context;
use crate::protocol::tdx_vp_state;
use crate::protocol::tdx_vp_state_flags;
use crate::GuestVtl;
use hv1_structs::VtlArray;
use hvdef::HvRegisterName;
use hvdef::HvRegisterValue;
use memory_range::MemoryRange;
Expand Down Expand Up @@ -41,7 +42,7 @@ use x86defs::vmx::VmcsField;

/// Runner backing for TDX partitions.
pub struct Tdx<'a> {
apic_page: &'a UnsafeCell<ApicPage>,
apic_pages: VtlArray<&'a UnsafeCell<ApicPage>, 2>,
}

impl MshvVtl {
Expand Down Expand Up @@ -119,18 +120,18 @@ impl<'a> ProcessorRunner<'a, Tdx<'a>> {
&self.tdx_vp_context().exit_info
}

/// Gets a reference to the tdx APIC page.
pub fn tdx_apic_page(&self) -> &ApicPage {
// SAFETY: the APIC page will not be concurrently accessed by the processor
/// Gets a reference to the tdx APIC page for the given VTL.
pub fn tdx_apic_page(&self, vtl: GuestVtl) -> &ApicPage {
// SAFETY: the APIC pages will not be concurrently accessed by the processor
// while this VP is in VTL2.
unsafe { &*self.state.apic_page.get() }
unsafe { &*self.state.apic_pages[vtl].get() }
}

/// Gets a mutable reference to the tdx APIC page.
pub fn tdx_apic_page_mut(&mut self) -> &mut ApicPage {
// SAFETY: the APIC page will not be concurrently accessed by the processor
/// Gets a mutable reference to the tdx APIC page for the given VTL.
pub fn tdx_apic_page_mut(&mut self, vtl: GuestVtl) -> &mut ApicPage {
// SAFETY: the APIC pages will not be concurrently accessed by the processor
// while this VP is in VTL2.
unsafe { &mut *self.state.apic_page.get() }
unsafe { &mut *self.state.apic_pages[vtl].get() }
}

/// Gets a reference to TDX VP specific state.
Expand Down Expand Up @@ -413,11 +414,20 @@ impl<'a> ProcessorRunner<'a, Tdx<'a>> {
impl<'a> super::private::BackingPrivate<'a> for Tdx<'a> {
fn new(vp: &'a HclVp, sidecar: Option<&SidecarVp<'_>>) -> Result<Self, NoRunner> {
assert!(sidecar.is_none());
let super::BackingState::Tdx { apic_page } = &vp.backing else {
let super::BackingState::Tdx {
vtl0_apic_page,
vtl1_apic_page,
} = &vp.backing
else {
return Err(NoRunner::MismatchedIsolation);
};

// SAFETY: The mapping is held for the appropriate lifetime, and the
// APIC page is never accessed as any other type, or by any other location.
let vtl1_apic_page = unsafe { &*vtl1_apic_page.mapping().as_ptr().cast() };

Ok(Self {
apic_page: apic_page.as_ref(),
apic_pages: [vtl0_apic_page.as_ref(), vtl1_apic_page].into(),
})
}

Expand Down
11 changes: 9 additions & 2 deletions openhcl/virt_mshv_vtl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ pub enum Error {
InvalidDebugConfiguration,
#[error("missing shared memory for an isolated partition")]
MissingSharedMemory,
#[error("missing private memory for an isolated partition")]
MissingPrivateMemory,
#[error("failed to allocate TLB flush page")]
AllocateTlbFlushPage(#[source] page_pool_alloc::Error),
}

/// Error revoking guest VSM.
Expand Down Expand Up @@ -1556,8 +1560,11 @@ impl<'a> UhProtoPartition<'a> {
}

// Do per-VP HCL initialization.
hcl.add_vps(params.topology.vp_count())
.map_err(Error::Hcl)?;
hcl.add_vps(
params.topology.vp_count(),
late_params.private_vis_pages_pool.as_ref(),
)
.map_err(Error::Hcl)?;

let vps: Vec<_> = params
.topology
Expand Down
2 changes: 1 addition & 1 deletion openhcl/virt_mshv_vtl/src/processor/hardware_cvm/apic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub(crate) fn poll_apic_core<'b, B: HardwareIsolatedBacking, T: ApicBacking<'b,
) -> Result<(), UhRunVpError> {
// Check for interrupt requests from the host and kernel offload.
if vtl == GuestVtl::Vtl0 {
if let Some(irr) = apic_backing.vp().runner.proxy_irr() {
if let Some(irr) = apic_backing.vp().runner.proxy_irr_vtl0() {
// We can't put the interrupts directly into offload (where supported) because we might need
// to clear the tmr state. This can happen if a vector was previously used for a level
// triggered interrupt, and is now being used for an edge-triggered interrupt.
Expand Down
4 changes: 3 additions & 1 deletion openhcl/virt_mshv_vtl/src/processor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,7 @@ impl<'a, T: Backing> UhProcessor<'a, T> {

#[cfg(guest_arch = "x86_64")]
fn update_proxy_irr_filter(&mut self, vtl: GuestVtl) {
assert_eq!(vtl, GuestVtl::Vtl0);
let mut irr_bits: BitArray<[u32; 8], Lsb0> = BitArray::new(Default::default());

// Get all not masked && proxy SINT vectors
Expand All @@ -1086,7 +1087,8 @@ impl<'a, T: Backing> UhProcessor<'a, T> {
self.partition.fill_device_vectors(vtl, &mut irr_bits);

// Update `proxy_irr_blocked` filter in run page
self.runner.update_proxy_irr_filter(&irr_bits.into_inner());
self.runner
.update_proxy_irr_filter_vtl0(&irr_bits.into_inner());
}
}

Expand Down
2 changes: 1 addition & 1 deletion openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ impl BackingPrivate for SnpBacked {
.partition
.shared_vis_pages_pool
.as_ref()
.expect("must have shared vis pool when using SNP")
.ok_or(Error::MissingSharedMemory)?
.alloc(
NonZeroU64::new(shared_pages_required_per_cpu()).expect("is nonzero"),
format!("direct overlay vp {}", params.vp_info.base.vp_index.index()),
Expand Down
32 changes: 16 additions & 16 deletions openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ impl BackingPrivate for TdxBacked {
.partition
.shared_vis_pages_pool
.as_ref()
.expect("must have shared vis pool when using SNP")
.ok_or(crate::Error::MissingSharedMemory)?
.alloc(
NonZeroU64::new(shared_pages_required_per_cpu()).expect("is nonzero"),
format!("direct overlay vp {}", params.vp_info.base.vp_index.index()),
Expand All @@ -696,9 +696,9 @@ impl BackingPrivate for TdxBacked {
.partition
.private_vis_pages_pool
.as_ref()
.expect("private pool exists for cvm")
.ok_or(crate::Error::MissingPrivateMemory)?
.alloc(1.try_into().unwrap(), "tdx_tlb_flush".into())
.expect("not out of memory");
.map_err(crate::Error::AllocateTlbFlushPage)?;

let untrusted_synic = params
.partition
Expand Down Expand Up @@ -849,7 +849,7 @@ impl BackingPrivate for TdxBacked {
// We only offload VTL 0 today.
assert_eq!(vtl, GuestVtl::Vtl0);
tracing::info!("disabling APIC offload due to auto EOI");
let page = zerocopy::transmute_mut!(this.runner.tdx_apic_page_mut());
let page = this.runner.tdx_apic_page_mut(GuestVtl::Vtl0);
let (irr, isr) = pull_apic_offload(page);

this.backing.cvm.lapics[vtl]
Expand Down Expand Up @@ -963,8 +963,7 @@ impl UhProcessor<'_, TdxBacked> {
let r: Result<(), OffloadNotSupported> = self.backing.cvm.lapics[vtl]
.lapic
.push_to_offload(|irr, isr, tmr| {
let apic_page: &mut ApicPage =
zerocopy::transmute_mut!(self.runner.tdx_apic_page_mut());
let apic_page = self.runner.tdx_apic_page_mut(GuestVtl::Vtl0);

for (((irr, page_irr), isr), page_isr) in irr
.iter()
Expand Down Expand Up @@ -1002,8 +1001,8 @@ impl UhProcessor<'_, TdxBacked> {
// must know if that interrupt was previously level-triggered. Otherwise,
// the EOI will be incorrectly treated as level-triggered. We keep a copy
// of the tmr in the kernel so it knows when this scenario occurs.
self.runner.proxy_irr_exit_mut()[i * 2] = tmr as u32;
self.runner.proxy_irr_exit_mut()[i * 2 + 1] = (tmr >> 32) as u32;
self.runner.proxy_irr_exit_mut_vtl0()[i * 2] = tmr as u32;
self.runner.proxy_irr_exit_mut_vtl0()[i * 2 + 1] = (tmr >> 32) as u32;
}
}
});
Expand All @@ -1015,7 +1014,7 @@ impl UhProcessor<'_, TdxBacked> {
}

if update_rvi {
let page: &mut ApicPage = zerocopy::transmute_mut!(self.runner.tdx_apic_page_mut());
let page = self.runner.tdx_apic_page_mut(GuestVtl::Vtl0);
let rvi = top_vector(&page.irr);
self.backing.vtls[vtl].private_regs.rvi = rvi;
}
Expand Down Expand Up @@ -1051,8 +1050,8 @@ impl UhProcessor<'_, TdxBacked> {
) -> R {
let offloaded = self.backing.cvm.lapics[vtl].lapic.is_offloaded();
if offloaded {
let (irr, isr) =
pull_apic_offload(zerocopy::transmute_mut!(self.runner.tdx_apic_page_mut()));
assert_eq!(vtl, GuestVtl::Vtl0);
let (irr, isr) = pull_apic_offload(self.runner.tdx_apic_page_mut(GuestVtl::Vtl0));
self.backing.cvm.lapics[vtl]
.lapic
.disable_offload(&irr, &isr);
Expand Down Expand Up @@ -1156,7 +1155,7 @@ impl<'b> hardware_cvm::apic::ApicBacking<'b, TdxBacked> for TdxApicScanner<'_, '
}

let priority = vector >> 4;
let apic: &ApicPage = zerocopy::transmute_ref!(self.vp.runner.tdx_apic_page());
let apic = self.vp.runner.tdx_apic_page(vtl);
if (apic.tpr.value as u8 >> 4) >= priority {
self.tpr_threshold = priority;
return Ok(());
Expand Down Expand Up @@ -1548,7 +1547,7 @@ impl UhProcessor<'_, TdxBacked> {
.access(&mut TdxApicClient {
partition: self.partition,
vmtime: &self.vmtime,
apic_page: zerocopy::transmute_mut!(self.runner.tdx_apic_page_mut()),
apic_page: self.runner.tdx_apic_page_mut(intercepted_vtl),
dev,
vtl: intercepted_vtl,
})
Expand Down Expand Up @@ -1601,7 +1600,7 @@ impl UhProcessor<'_, TdxBacked> {
.access(&mut TdxApicClient {
partition: self.partition,
vmtime: &self.vmtime,
apic_page: zerocopy::transmute_mut!(self.runner.tdx_apic_page_mut()),
apic_page: self.runner.tdx_apic_page_mut(intercepted_vtl),
dev,
vtl: intercepted_vtl,
})
Expand Down Expand Up @@ -2413,7 +2412,7 @@ impl<T: CpuIo> X86EmulatorSupport for UhEmulationState<'_, '_, T, TdxBacked> {
partition: self.vp.partition,
dev: self.devices,
vmtime: &self.vp.vmtime,
apic_page: zerocopy::transmute_mut!(self.vp.runner.tdx_apic_page_mut()),
apic_page: self.vp.runner.tdx_apic_page_mut(self.vtl),
vtl: self.vtl,
})
.mmio_read(address, data);
Expand All @@ -2426,7 +2425,7 @@ impl<T: CpuIo> X86EmulatorSupport for UhEmulationState<'_, '_, T, TdxBacked> {
partition: self.vp.partition,
dev: self.devices,
vmtime: &self.vp.vmtime,
apic_page: zerocopy::transmute_mut!(self.vp.runner.tdx_apic_page_mut()),
apic_page: self.vp.runner.tdx_apic_page_mut(self.vtl),
vtl: self.vtl,
})
.mmio_write(address, data);
Expand Down Expand Up @@ -2662,6 +2661,7 @@ impl<T: CpuIo> ApicClient for TdxApicClient<'_, T> {
}

fn pull_offload(&mut self) -> ([u32; 8], [u32; 8]) {
assert_eq!(self.vtl, GuestVtl::Vtl0);
pull_apic_offload(self.apic_page)
}
}
Expand Down
Loading

0 comments on commit 326b7a2

Please sign in to comment.