Skip to content

Commit f839831

Browse files
authored
lib: tidy up overlay page migration & reduce memory usage (#861)
Tweak a few bits of Hyper-V overlay migration: - Don't store page kinds separately from page contents. Instead, a page's kind *determines* its contents. Most overlays have static contents or contents that depend on only a couple of parameters (e.g. the TSC scale/offset for the TSC reference page), so this saves a fair bit of memory for most kinds of pending overlay pages. - Change the overlay ordering discipline to depend entirely on the ordering of `OverlayKind`s. (Previously the first overlay assigned to a PFN would become active and then would remain active until it was removed, but pending overlays were promoted in order by kind.) This obviates the need to export/import a PFN's current active overlay during migration (or to remember the order in which overlays were applied), since a set of overlays at a single PFN will always have the same active overlay (provided the ordering remains stable across Propolis versions; there's a comment about this caveat in the new code). - Continue to manually transfer the original contents of guest pages that are covered by an overlay (since these still need to be restored), but wrap them in an enum with a variant that specifies that the overlaid page contained all zeroes. Because VM memory is initially zeroed, and many guests will establish overlay pages early in boot over pages they haven't otherwise touched, this can significantly reduce the amount of serialized page data that the manager needs to send over a migration.
1 parent 0c6e3cd commit f839831

File tree

5 files changed

+282
-289
lines changed

5 files changed

+282
-289
lines changed

bin/propolis-server/src/lib/vm/objects.rs

+6
Original file line number | Diff line number | Diff line change
@@ -273,13 +273,19 @@ impl VmObjectsLocked {
273273

274274
/// Pauses this VM's devices and its kernel VMM.
275275
pub(crate) async fn pause(&mut self) {
276+
// Order matters here: the Propolis lifecycle trait's pause function
277+
// requires that all vCPUs pause before any devices do, and all vCPUs
278+
// must be paused before the kernel VM can pause.
276279
self.vcpu_tasks.pause_all();
277280
self.pause_devices().await;
278281
self.pause_kernel_vm();
279282
}
280283

281284
/// Resumes this VM's devices and its kernel VMM.
282285
pub(crate) fn resume(&mut self) {
286+
// Order matters here: the kernel VM must resume before any vCPUs can
287+
// resume, and the Propolis lifecycle trait's resume function requires
288+
// that all devices resume before any vCPUs do.
283289
self.resume_kernel_vm();
284290
self.resume_devices();
285291
self.vcpu_tasks.resume_all();

bin/propolis-standalone/src/main.rs

+3-1
Original file line number | Diff line number | Diff line change
@@ -460,7 +460,9 @@ impl Instance {
460460
}
461461
}
462462
State::Quiesce => {
463-
// stop device emulation and vCPUs
463+
// Stop device emulation and vCPUs. Note that the device
464+
// lifecycle trait requires vCPUs to be paused before any
465+
// devices pause.
464466
for vcpu_task in guard.vcpu_tasks.iter_mut() {
465467
let _ = vcpu_task.hold();
466468
}

lib/propolis/src/enlightenment/hyperv/mod.rs

+85-70
Original file line number | Diff line number | Diff line change
@@ -17,18 +17,13 @@
1717
use std::sync::{Arc, Mutex};
1818

1919
use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues};
20-
use overlay::{
21-
OverlayContents, OverlayError, OverlayKind, OverlayManager, OverlayPage,
22-
};
20+
use overlay::{OverlayError, OverlayKind, OverlayManager, OverlayPage};
2321

2422
use crate::{
2523
accessors::MemAccessor,
2624
common::{Lifecycle, VcpuId},
2725
enlightenment::{
28-
hyperv::{
29-
bits::*,
30-
hypercall::{hypercall_page_contents, MsrHypercallValue},
31-
},
26+
hyperv::{bits::*, hypercall::MsrHypercallValue},
3227
AddCpuidError,
3328
},
3429
migrate::{
@@ -52,6 +47,13 @@ mod probes {
5247

5348
const TYPE_NAME: &str = "guest-hyperv-interface";
5449

50+
/// A collection of overlay pages that a Hyper-V enlightenment stack might be
51+
/// managing.
52+
#[derive(Default)]
53+
struct OverlayPages {
54+
hypercall: Option<OverlayPage>,
55+
}
56+
5557
#[derive(Default)]
5658
struct Inner {
5759
/// This enlightenment's overlay manager.
@@ -63,7 +65,8 @@ struct Inner {
6365
/// The last value stored in the [`bits::HV_X64_MSR_HYPERCALL`] MSR.
6466
msr_hypercall_value: MsrHypercallValue,
6567

66-
hypercall_overlay: Option<OverlayPage>,
68+
/// This enlightenment's active overlay page handles.
69+
overlays: OverlayPages,
6770
}
6871

6972
pub struct HyperV {
@@ -95,7 +98,7 @@ impl HyperV {
9598
// manually cleared this bit).
9699
if value == 0 {
97100
inner.msr_hypercall_value.clear_enabled();
98-
inner.hypercall_overlay.take();
101+
inner.overlays.hypercall.take();
99102
}
100103

101104
inner.msr_guest_os_id_value = value;
@@ -131,23 +134,22 @@ impl HyperV {
131134
// the guest.
132135
if !new.enabled() {
133136
inner.msr_hypercall_value = new;
134-
inner.hypercall_overlay.take();
137+
inner.overlays.hypercall.take();
135138
return WrmsrOutcome::Handled;
136139
}
137140

138141
// Ensure the overlay is in the correct position.
139-
let res = if let Some(overlay) = inner.hypercall_overlay.as_mut() {
142+
let res = if let Some(overlay) = inner.overlays.hypercall.as_mut() {
140143
overlay.move_to(new.gpfn())
141144
} else {
142145
inner
143146
.overlay_manager
144147
.add_overlay(
145148
new.gpfn(),
146-
OverlayKind::Hypercall,
147-
OverlayContents(Box::new(hypercall_page_contents())),
149+
OverlayKind::HypercallReturnNotSupported,
148150
)
149151
.map(|overlay| {
150-
inner.hypercall_overlay = Some(overlay);
152+
inner.overlays.hypercall = Some(overlay);
151153
})
152154
};
153155

@@ -250,40 +252,63 @@ impl Lifecycle for HyperV {
250252
TYPE_NAME
251253
}
252254

253-
fn reset(&self) {
255+
fn pause(&self) {
254256
let mut inner = self.inner.lock().unwrap();
255257

256-
// Create a new overlay manager that tracks no pages. The old overlay
257-
// manager needs to be dropped before any of the overlay pages that
258-
// referenced it, so explicitly replace the existing manager with a new
259-
// one (and drop the old one) before default-initializing the rest of
260-
// the enlightenment's state.
261-
let new_overlay_manager = Arc::new(OverlayManager::default());
262-
let _ = std::mem::replace(
263-
&mut inner.overlay_manager,
264-
new_overlay_manager.clone(),
265-
);
266-
267-
*inner = Inner {
268-
overlay_manager: new_overlay_manager,
269-
..Default::default()
270-
};
258+
// Remove all active overlays from service. If the VM migrates, this
259+
// allows the original guest pages that sit underneath those overlays to
260+
// be transferred as part of the guest RAM transfer phase instead of
261+
// possibly being serialized and sent during the device state phase. Any
262+
// active overlays will be re-established on the target during its
263+
// device state import phase.
264+
//
265+
// Any guest data written to the overlay pages will be lost. That's OK
266+
// because all the overlays this module currently supports are
267+
// semantically read-only (guests should expect to take an exception if
268+
// they try to write to them, although today no such exception is
269+
// raised).
270+
//
271+
// The caller who is coordinating the "pause VM" operation is required
272+
// to ensure that devices are paused only if vCPUs are paused, so no
273+
// vCPU will be able to observe the missing overlay.
274+
inner.overlays = OverlayPages::default();
271275

272-
inner.overlay_manager.attach(&self.acc_mem);
276+
assert!(inner.overlay_manager.is_empty());
273277
}
274278

275-
fn halt(&self) {
279+
fn resume(&self) {
276280
let mut inner = self.inner.lock().unwrap();
277281

278-
// Create a new overlay manager and drop the reference to the old one.
279-
// This should be the only active reference to this manager, since all
280-
// overlay page operations happen during VM exits, and the vCPUs have
281-
// all quiesced by this point.
282+
assert!(inner.overlay_manager.is_empty());
283+
284+
// Re-establish any overlays that were removed when the enlightenment
285+
// was paused.
282286
//
283-
// This ensures that when this object is dropped, any overlay pages it
284-
// owns can be dropped safely.
285-
assert_eq!(Arc::strong_count(&inner.overlay_manager), 1);
286-
inner.overlay_manager = OverlayManager::new();
287+
// Writes to the hypercall MSR only persist if they specify a valid
288+
// overlay PFN, so adding the hypercall overlay is guaranteed to
289+
// succeed.
290+
let hypercall_overlay =
291+
inner.msr_hypercall_value.enabled().then(|| {
292+
inner
293+
.overlay_manager
294+
.add_overlay(
295+
inner.msr_hypercall_value.gpfn(),
296+
OverlayKind::HypercallReturnNotSupported,
297+
)
298+
.expect("hypercall MSR is only enabled with a valid PFN")
299+
});
300+
301+
inner.overlays = OverlayPages { hypercall: hypercall_overlay };
302+
}
303+
304+
fn reset(&self) {
305+
let inner = self.inner.lock().unwrap();
306+
assert!(inner.overlay_manager.is_empty());
307+
}
308+
309+
fn halt(&self) {
310+
let inner = self.inner.lock().unwrap();
311+
assert!(inner.overlay_manager.is_empty());
287312
}
288313

289314
fn migrate(&'_ self) -> Migrator<'_> {
@@ -309,58 +334,48 @@ impl MigrateSingle for HyperV {
309334
mut offer: PayloadOffer,
310335
_ctx: &MigrateCtx,
311336
) -> Result<(), MigrateStateError> {
312-
let data: migrate::HyperVEnlightenmentV1 = offer.parse()?;
337+
let migrate::HyperVEnlightenmentV1 { msr_guest_os_id, msr_hypercall } =
338+
offer.parse()?;
313339
let mut inner = self.inner.lock().unwrap();
314340

341+
// Re-establish any overlay pages that are active in the restored MSRs.
342+
//
315343
// A well-behaved source should ensure that the hypercall MSR value is
316344
// within the guest's PA range and that its Enabled bit agrees with the
317345
// value of the guest OS ID MSR. But this data was received over the
318346
// wire, so for safety's sake, verify it all and return a migration
319347
// error if anything is inconsistent.
320-
let hypercall_msr = MsrHypercallValue(data.msr_hypercall);
321-
if hypercall_msr.enabled() {
322-
if data.msr_guest_os_id == 0 {
348+
let msr_hypercall_value = MsrHypercallValue(msr_hypercall);
349+
let hypercall_overlay = if msr_hypercall_value.enabled() {
350+
if msr_guest_os_id == 0 {
323351
return Err(MigrateStateError::ImportFailed(
324352
"hypercall MSR enabled but guest OS ID MSR is 0"
325353
.to_string(),
326354
));
327355
}
328356

329-
// TODO(#850): Registering a new overlay with the overlay manager is
330-
// the only way to get an `OverlayPage` for this overlay. However,
331-
// the page's contents were already migrated in the RAM transfer
332-
// phase, so it's not actually necessary to create a *new* overlay
333-
// here; in fact, it would be incorrect to do so if the hypercall
334-
// target PFN had multiple overlays and a different one was active!
335-
// Fortunately, there's only one overlay type right now, so there's
336-
// no way for a page to have multiple overlays.
337-
//
338-
// (It would also be incorrect to rewrite this page if the guest
339-
// wrote data to it expecting to retrieve it later, but per the
340-
// TLFS, writes to the hypercall page should #GP anyway, so guests
341-
// ought not to expect too much here.)
342-
//
343-
// A better approach is to have the overlay manager export and
344-
// import its contents and, on import, return the set of overlay
345-
// page registrations that it imported. This layer can then check
346-
// that these registrations are consistent with its MSR values
347-
// before proceeding.
348357
match inner.overlay_manager.add_overlay(
349-
hypercall_msr.gpfn(),
350-
OverlayKind::Hypercall,
351-
OverlayContents(Box::new(hypercall_page_contents())),
358+
msr_hypercall_value.gpfn(),
359+
OverlayKind::HypercallReturnNotSupported,
352360
) {
353-
Ok(overlay) => inner.hypercall_overlay = Some(overlay),
361+
Ok(overlay) => Some(overlay),
354362
Err(e) => {
355363
return Err(MigrateStateError::ImportFailed(format!(
356364
"failed to re-establish hypercall overlay: {e}"
357365
)))
358366
}
359367
}
360-
}
368+
} else {
369+
None
370+
};
371+
372+
*inner = Inner {
373+
overlay_manager: inner.overlay_manager.clone(),
374+
msr_guest_os_id_value: msr_guest_os_id,
375+
msr_hypercall_value,
376+
overlays: OverlayPages { hypercall: hypercall_overlay },
377+
};
361378

362-
inner.msr_guest_os_id_value = data.msr_guest_os_id;
363-
inner.msr_hypercall_value = hypercall_msr;
364379
Ok(())
365380
}
366381
}

0 commit comments

Comments (0)