Skip to content

Commit 5b73261

Browse files
authoredMar 14, 2025··
server: improve behavior of starting VMs that are waiting for Crucible activation (#873)
Modify the state driver's VM startup procedure to allow the driver to process Crucible volume configuration changes while block backends are being activated. This fixes a livelock that occurs when starting a VM with a Crucible VCR that points to an unavailable downstairs: the unavailable downstairs prevents Crucible activation from proceeding; Nexus sends a corrected VCR that, if applied, would allow the upstairs to activate; but the state driver never applies the new VCR because it's blocked trying to activate using the broken VCR. Since the state driver can now dequeue VM state change requests during startup, also teach it to abort startup if a stop request is received while block backends are starting. This is very slightly spicy, because it can cancel a block backend startup future in a way that was not possible before, but (a) the only affected backend is the Crucible backend, and (b) there should be no requests in flight anyway because, if this case is reached, the VM's vCPUs have not started. Add PHD coverage of the new behaviors: - Modify the PHD VCR replacement smoke test to check that start requests with a bad set of Crucible targets can be unblocked by replacing a bad target with a good target. - Add a server state machine test that checks that a VM that is blocked waiting for Crucible activation can be explicitly stopped. To assist with this, add a feature to PHD Crucible disks that allows a test to specify that a disk's generated VCRs should contain an invalid downstairs IP address. Starting a VM with a disk configured this way will cause activation to block until the disk's VCR is replaced with a corrected VCR.
1 parent 7cd7c88 commit 5b73261

File tree

12 files changed

+475
-127
lines changed

12 files changed

+475
-127
lines changed
 

‎bin/propolis-cli/src/main.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ enum Command {
155155
/// Call the VolumeConstructionRequest replace endpoint
156156
Vcr {
157157
/// Uuid for the disk
158-
#[clap(short = 'u', action)]
159-
uuid: Uuid,
158+
#[clap(short = 'd', action)]
159+
disk_id: String,
160160

161161
/// File with a JSON InstanceVcrReplace struct
162162
#[clap(long, action)]
@@ -510,7 +510,7 @@ async fn new_instance(
510510

511511
async fn replace_vcr(
512512
client: &Client,
513-
id: Uuid,
513+
id: String,
514514
vcr_replace: InstanceVcrReplace,
515515
) -> anyhow::Result<()> {
516516
// Try to call the endpoint
@@ -941,9 +941,9 @@ async fn main() -> anyhow::Result<()> {
941941
}
942942
Command::Monitor => monitor(addr).await?,
943943
Command::InjectNmi => inject_nmi(&client).await?,
944-
Command::Vcr { uuid, vcr_replace } => {
944+
Command::Vcr { disk_id, vcr_replace } => {
945945
let replace: InstanceVcrReplace = parse_json_file(&vcr_replace)?;
946-
replace_vcr(&client, uuid, replace).await?
946+
replace_vcr(&client, disk_id, replace).await?
947947
}
948948
}
949949

‎bin/propolis-server/src/lib/vm/objects.rs

+20-55
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@ use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
2323

2424
use crate::{serial::Serial, spec::Spec, vcpu_tasks::VcpuTaskController};
2525

26-
use super::{
27-
state_driver::VmStartReason, BlockBackendMap, CrucibleBackendMap, DeviceMap,
28-
};
26+
use super::{BlockBackendMap, CrucibleBackendMap, DeviceMap};
2927

3028
/// A collection of components that make up a Propolis VM instance.
3129
pub(crate) struct VmObjects {
@@ -189,6 +187,14 @@ impl VmObjectsLocked {
189187
&self.ps2ctrl
190188
}
191189

190+
pub(crate) fn device_map(&self) -> &DeviceMap {
191+
&self.devices
192+
}
193+
194+
pub(crate) fn block_backend_map(&self) -> &BlockBackendMap {
195+
&self.block_backends
196+
}
197+
192198
/// Iterates over all of the lifecycle trait objects in this VM and calls
193199
/// `func` on each one.
194200
pub(crate) fn for_each_device(
@@ -244,33 +250,6 @@ impl VmObjectsLocked {
244250
self.machine.reinitialize().unwrap();
245251
}
246252

247-
/// Starts a VM's devices and allows all of its vCPU tasks to run.
248-
///
249-
/// This function may be called either after initializing a new VM from
250-
/// scratch or after an inbound live migration. In the latter case, this
251-
/// routine assumes that the caller initialized and activated the VM's vCPUs
252-
/// prior to importing state from the migration source.
253-
pub(super) async fn start(
254-
&mut self,
255-
reason: VmStartReason,
256-
) -> anyhow::Result<()> {
257-
match reason {
258-
VmStartReason::ExplicitRequest => {
259-
self.reset_vcpus();
260-
}
261-
VmStartReason::MigratedIn => {
262-
self.resume_kernel_vm();
263-
}
264-
}
265-
266-
let result = self.start_devices().await;
267-
if result.is_ok() {
268-
self.vcpu_tasks.resume_all();
269-
}
270-
271-
result
272-
}
273-
274253
/// Pauses this VM's devices and its kernel VMM.
275254
pub(crate) async fn pause(&mut self) {
276255
// Order matters here: the Propolis lifecycle trait's pause function
@@ -288,6 +267,16 @@ impl VmObjectsLocked {
288267
// that all devices resume before any vCPUs do.
289268
self.resume_kernel_vm();
290269
self.resume_devices();
270+
self.resume_vcpus();
271+
}
272+
273+
/// Resumes this VM's vCPU tasks.
274+
///
275+
/// This is intended for use in VM startup sequences where the state driver
276+
/// needs fine-grained control over the order in which devices and vCPUs
277+
/// start. When pausing and resuming a VM that's already been started, use
278+
/// [`Self::pause`] and [`Self::resume`] instead.
279+
pub(crate) fn resume_vcpus(&mut self) {
291280
self.vcpu_tasks.resume_all();
292281
}
293282

@@ -321,31 +310,7 @@ impl VmObjectsLocked {
321310
// Resume devices so they're ready to do more work, then resume
322311
// vCPUs.
323312
self.resume_devices();
324-
self.vcpu_tasks.resume_all();
325-
}
326-
327-
/// Starts all of a VM's devices and allows its block backends to process
328-
/// requests from their devices.
329-
async fn start_devices(&self) -> anyhow::Result<()> {
330-
self.for_each_device_fallible(|name, dev| {
331-
info!(self.log, "sending startup complete to {}", name);
332-
let res = dev.start();
333-
if let Err(e) = &res {
334-
error!(self.log, "startup failed for {}: {:?}", name, e);
335-
}
336-
res
337-
})?;
338-
339-
for (name, backend) in self.block_backends.iter() {
340-
info!(self.log, "starting block backend {}", name);
341-
let res = backend.start().await;
342-
if let Err(e) = &res {
343-
error!(self.log, "startup failed for {}: {:?}", name, e);
344-
return res;
345-
}
346-
}
347-
348-
Ok(())
313+
self.resume_vcpus();
349314
}
350315

351316
/// Pauses all of a VM's devices.

‎bin/propolis-server/src/lib/vm/request_queue.rs

-6
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,6 @@ impl std::fmt::Debug for StateChangeRequest {
8888
pub enum ComponentChangeRequest {
8989
/// Attempts to update the volume construction request for the supplied
9090
/// Crucible volume.
91-
///
92-
/// TODO: Due to https://github.com/oxidecomputer/crucible/issues/871, this
93-
/// is only allowed once the VM is started and the volume has activated, but
94-
/// it should be allowed even before the VM has started.
9591
ReconfigureCrucibleVolume {
9692
/// The ID of the Crucible backend in the VM's Crucible backend map.
9793
backend_id: SpecKey,
@@ -356,7 +352,6 @@ impl ExternalRequestQueue {
356352
"request" => ?request,
357353
"reason" => %reason
358354
);
359-
360355
return Err(reason);
361356
}
362357
}
@@ -795,7 +790,6 @@ mod test {
795790
fn mutation_disallowed_after_stopped() {
796791
let mut queue =
797792
ExternalRequestQueue::new(test_logger(), InstanceAutoStart::Yes);
798-
799793
queue.notify_request_completed(CompletedRequest::Start {
800794
succeeded: true,
801795
});

0 commit comments

Comments
 (0)