From dc7712b8007a43aa1dbb7ef5e3e7ff4b6a07bad5 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 20 Feb 2025 20:45:20 +0000 Subject: [PATCH 1/4] propolis-server should not crash when failing to start a VM Fixes #838 --- bin/propolis-server/src/lib/vm/ensure.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 0755b9881..443c05166 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -290,7 +290,26 @@ impl<'a> VmEnsureNotStarted<'a> { kernel_vm_paused: false, }) } - Err(e) => Err(self.fail(e).await), + Err(e) => { + // In the happy path, `vmm_rt` is moved to the caller inside + // `VmEnsureObjectsCreated`. Here, we would `self.fail()` and + // implicitly drop `vmm_rt` before returning up the stack. This + // would result in a Tokio panic like `Cannot drop a runtime in + // a context where blocking is not allowed`, killing + // `propolis-server` and even resulting in API clients just + // losing connections instead of getting well-formed error + // responses back. + // + // So instead, spawn_blocking() to get a context where blocking + // is allowed, drop the `vmm_rt` there, and continue on with + // whatever unfortunate failure has transpired. + tokio::task::spawn_blocking(move || { + std::mem::drop(vmm_rt); + }) + .await + .expect("can drop vmm_rt"); + Err(self.fail(e).await) + } } } From b614d506ef6632264a61a0eea265cb22088a3101 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 21 Feb 2025 01:15:46 +0000 Subject: [PATCH 2/4] be better about not dropping the VMM runtime this approach also fixes the immediate issue, but also makes sure that we have a clear way to do fallible init on the way to VmEnsureObjectsCreated. another approach here could be to move the call tree leading to VMM runtime creation and init into a separate thread, be it spawn_blocking or a scoped thread. this would start from `create_and_activate_vm()` to capture both the migration-based and local-request-based init. i'm just hesitant about moving more than strictly needed into "strange" scopes that might make future-us have to think twice about the execution environment. --- bin/propolis-server/src/lib/vm/ensure.rs | 111 +++++++++++++---------- 1 file changed, 62 insertions(+), 49 deletions(-) diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 443c05166..950e63b15 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -236,39 +236,70 @@ impl<'a> VmEnsureNotStarted<'a> { }, )); - // Create the runtime that will host tasks created by VMM components - // (e.g. block device runtime tasks). - let vmm_rt = tokio::runtime::Builder::new_multi_thread() - .thread_name("tokio-rt-vmm") - .worker_threads(usize::max( - VMM_MIN_RT_THREADS, - VMM_BASE_RT_THREADS + spec.board.cpus as usize, - )) - .enable_all() - .build()?; - let log_for_init = self.log.clone(); let properties = self.ensure_request.properties.clone(); let options = self.ensure_options.clone(); let queue_for_init = input_queue.clone(); - let init_result = vmm_rt - .spawn(async move { - initialize_vm_objects( - log_for_init, - spec, - properties, - options, - queue_for_init, - ) - .await - }) - .await - .map_err(|e| { - anyhow::anyhow!("failed to join VM object creation task: {e}") - })?; - - match init_result { - Ok(objects) => { + + // Either the block following this succeeds with both a Tokio runtime + // and VM objects, or entirely fails with no partial state for us to + // clean up. + type InitResult = + anyhow::Result<(tokio::runtime::Runtime, InputVmObjects)>; + + // We need to create a new runtime to host the tasks for this VMM's + // objects, but that initialization is fallible and results in dropping + // the fledgling VMM runtime itself. Dropping a Tokio runtime on a + // worker thread in a Tokio runtime will panic, so do all init in a + // `spawn_blocking` where this won't be an issue. + // + // When the runtime is returned to this thread, it must not be dropped. + // That means that the path between this result and returning an + // `Ok(VmEnsureObjectsCreated)` must be infallible. + // + // `VmEnsureObjectsCreated` (and later state transitions) take care to + // `shutdown_background` the runtime. + let result: InitResult = tokio::task::spawn_blocking(move || { + // Create the runtime that will host tasks created by + // VMM components (e.g. block device runtime tasks). + let vmm_rt = tokio::runtime::Builder::new_multi_thread() + .thread_name("tokio-rt-vmm") + .worker_threads(usize::max( + VMM_MIN_RT_THREADS, + VMM_BASE_RT_THREADS + spec.board.cpus as usize, + )) + .enable_all() + .build()?; + + let init_result = vmm_rt + .block_on(async move { + initialize_vm_objects( + log_for_init, + spec, + properties, + options, + queue_for_init, + ) + .await + }) + .map_err(|e| { + anyhow::anyhow!( + "failed to join VM object creation task: {e}" + ) + })?; + Ok((vmm_rt, init_result)) + }) + .await + .map_err(|e| { + // This is extremely unexpected: if the join failed, the init + // task panicked or was cancelled. If the init itself failed, + // which is somewhat more reasonable, we would expect the join + // to succeed and have an error below. + anyhow::anyhow!("failed to join VMM runtime init task: {e}") + })?; + + match result { + Ok((vmm_rt, objects)) => { // N.B. Once these `VmObjects` exist, it is no longer safe to // call `vm_init_failed`. let objects = Arc::new(VmObjects::new( @@ -290,26 +321,8 @@ impl<'a> VmEnsureNotStarted<'a> { kernel_vm_paused: false, }) } - Err(e) => { - // In the happy path, `vmm_rt` is moved to the caller inside - // `VmEnsureObjectsCreated`. Here, we would `self.fail()` and - // implicitly drop `vmm_rt` before returning up the stack. This - // would result in a Tokio panic like `Cannot drop a runtime in - // a context where blocking is not allowed`, killing - // `propolis-server` and even resulting in API clients just - // losing connections instead of getting well-formed error - // responses back. - // - // So instead, spawn_blocking() to get a context where blocking - // is allowed, drop the `vmm_rt` there, and continue on with - // whatever unfortunate failure has transpired. - tokio::task::spawn_blocking(move || { - std::mem::drop(vmm_rt); - }) - .await - .expect("can drop vmm_rt"); - Err(self.fail(e).await) - } + + Err(e) => Err(self.fail(e).await), } } From c05401bcc7f2b9314894ed469aa6905ebeff75ef Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 21 Feb 2025 01:30:47 +0000 Subject: [PATCH 3/4] whitespace --- bin/propolis-server/src/lib/vm/ensure.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 950e63b15..494edf989 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -321,7 +321,6 @@ impl<'a> VmEnsureNotStarted<'a> { kernel_vm_paused: false, }) } - Err(e) => Err(self.fail(e).await), } } From 7053acf3b599981df0f471107bf18060145f329a Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 21 Feb 2025 02:10:50 +0000 Subject: [PATCH 4/4] shuffle around who must take care to what --- bin/propolis-server/src/lib/vm/ensure.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 494edf989..e8c8b3922 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -256,9 +256,6 @@ impl<'a> VmEnsureNotStarted<'a> { // When the runtime is returned to this thread, it must not be dropped. // That means that the path between this result and returning an // `Ok(VmEnsureObjectsCreated)` must be infallible. - // - // `VmEnsureObjectsCreated` (and later state transitions) take care to - // `shutdown_background` the runtime. let result: InitResult = tokio::task::spawn_blocking(move || { // Create the runtime that will host tasks created by // VMM components (e.g. block device runtime tasks). @@ -341,6 +338,12 @@ impl<'a> VmEnsureNotStarted<'a> { /// Represents an instance ensure request that has proceeded far enough to /// create a set of VM objects, but that has not yet installed those objects as /// an `ActiveVm` or notified the requestor that its request is complete. +/// +/// WARNING: dropping `VmEnsureObjectsCreated` is a panic risk since dropping +/// the contained `tokio::runtime::Runtime` on in a worker thread will panic. It +/// is probably a bug to drop `VmEnsureObjectsCreated`, as it is expected users +/// will quickly call [`VmEnsureObjectsCreated::ensure_active`], but if you +/// must, take care in handling the contained `vmm_rt`. pub(crate) struct VmEnsureObjectsCreated<'a> { log: &'a slog::Logger, vm: &'a Arc,