From 5719c284a90abe80db1f465d8f6bf079cf60121b Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 21 Feb 2024 11:28:32 -0800 Subject: [PATCH 1/4] server: return better HTTP errors when not ensured Currently, if the `propolis-server` APIs that require a running instance are called when the server has not received an `InstanceEnsure` request, we return a 404. Sled-agent and Nexus handle this status code by assuming *they* have done something wrong, such as attempting to call an API route that the server doesn't support. This results in issues where sled-agent assumes that it's the one that made a mistake when it tries to call APIs on a `propolis-server` that has lost its VM instance due to being restarted after a panic. To make it easier for callers distinguish between errors that indicate "this server doesn't know about a running instance" and errors that indicate "you attempted to call an API route that I don't know about", I've changed the status code returned by `propolis-server`s that haven't created an instance to HTTP [424 Failed Dependency]. The best thing about returning this status code is that it's "not 404", but it does arguably have the most correct semantics for this specific error: > The HTTP 424 Failed Dependency client error response code indicates > that the method could not be performed on the resource because the > requested action depended on another action, and that action failed. On the other hand, this is kind of a weird status code that's mostly used by WebDAV servers, so I'm open to using something else. We could alternatively return 410 Gone in these cases, but that status is already returned by servers whose VM instance has been deleted. I thought it might be nice to be able to distinguish between "not ensured" and "ensured, and then deleted"...but we could switch to 410 Gone here, too. [424 Failed Dependency]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/424 --- bin/propolis-server/src/lib/server.rs | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/bin/propolis-server/src/lib/server.rs b/bin/propolis-server/src/lib/server.rs index 81bb6cf65..08bc2f53e 100644 --- a/bin/propolis-server/src/lib/server.rs +++ b/bin/propolis-server/src/lib/server.rs @@ -296,12 +296,7 @@ impl DropshotEndpointContext { self.services.vm.lock().await, VmControllerState::as_controller, ) - .map_err(|_| { - HttpError::for_not_found( - None, - "Server not initialized (no instance)".to_string(), - ) - }) + .map_err(|_| no_instance_error()) } } @@ -785,10 +780,7 @@ async fn instance_get_common( ) -> Result<(api::Instance, VersionedInstanceSpec), HttpError> { let ctx = rqctx.context(); match &*ctx.services.vm.lock().await { - VmControllerState::NotCreated => Err(HttpError::for_not_found( - None, - "Server not initialized (no instance)".to_string(), - )), + VmControllerState::NotCreated => Err(no_instance_error()), VmControllerState::Created(vm) => { Ok(( api::Instance { @@ -862,10 +854,7 @@ async fn instance_state_monitor( let vm_state = ctx.services.vm.lock().await; match &*vm_state { VmControllerState::NotCreated => { - return Err(HttpError::for_not_found( - None, - "Server not initialized (no instance)".to_string(), - )); + return Err(no_instance_error()); } VmControllerState::Created(vm) => vm.state_watcher().clone(), VmControllerState::Destroyed { state_watcher, .. } => { @@ -885,12 +874,13 @@ async fn instance_state_monitor( // means it will never reach the number the client wants it to reach. // Inform the client of this condition so it doesn't wait forever. state_watcher.changed().await.map_err(|_| { - HttpError::for_status( - Some(format!( + HttpError::for_client_error( + Some(NO_INSTANCE.to_string()), + http::status::StatusCode::GONE, + format!( "No instance present; will never reach generation {}", gen - )), - http::status::StatusCode::GONE, + ), ) })?; } @@ -1206,6 +1196,16 @@ pub fn api() -> ApiDescription> { api } +const NO_INSTANCE: &str = "NO_INSTANCE"; + +fn no_instance_error() -> HttpError { + HttpError::for_client_error( + Some(NO_INSTANCE.to_string()), + http::StatusCode::FAILED_DEPENDENCY, + "Server not initialized (no instance)".to_string(), + ) +} + #[cfg(test)] mod tests { #[test] From 6a97df7db720c3d87424c47f4f4e6949aca74a52 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 21 Feb 2024 12:03:18 -0800 Subject: [PATCH 2/4] more descriptive names --- bin/propolis-server/src/lib/server.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/bin/propolis-server/src/lib/server.rs b/bin/propolis-server/src/lib/server.rs index 08bc2f53e..74c492b4b 100644 --- a/bin/propolis-server/src/lib/server.rs +++ b/bin/propolis-server/src/lib/server.rs @@ -296,7 +296,7 @@ impl DropshotEndpointContext { self.services.vm.lock().await, VmControllerState::as_controller, ) - .map_err(|_| no_instance_error()) + .map_err(|_| not_created_error()) } } @@ -780,7 +780,7 @@ async fn instance_get_common( ) -> Result<(api::Instance, VersionedInstanceSpec), HttpError> { let ctx = rqctx.context(); match &*ctx.services.vm.lock().await { - VmControllerState::NotCreated => Err(no_instance_error()), + VmControllerState::NotCreated => Err(not_created_error()), VmControllerState::Created(vm) => { Ok(( api::Instance { @@ -854,7 +854,7 @@ async fn instance_state_monitor( let vm_state = ctx.services.vm.lock().await; match &*vm_state { VmControllerState::NotCreated => { - return Err(no_instance_error()); + return Err(not_created_error()); } VmControllerState::Created(vm) => vm.state_watcher().clone(), VmControllerState::Destroyed { state_watcher, .. } => { @@ -1036,10 +1036,7 @@ async fn instance_migrate_status( let migration_id = path_params.into_inner().migration_id; let ctx = rqctx.context(); match &*ctx.services.vm.lock().await { - VmControllerState::NotCreated => Err(HttpError::for_not_found( - None, - "Server not initialized (no instance)".to_string(), - )), + VmControllerState::NotCreated => Err(not_created_error()), VmControllerState::Created(vm) => { vm.migrate_status(migration_id).map_err(Into::into).map(|state| { HttpResponseOk(api::InstanceMigrateStatusResponse { @@ -1198,7 +1195,7 @@ pub fn api() -> ApiDescription> { const NO_INSTANCE: &str = "NO_INSTANCE"; -fn no_instance_error() -> HttpError { +fn not_created_error() -> HttpError { HttpError::for_client_error( Some(NO_INSTANCE.to_string()), http::StatusCode::FAILED_DEPENDENCY, From 686d48b7df349fc2d56c0df3224d884cb20c9423 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 21 Feb 2024 12:03:28 -0800 Subject: [PATCH 3/4] update expected statuses in test --- phd-tests/tests/src/server_state_machine.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phd-tests/tests/src/server_state_machine.rs b/phd-tests/tests/src/server_state_machine.rs index b1ee13382..260e87f1c 100644 --- a/phd-tests/tests/src/server_state_machine.rs +++ b/phd-tests/tests/src/server_state_machine.rs @@ -37,15 +37,15 @@ async fn instance_stop_causes_destroy_test(ctx: &Framework) { assert!(matches!( vm.run().await.unwrap_err().status().unwrap(), - http::status::StatusCode::NOT_FOUND + http::status::StatusCode::FAILED_DEPENDENCY )); assert!(matches!( vm.stop().await.unwrap_err().status().unwrap(), - http::status::StatusCode::NOT_FOUND + http::status::StatusCode::FAILED_DEPENDENCY )); assert!(matches!( vm.reset().await.unwrap_err().status().unwrap(), - http::status::StatusCode::NOT_FOUND + http::status::StatusCode::FAILED_DEPENDENCY )); } From 3ad59e910ce785bfbda9c45692de7bf78640a69e Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 10 Apr 2024 09:45:29 -0700 Subject: [PATCH 4/4] move error codes to an enum `propolis-api-types` --- bin/propolis-server/src/lib/server.rs | 29 +++++++++-------- crates/propolis-api-types/src/lib.rs | 46 ++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/bin/propolis-server/src/lib/server.rs b/bin/propolis-server/src/lib/server.rs index 74c492b4b..39dcf2d34 100644 --- a/bin/propolis-server/src/lib/server.rs +++ b/bin/propolis-server/src/lib/server.rs @@ -571,19 +571,21 @@ async fn instance_ensure_common( { let existing_properties = existing.properties(); if existing_properties.id != properties.id { - return Err(HttpError::for_status( - Some(format!( + return Err(HttpError::for_client_error( + Some(api::ErrorCode::AlreadyInitialized.to_string()), + http::status::StatusCode::CONFLICT, + format!( "Server already initialized with ID {}", existing_properties.id - )), - http::status::StatusCode::CONFLICT, + ), )); } if *existing_properties != properties { - return Err(HttpError::for_status( - Some("Cannot update running server".to_string()), + return Err(HttpError::for_client_error( + Some(api::ErrorCode::AlreadyRunning.to_string()), http::status::StatusCode::CONFLICT, + "Cannot update running server".to_string(), )); } @@ -653,10 +655,11 @@ async fn instance_ensure_common( vm_hdl.await.unwrap() } .map_err(|e| { - HttpError::for_internal_error(format!( - "failed to create instance: {}", - e - )) + HttpError::for_client_error( + Some(api::ErrorCode::CreateFailed.to_string()), + http::status::StatusCode::INTERNAL_SERVER_ERROR, + format!("failed to create instance: {e}"), + ) })?; if let Some(ramfb) = vm.framebuffer() { @@ -875,7 +878,7 @@ async fn instance_state_monitor( // Inform the client of this condition so it doesn't wait forever. state_watcher.changed().await.map_err(|_| { HttpError::for_client_error( - Some(NO_INSTANCE.to_string()), + Some(api::ErrorCode::NoInstance.to_string()), http::status::StatusCode::GONE, format!( "No instance present; will never reach generation {}", @@ -1193,11 +1196,9 @@ pub fn api() -> ApiDescription> { api } -const NO_INSTANCE: &str = "NO_INSTANCE"; - fn not_created_error() -> HttpError { HttpError::for_client_error( - Some(NO_INSTANCE.to_string()), + Some(api::ErrorCode::NoInstance.to_string()), http::StatusCode::FAILED_DEPENDENCY, "Server not initialized (no instance)".to_string(), ) diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 3785a0845..3725f9754 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -4,7 +4,7 @@ //! Definitions for types exposed by the propolis-server API -use std::net::SocketAddr; +use std::{fmt, net::SocketAddr}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -395,3 +395,47 @@ pub struct SnapshotRequestPathParams { pub struct VCRRequestPathParams { pub id: Uuid, } + +/// Error codes used to populate the `error_code` field of Dropshot API responses. +#[derive( + Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize, JsonSchema, +)] +pub enum ErrorCode { + /// This `propolis-server` process has not received an `InstanceEnsure` + /// request yet. + NoInstance, + /// This `propolis-server` process has already received an `InstanceEnsure` + /// request with a different ID. + AlreadyInitialized, + /// Cannot update a running server. + AlreadyRunning, + /// Instance creation failed + CreateFailed, +} + +impl fmt::Display for ErrorCode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + +impl std::str::FromStr for ErrorCode { + type Err = &'static str; + fn from_str(s: &str) -> Result { + match s.trim() { + s if s.eq_ignore_ascii_case("NoInstance") => Ok(Self::NoInstance), + s if s.eq_ignore_ascii_case("AlreadyInitialized") => { + Ok(ErrorCode::AlreadyInitialized) + } + s if s.eq_ignore_ascii_case("AlreadyRunning") => { + Ok(ErrorCode::AlreadyRunning) + } + s if s.eq_ignore_ascii_case("CreateFailed") => { + Ok(ErrorCode::CreateFailed) + } + _ => Err("unknown error code, expected one of: \ + 'NoInstance', 'AlreadyInitialized', 'AlreadyRunning', \ + 'CreateFailed'"), + } + } +}