diff --git a/.github/buildomat/jobs/phd-build.sh b/.github/buildomat/jobs/phd-build.sh index 5caaacdc8..2131c21fb 100644 --- a/.github/buildomat/jobs/phd-build.sh +++ b/.github/buildomat/jobs/phd-build.sh @@ -30,7 +30,12 @@ rustc --version # Build the Propolis server binary with 'dev' profile to enable assertions that # should fire during tests. banner build-propolis -ptime -m cargo build --verbose -p propolis-server + +# Compile propolis-server so that it allows development features to be used even +# though the `omicron-build` feature is enabled. +export PHD_BUILD="true" +ptime -m cargo build --verbose -p propolis-server \ + --features omicron-build,failure-injection # The PHD runner requires unwind-on-panic to catch certain test failures, so # build it with the 'dev' profile which is so configured. diff --git a/.github/buildomat/phd-run-with-args.sh b/.github/buildomat/phd-run-with-args.sh index d7b6bb463..4ca389227 100755 --- a/.github/buildomat/phd-run-with-args.sh +++ b/.github/buildomat/phd-run-with-args.sh @@ -37,6 +37,18 @@ if [ ! -d "$tmpdir" ]; then mkdir $tmpdir fi +# We'll be using the reservoir, so set it to something higher than the default +# 0MiB. Most tests would only need 512MiB (the default PHD guest VM memory +# size), some tests want to run multiple VMs concurrently (particularly around +# migration). 4096MiB is an arbitrary number intended to support the above and +# that we might want to run those tests concurrently at some point. +# +# Currently the lab host these tests will run on is well-known and has much +# more memory than this. Hopefully we won't have Propolis CI running on a +# machine with ~4GiB of memory, but this number could be tuned down if the need +# arises. +pfexec /usr/lib/rsrvrctl -s 4096 + banner 'Tests' runner="$phddir/phd-runner" diff --git a/bin/propolis-server/Cargo.toml b/bin/propolis-server/Cargo.toml index c748d4dfb..98add8228 100644 --- a/bin/propolis-server/Cargo.toml +++ b/bin/propolis-server/Cargo.toml @@ -83,3 +83,9 @@ omicron-build = ["propolis/omicron-build"] # Falcon builds require corresponding bits turned on in the dependency libs falcon = ["propolis/falcon"] + +# Testing necessitates injecting failures which should hopefully be rare or even +# never occur on real otherwise-unperturbed systems. We conditionally compile +# code supporting failure injection to avoid the risk of somehow injecting +# failures into a real system not under test. +failure-injection = [] diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index a4ea795cb..7be62c8e1 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -814,7 +814,7 @@ impl MachineInitializer<'_> { Ok(()) } - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] pub fn initialize_test_devices(&mut self) { use propolis::hw::testdev::{ MigrationFailureDevice, MigrationFailures, diff --git a/bin/propolis-server/src/lib/migrate/preamble.rs b/bin/propolis-server/src/lib/migrate/preamble.rs index 4c6af125a..0201665d2 100644 --- a/bin/propolis-server/src/lib/migrate/preamble.rs +++ b/bin/propolis-server/src/lib/migrate/preamble.rs @@ -52,7 +52,7 @@ impl Preamble { }; match comp { - #[cfg(feature = "omicron-build")] + #[cfg(not(feature = "failure-injection"))] ReplacementComponent::MigrationFailureInjector(_) => { return Err(MigrateError::InstanceSpecsIncompatible( format!( @@ -62,7 +62,7 @@ impl Preamble { )); } - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] ReplacementComponent::MigrationFailureInjector(comp) => { let ComponentV0::MigrationFailureInjector(src) = to_amend else { diff --git a/bin/propolis-server/src/lib/spec/api_spec_v0.rs b/bin/propolis-server/src/lib/spec/api_spec_v0.rs index d97f982c5..18011c97c 100644 --- a/bin/propolis-server/src/lib/spec/api_spec_v0.rs +++ b/bin/propolis-server/src/lib/spec/api_spec_v0.rs @@ -27,7 +27,7 @@ use super::{ StorageDevice, }; -#[cfg(not(feature = "omicron-build"))] +#[cfg(feature = "failure-injection")] use super::MigrationFailure; #[cfg(feature = "falcon")] @@ -65,7 +65,7 @@ impl From for InstanceSpecV0 { serial, pci_pci_bridges, pvpanic, - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] migration_failure, #[cfg(feature = "falcon")] softnpu, @@ -157,7 +157,7 @@ impl From for InstanceSpecV0 { ); } - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] if let Some(mig) = migration_failure { insert_component( &mut spec, @@ -310,14 +310,14 @@ impl TryFrom for Spec { // apply it to the builder later. boot_settings = Some((device_id, settings)); } - #[cfg(feature = "omicron-build")] + #[cfg(not(feature = "failure-injection"))] ComponentV0::MigrationFailureInjector(_) => { return Err(ApiSpecError::FeatureCompiledOut { component: device_id, - feature: "omicron-build", + feature: "failure-injection", }); } - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] ComponentV0::MigrationFailureInjector(mig) => { builder.add_migration_failure_device(MigrationFailure { id: device_id, diff --git a/bin/propolis-server/src/lib/spec/builder.rs b/bin/propolis-server/src/lib/spec/builder.rs index c212790bd..ddd5c5e5b 100644 --- a/bin/propolis-server/src/lib/spec/builder.rs +++ b/bin/propolis-server/src/lib/spec/builder.rs @@ -26,7 +26,7 @@ use super::{ Board, BootOrderEntry, BootSettings, Disk, Nic, QemuPvpanic, SerialPort, }; -#[cfg(not(feature = "omicron-build"))] +#[cfg(feature = "failure-injection")] use super::MigrationFailure; #[cfg(feature = "falcon")] @@ -50,7 +50,7 @@ pub(crate) enum SpecBuilderError { #[error("pvpanic device already specified")] PvpanicInUse, - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] #[error("migration failure injection already enabled")] MigrationFailureInjectionInUse, @@ -269,7 +269,7 @@ impl SpecBuilder { Ok(self) } - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] pub fn add_migration_failure_device( &mut self, mig: MigrationFailure, diff --git a/bin/propolis-server/src/lib/spec/mod.rs b/bin/propolis-server/src/lib/spec/mod.rs index c6da82e67..5bf7f8836 100644 --- a/bin/propolis-server/src/lib/spec/mod.rs +++ b/bin/propolis-server/src/lib/spec/mod.rs @@ -34,7 +34,7 @@ use propolis_api_types::instance_spec::{ }; use thiserror::Error; -#[cfg(not(feature = "omicron-build"))] +#[cfg(feature = "failure-injection")] use propolis_api_types::instance_spec::components::devices::MigrationFailureInjector; #[cfg(feature = "falcon")] @@ -73,7 +73,7 @@ pub(crate) struct Spec { pub pci_pci_bridges: BTreeMap, pub pvpanic: Option, - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] pub migration_failure: Option, #[cfg(feature = "falcon")] @@ -285,7 +285,7 @@ pub struct QemuPvpanic { pub spec: QemuPvpanicDesc, } -#[cfg(not(feature = "omicron-build"))] +#[cfg(feature = "failure-injection")] #[derive(Clone, Debug)] pub struct MigrationFailure { pub id: SpecKey, diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index bef254f30..4ee2d1f23 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -564,10 +564,8 @@ async fn initialize_vm_objects( ))?; init.initialize_network_devices(&chipset).await?; - #[cfg(not(feature = "omicron-build"))] + #[cfg(feature = "failure-injection")] init.initialize_test_devices(); - #[cfg(feature = "omicron-build")] - info!(log, "`omicron-build` feature enabled, ignoring any test devices"); #[cfg(feature = "falcon")] { diff --git a/bin/propolis-server/src/main.rs b/bin/propolis-server/src/main.rs index a45718405..6e56186b1 100644 --- a/bin/propolis-server/src/main.rs +++ b/bin/propolis-server/src/main.rs @@ -278,6 +278,19 @@ fn main() -> anyhow::Result<()> { // Ensure proper setup of USDT probes register_probes().unwrap(); + #[cfg(all( + feature = "omicron-build", + any(feature = "failure-injection", feature = "falcon") + ))] + if option_env!("PHD_BUILD") != Some("true") { + panic!( + "`omicron-build` is enabled alongside development features, \ + this build is NOT SUITABLE for production. Set PHD_BUILD=true in \ + the environment and rebuild propolis-server if you really need \ + this to work." + ); + } + // Command line arguments. let args = Args::parse(); diff --git a/crates/propolis-api-types/src/instance_spec/components/devices.rs b/crates/propolis-api-types/src/instance_spec/components/devices.rs index 9dc86d031..7bb2d9fc5 100644 --- a/crates/propolis-api-types/src/instance_spec/components/devices.rs +++ b/crates/propolis-api-types/src/instance_spec/components/devices.rs @@ -193,8 +193,8 @@ pub struct P9fs { /// Describes a synthetic device that registers for VM lifecycle notifications /// and returns errors during attempts to migrate. /// -/// This is only supported by Propolis servers compiled without the -/// `omicron-build` feature. +/// This is only supported by Propolis servers compiled with the +/// `failure-injection` feature. #[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)] #[serde(deny_unknown_fields)] pub struct MigrationFailureInjector { diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 09b3a590d..030abd887 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1526,7 +1526,7 @@ ] }, "MigrationFailureInjector": { - "description": "Describes a synthetic device that registers for VM lifecycle notifications and returns errors during attempts to migrate.\n\nThis is only supported by Propolis servers compiled without the `omicron-build` feature.", + "description": "Describes a synthetic device that registers for VM lifecycle notifications and returns errors during attempts to migrate.\n\nThis is only supported by Propolis servers compiled with the `failure-injection` feature.", "type": "object", "properties": { "fail_exports": { diff --git a/xtask/src/task_phd.rs b/xtask/src/task_phd.rs index 4cb6eaedd..8c47f972c 100644 --- a/xtask/src/task_phd.rs +++ b/xtask/src/task_phd.rs @@ -4,7 +4,7 @@ use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; -use std::{fs, process::Command, time}; +use std::{collections::HashMap, fs, process::Command, time}; macro_rules! cargo_log { ($tag:literal, $($arg:tt)*) => { @@ -112,6 +112,16 @@ struct PropolisArgs { /// If set, build `propolis-server` in release mode. #[clap(long, short = 'r')] release: bool, + + /// If set, build `propolis-server` with `omicron-build`. This may enable + /// codepaths that only work on `stlouis`, or otherwise expect specific test + /// environment configuration (such as having a reservoir large enough to + /// allocate VMs). + /// + /// This can be helpful if you're debugging an issue that occurs in CI, + /// where `propolis-server` is also tested with `omicron-build`. + #[clap(long)] + omicron_build: bool, } #[derive(Debug, Clone, clap::Parser)] @@ -270,7 +280,7 @@ impl Cmd { return Ok(()); } Self::List { phd_args } => { - let phd_runner = build_bin("phd-runner", false)?; + let phd_runner = build_bin("phd-runner", false, None, None)?; let status = run_exit_code( phd_runner.command().arg("list").args(phd_args), )?; @@ -278,7 +288,7 @@ impl Cmd { } Self::RunnerHelp { phd_args } => { - let phd_runner = build_bin("phd-runner", false)?; + let phd_runner = build_bin("phd-runner", false, None, None)?; let status = run_exit_code( phd_runner.command().arg("help").args(phd_args), )?; @@ -292,7 +302,27 @@ impl Cmd { cmd } None => { - let bin = build_bin("propolis-server", propolis_args.release)?; + let mut server_build_env = HashMap::new(); + server_build_env + .insert("PHD_BUILD".to_string(), "true".to_string()); + + // Some PHD tests specifically cover cases where a component in + // the system has encountered an error, so enable + // failure-injection. Do not enable `omicron-build` by default + // because it configures `propolis-server` to expect an Omicron- + // or stlouis-specific environment. + let mut propolis_features = vec!["failure-injection"]; + if propolis_args.omicron_build { + // If you know your environment looks like we'd expect in + // Omicron, have at it! + propolis_features.push("omicron-build"); + } + let bin = build_bin( + "propolis-server", + propolis_args.release, + Some(&propolis_features.join(",")), + Some(server_build_env), + )?; let path = bin .path() .try_into() @@ -345,7 +375,7 @@ impl Cmd { anyhow::bail!("Missing artifacts config `{artifacts_toml}`!"); } - let phd_runner = build_bin("phd-runner", false)?; + let phd_runner = build_bin("phd-runner", false, None, None)?; let mut cmd = if cfg!(target_os = "illumos") { let mut cmd = Command::new("pfexec"); cmd.arg(phd_runner.path()); @@ -418,15 +448,30 @@ impl BasePropolisArgs { } } +/// Build the binary `name` in debug or release with an optional build +/// environment variables and a list of Cargo features. +/// +/// `features` is passed directly to Cargo, and so must be a space or +/// comma-separated list of features to activate. fn build_bin( name: impl AsRef, release: bool, + features: Option<&str>, + build_env: Option>, ) -> anyhow::Result { let name = name.as_ref(); cargo_log!("Compiling", "{name}"); let mut cmd = escargot::CargoBuild::new().package(name).bin(name).current_target(); + if let Some(features) = features { + cmd = cmd.features(features); + } + if let Some(env) = build_env { + for (k, v) in env { + cmd = cmd.env(k, v); + } + } let profile = if release { cmd = cmd.release(); "release [optimized]"