Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test oximeter metrics end to end #855

Merged
merged 15 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions phd-tests/framework/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@ bhyve_api.workspace = true
camino = { workspace = true, features = ["serde1"] }
cfg-if.workspace = true
cpuid_utils.workspace = true
dropshot.workspace = true
errno.workspace = true
fatfs.workspace = true
futures.workspace = true
flate2.workspace = true
hex.workspace = true
libc.workspace = true
newtype_derive.workspace = true
omicron-common.workspace = true
oximeter.workspace = true
propolis-client.workspace = true
reqwest = { workspace = true, features = ["blocking"] }
ring.workspace = true
Expand All @@ -31,6 +34,7 @@ serde_derive.workspace = true
serde_json.workspace = true
slog.workspace = true
slog-async.workspace = true
slog-bunyan.workspace = true
slog-term.workspace = true
tar.workspace = true
termwiz.workspace = true
Expand Down
17 changes: 10 additions & 7 deletions phd-tests/framework/src/disk/crucible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ use tracing::{error, info};
use uuid::Uuid;

use super::BlockSize;
use crate::{
disk::DeviceName, guest_os::GuestOsKind, server_log_mode::ServerLogMode,
};
use crate::{disk::DeviceName, guest_os::GuestOsKind, log_config::LogConfig};

/// An RAII wrapper around a directory containing Crucible data files. Deletes
/// the directory and its contents when dropped.
Expand Down Expand Up @@ -79,7 +77,7 @@ impl CrucibleDisk {
data_dir_root: &impl AsRef<Path>,
read_only_parent: Option<&impl AsRef<Path>>,
guest_os: Option<GuestOsKind>,
log_mode: ServerLogMode,
log_config: LogConfig,
) -> anyhow::Result<Self> {
Ok(Self {
device_name,
Expand All @@ -92,7 +90,7 @@ impl CrucibleDisk {
downstairs_ports,
data_dir_root,
read_only_parent,
log_mode,
log_config,
)?),
})
}
Expand Down Expand Up @@ -163,7 +161,7 @@ impl Inner {
downstairs_ports: &[u16],
data_dir_root: &impl AsRef<Path>,
read_only_parent: Option<&impl AsRef<Path>>,
log_mode: ServerLogMode,
log_config: LogConfig,
) -> anyhow::Result<Self> {
// To create a region, Crucible requires a block size, an extent size
// given as a number of blocks, and an extent count. Compute the latter
Expand Down Expand Up @@ -288,7 +286,12 @@ impl Inner {
dir_arg.as_ref(),
];

let (stdout, stderr) = log_mode.get_handles(
// NOTE: `log_format` is ignored here because Crucible determines
// Bunyan or plain formatting based on `atty::is()`. In practice
// this is fine, and matches what we want right now, but it might be
// nice to connect this more directly to the output desire expressed
// by the test runner.
let (stdout, stderr) = log_config.output_mode.get_handles(
data_dir_root,
&format!("crucible_{}_{}", disk_uuid, port),
)?;
Expand Down
10 changes: 5 additions & 5 deletions phd-tests/framework/src/disk/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ use thiserror::Error;
use crate::{
artifacts::ArtifactStore,
guest_os::GuestOsKind,
log_config::LogConfig,
port_allocator::{PortAllocator, PortAllocatorError},
server_log_mode::ServerLogMode,
};

use self::{crucible::CrucibleDisk, file::FileBackedDisk};
Expand Down Expand Up @@ -190,7 +190,7 @@ pub(crate) struct DiskFactory {
port_allocator: Arc<PortAllocator>,

/// The logging discipline to use for Crucible server processes.
log_mode: ServerLogMode,
log_config: LogConfig,
}

impl DiskFactory {
Expand All @@ -201,13 +201,13 @@ impl DiskFactory {
storage_dir: &impl AsRef<Utf8Path>,
artifact_store: Arc<ArtifactStore>,
port_allocator: Arc<PortAllocator>,
log_mode: ServerLogMode,
log_config: LogConfig,
) -> Self {
Self {
storage_dir: storage_dir.as_ref().to_path_buf(),
artifact_store,
port_allocator,
log_mode,
log_config,
}
}
}
Expand Down Expand Up @@ -325,7 +325,7 @@ impl DiskFactory {
&self.storage_dir,
artifact_path.as_ref(),
guest_os,
self.log_mode,
self.log_config,
)
.map(Arc::new)
.map_err(Into::into)
Expand Down
12 changes: 6 additions & 6 deletions phd-tests/framework/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ use camino::Utf8PathBuf;
use disk::DiskFactory;
use futures::{stream::FuturesUnordered, StreamExt};
use guest_os::GuestOsKind;
use log_config::LogConfig;
use port_allocator::PortAllocator;
use server_log_mode::ServerLogMode;
pub use test_vm::TestVm;
use test_vm::{
environment::EnvironmentSpec, spec::VmSpec, VmConfig, VmLocation,
Expand All @@ -52,16 +52,16 @@ pub mod disk;
pub mod guest_os;
pub mod host_api;
pub mod lifecycle;
pub mod log_config;
mod port_allocator;
mod serial;
pub mod server_log_mode;
pub mod test_vm;
pub(crate) mod zfs;

/// An instance of the PHD test framework.
pub struct Framework {
pub(crate) tmp_directory: Utf8PathBuf,
pub(crate) server_log_mode: ServerLogMode,
pub(crate) log_config: LogConfig,

pub(crate) default_guest_cpus: u8,
pub(crate) default_guest_memory_mib: u64,
Expand Down Expand Up @@ -99,7 +99,7 @@ pub struct FrameworkParameters<'a> {
pub tmp_directory: Utf8PathBuf,
pub artifact_directory: Utf8PathBuf,
pub artifact_toml: Utf8PathBuf,
pub server_log_mode: ServerLogMode,
pub log_config: LogConfig,

pub default_guest_cpus: u8,
pub default_guest_memory_mib: u64,
Expand Down Expand Up @@ -187,14 +187,14 @@ impl Framework {
&params.tmp_directory,
artifact_store.clone(),
port_allocator.clone(),
params.server_log_mode,
params.log_config,
);

let (cleanup_task_tx, cleanup_task_rx) =
tokio::sync::mpsc::unbounded_channel();
Ok(Self {
tmp_directory: params.tmp_directory,
server_log_mode: params.server_log_mode,
log_config: params.log_config,
default_guest_cpus: params.default_guest_cpus,
default_guest_memory_mib: params.default_guest_memory_mib,
default_guest_os_artifact: params.default_guest_os_artifact,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,23 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Types and helpers specifying where a server process's stdout/stderr should
//! be recorded.
//! Types and helpers specifying how logs should be formatted and where they
//! should be directed.

use std::{path::Path, process::Stdio, str::FromStr};

use tracing::info;

/// Specifies where a process's output should be written.
/// Specifies how a test's logging should be managed.
#[derive(Debug, Clone, Copy)]
pub enum ServerLogMode {
pub struct LogConfig {
pub output_mode: OutputMode,
pub log_format: LogFormat,
}

/// Specifies where a output for a test's processes should be written.
#[derive(Debug, Clone, Copy)]
pub enum OutputMode {
/// Write to files in the server's factory's temporary directory.
TmpFile,

Expand All @@ -22,14 +29,14 @@ pub enum ServerLogMode {
Null,
}

impl FromStr for ServerLogMode {
impl FromStr for OutputMode {
type Err = std::io::Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"file" | "tmpfile" => Ok(ServerLogMode::TmpFile),
"stdio" => Ok(ServerLogMode::Stdio),
"null" => Ok(ServerLogMode::Null),
"file" | "tmpfile" => Ok(OutputMode::TmpFile),
"stdio" => Ok(OutputMode::Stdio),
"null" => Ok(OutputMode::Null),
_ => Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
s.to_string(),
Expand All @@ -38,7 +45,7 @@ impl FromStr for ServerLogMode {
}
}

impl ServerLogMode {
impl OutputMode {
/// Returns the stdout/stderr handles to pass to processes using the
/// specified logging mode.
///
Expand All @@ -54,7 +61,7 @@ impl ServerLogMode {
file_prefix: &str,
) -> anyhow::Result<(Stdio, Stdio)> {
match self {
ServerLogMode::TmpFile => {
OutputMode::TmpFile => {
let mut stdout_path = directory.as_ref().to_path_buf();
stdout_path.push(format!("{}.stdout.log", file_prefix));

Expand All @@ -67,8 +74,19 @@ impl ServerLogMode {
std::fs::File::create(stderr_path)?.into(),
))
}
ServerLogMode::Stdio => Ok((Stdio::inherit(), Stdio::inherit())),
ServerLogMode::Null => Ok((Stdio::null(), Stdio::null())),
OutputMode::Stdio => Ok((Stdio::inherit(), Stdio::inherit())),
OutputMode::Null => Ok((Stdio::null(), Stdio::null())),
}
}
}

/// Specifies how output for a test's processes should be structured.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogFormat {
/// Format logs as plain hopefully human-readable output.
Plain,

/// Format logs as Bunyan output, more suitable for machine processing (such
/// as in CI).
Bunyan,
}
38 changes: 36 additions & 2 deletions phd-tests/framework/src/test_vm/environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,31 @@ pub enum VmLocation {
// TODO: Support remote VMs.
}

/// Specifies where test VMs should report metrics to, if anywhere.
#[derive(Clone, Copy, Debug)]
pub enum MetricsLocation {
/// Oximeter metrics should be reported to a server colocated with the test
/// VM to be started.
Local,
// When the time comes to support remote VMs, it will presumably be useful
// to have local and (perhaps multiple) remote VMs report metrics to the
// same server. But we don't support remote VMs yet.
}
Comment on lines +25 to +28
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Multi-host PHD is obviously not very well fleshed-out yet, but I think the way I would develop it might make this enum obsolete. My 30,000-foot sketch of this is

  • of the several machines conducting a multi-host PHD run, one is primary and the others are secondary
  • the primary machine runs phd-runner and executes the actual test cases
  • the secondary machines run a phd-agent that exposes a Dropshot API that handles the tasks that need to be performed on remote machines (e.g. starting/stopping Propolis servers)
  • when invoking the runner on the primary, one passes the addresses of the agent servers

IIUC, even with an architecture like this, the fake Oximeter server still needs to be "local," even if the metrics are coming from a secondary host: submitted samples need to show up in the runner's Framework so that they can be queried by the tests running in that process.

This is obviated somewhat by switching to Clickhouse, I think, since then (I presume) there would be a single Clickhouse server instance that all PHD processes (both runner and agent) could insert data into, and the framework on the runner would just query that server. Even so, it still seems possible in that case for remote Propolis processes just to submit their samples to the collector in the primary runner.

WDYT?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i was figuring in a case like that we'd still use Local to send localhost:{oximeter_port} instead of something fancier, but overall that would boil it down to "wants oximeter: yes/no". i do think the plumbing to express that here today is a bit more awkward.. we don't really have a clean "set up dependent services" -> "start propolis-server" split in PHD and that does get to be more reorganizing than i want to do here 😅


#[derive(Clone, Debug)]
pub struct EnvironmentSpec {
pub(crate) location: VmLocation,
pub(crate) propolis_artifact: String,
pub(crate) metrics: Option<MetricsLocation>,
}

impl EnvironmentSpec {
pub(crate) fn new(location: VmLocation, propolis_artifact: &str) -> Self {
Self { location, propolis_artifact: propolis_artifact.to_owned() }
Self {
location,
propolis_artifact: propolis_artifact.to_owned(),
metrics: None,
}
}

pub fn location(&mut self, location: VmLocation) -> &mut Self {
Expand All @@ -37,6 +53,11 @@ impl EnvironmentSpec {
self
}

pub fn metrics(&mut self, metrics: Option<MetricsLocation>) -> &mut Self {
self.metrics = metrics;
self
}

pub(crate) async fn build<'a>(
&self,
framework: &'a Framework,
Expand Down Expand Up @@ -75,18 +96,31 @@ impl<'a> Environment<'a> {
.port_allocator
.next()
.context("getting VNC server port")?;
let metrics_addr = builder.metrics.and_then(|m| match m {
MetricsLocation::Local => {
// If the test requests metrics are local, we'll start
// an Oximeter stand-in for this VM when setting up this
// environment later. `start_local_vm` will patch in the
// actual server address when it has created one.
//
// If the VM is to be started remotely but requests
// "Local" metrics, that's probably an error.
None
}
});
let params = ServerProcessParameters {
server_path: propolis_server,
data_dir: framework.tmp_directory.as_path(),
server_addr: SocketAddrV4::new(
Ipv4Addr::new(127, 0, 0, 1),
server_port,
),
metrics_addr,
vnc_addr: SocketAddrV4::new(
Ipv4Addr::new(127, 0, 0, 1),
vnc_port,
),
log_mode: framework.server_log_mode,
log_config: framework.log_config,
};
Ok(Self::Local(params))
}
Expand Down
Loading
Loading