Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PHD: improve ctrl-c handling #634

Merged
merged 4 commits into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 39 additions & 12 deletions phd-tests/framework/src/test_vm/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

//! Routines and data structures for working with Propolis server processes.

use std::{fmt::Debug, net::SocketAddrV4};
use std::{fmt::Debug, net::SocketAddrV4, os::unix::process::CommandExt};

use anyhow::Result;
use camino::{Utf8Path, Utf8PathBuf};
Expand Down Expand Up @@ -62,18 +62,45 @@ impl PropolisServer {
let (server_stdout, server_stderr) =
log_mode.get_handles(&data_dir, vm_name)?;

let mut server_cmd = std::process::Command::new("pfexec");
server_cmd
.args([
server_path.as_str(),
"run",
config_toml_path.as_str(),
server_addr.to_string().as_str(),
vnc_addr.to_string().as_str(),
])
.stdout(server_stdout)
.stderr(server_stderr);

// Gracefully shutting down a Propolis server requires PHD to send an
// instance stop request to the server before it is actually terminated.
// This ensures that the server has a chance to clean up kernel VMM
// resources. It's desirable for the server to do this and not PHD
// because a failure to clean up VMMs on stop is a Propolis bug.
//
// The PHD runner sets up a SIGINT handler that tries to give the
// framework an opportunity to issue these requests before the runner
// exits. However, pressing Ctrl-C in a shell will typically broadcast
// SIGINT to all of the processes in the foreground process's group, not
// just to the foreground process itself. This means that a Ctrl-C press
// will usually kill all of PHD's Propolis servers before the cleanup
// logic can run.
//
// To avoid this problem, add a pre-`exec` hook that directs Propolis
// servers to ignore SIGINT. On Ctrl-C, the runner will drop all active
// `TestVm`s, and this drop path (if allowed to complete) will kill all
// these processes.
unsafe {
server_cmd.pre_exec(move || {
libc::signal(libc::SIGINT, libc::SIG_IGN);
Ok(())
});
}

let server = PropolisServer {
server: std::process::Command::new("pfexec")
.args([
server_path.as_str(),
"run",
config_toml_path.as_str(),
server_addr.to_string().as_str(),
vnc_addr.to_string().as_str(),
])
.stdout(server_stdout)
.stderr(server_stderr)
.spawn()?,
server: server_cmd.spawn()?,
address: server_addr,
};

Expand Down
83 changes: 69 additions & 14 deletions phd-tests/runner/src/execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use std::sync::Arc;
use std::time::{Duration, Instant};

use phd_tests::phd_testcase::{Framework, TestCase, TestOutcome};
use tracing::{error, info};
use tokio::signal::unix::{signal, SignalKind};
use tokio::sync::watch;
use tracing::{error, info, warn};

use crate::config::RunOptions;
use crate::fixtures::TestFixtures;
Expand Down Expand Up @@ -76,32 +78,53 @@ pub async fn run_tests_with_ctx(
}

fixtures.execution_setup().unwrap();

let sigint_rx = set_sigint_handler();
info!("Running {} test(s)", executions.len());
let start_time = Instant::now();
'exec_loop: for execution in &mut executions {
for execution in &mut executions {
if *sigint_rx.borrow() {
info!("Test run interrupted by SIGINT");
break;
}

info!("Starting test {}", execution.tc.fully_qualified_name());

// Failure to run a setup fixture is fatal to the rest of the run, but
// it's still possible to report results, so return gracefully instead
// of panicking.
if let Err(e) = fixtures.test_setup() {
error!("Error running test setup fixture: {}", e);
break 'exec_loop;
break;
}

stats.tests_not_run -= 1;
let test_ctx = ctx.clone();
let tc = execution.tc;
let test_outcome =
match tokio::spawn(async move { tc.run(test_ctx.as_ref()).await })
.await
{
Ok(outcome) => outcome,
Err(_) => TestOutcome::Failed(Some(
"test task panicked, see test logs".to_string(),
)),
};
let mut sigint_rx_task = sigint_rx.clone();
let test_outcome = tokio::spawn(async move {
tokio::select! {
// Ensure interrupt signals are always handled instead of
// continuing to run the test.
biased;
result = sigint_rx_task.changed() => {
assert!(
result.is_ok(),
"SIGINT channel shouldn't drop while tests are running"
);

TestOutcome::Failed(
Some("test interrupted by SIGINT".to_string())
)
}
outcome = tc.run(test_ctx.as_ref()) => outcome
}
})
.await
.unwrap_or_else(|_| {
TestOutcome::Failed(Some(
"test task panicked, see test logs".to_string(),
))
});

info!(
"test {} ... {}{}",
Expand Down Expand Up @@ -132,7 +155,7 @@ pub async fn run_tests_with_ctx(
execution.status = Status::Ran(test_outcome);
if let Err(e) = fixtures.test_cleanup().await {
error!("Error running cleanup fixture: {}", e);
break 'exec_loop;
break;
}
}
stats.duration = start_time.elapsed();
Expand All @@ -141,3 +164,35 @@ pub async fn run_tests_with_ctx(

stats
}

/// Sets a global handler for SIGINT and hands the resulting signal channel over
/// to a task that handles this signal. Returns a receiver to which the signal
/// handler task publishes `true` to the channel when SIGINT is received.
fn set_sigint_handler() -> watch::Receiver<bool> {
let mut sigint =
signal(SignalKind::interrupt()).expect("failed to set SIGINT handler");

let (sigint_tx, sigint_rx) = watch::channel(false);
tokio::spawn(async move {
loop {
sigint.recv().await;

// If a signal was previously dispatched to the channel, exit
// immediately with the customary SIGINT exit code (130 is 128 +
// SIGINT). This allows users to interrupt tests even if they aren't
// at an await point (at the cost of not having destructors run).
if *sigint_tx.borrow() {
error!(
"SIGINT received while shutting down, rudely terminating"
);
error!("some processes and resources may have been leaked!");
std::process::exit(130);
}

warn!("SIGINT received, sending shutdown signal to tests");
let _ = sigint_tx.send(true);
}
});

sigint_rx
}
Loading