Skip to content

Commit 7c4cbe7

Browse files
committedMar 14, 2024·
More review feedback
- Added test to catch changes in kstate->oximeter vCPU microstate mapping - Make oximeter state names consts - Define the number of expected states explicitly as the len of an array of the state names.
1 parent 1ea6244 commit 7c4cbe7

File tree

2 files changed

+125
-22
lines changed

2 files changed

+125
-22
lines changed
 

‎bin/propolis-server/src/lib/server.rs

+15-9
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ use propolis_server_config::Config as VmTomlConfig;
3838
use rfb::server::VncServer;
3939
use slog::{error, info, o, warn, Logger};
4040
use thiserror::Error;
41-
use tokio::sync::{mpsc, oneshot, MappedMutexGuard, Mutex, MutexGuard, Notify};
41+
use tokio::sync::{mpsc, oneshot, MappedMutexGuard, Mutex, MutexGuard};
4242
use tokio_tungstenite::tungstenite::protocol::{Role, WebSocketConfig};
4343
use tokio_tungstenite::WebSocketStream;
4444

@@ -178,7 +178,12 @@ impl VmControllerState {
178178
pub struct OximeterState {
179179
/// A task spawned at initial startup to register with Nexus for metric
180180
/// production.
181-
registration_task: Option<(Arc<Notify>, tokio::task::JoinHandle<()>)>,
181+
///
182+
/// The oneshot sender will be used in the `ServiceProviders::stop()`
183+
/// method, to signal that the registration task itself should bail out, if
184+
/// it's still running.
185+
registration_task:
186+
Option<(oneshot::Sender<()>, tokio::task::JoinHandle<()>)>,
182187

183188
/// The host task for this Propolis server's Oximeter server.
184189
server_task: Option<tokio::task::JoinHandle<()>>,
@@ -233,9 +238,10 @@ impl ServiceProviders {
233238

234239
// Clean up oximeter tasks and statistic state.
235240
let mut oximeter_state = self.oximeter_state.lock().await;
236-
if let Some((shutdown, task)) = oximeter_state.registration_task.take()
241+
if let Some((shutdown_tx, task)) =
242+
oximeter_state.registration_task.take()
237243
{
238-
shutdown.notify_one();
244+
let _ = shutdown_tx.send(());
239245
task.abort();
240246
};
241247
if let Some(server) = oximeter_state.server_task.take() {
@@ -367,8 +373,7 @@ async fn register_oximeter_in_background(
367373
// we'll return the `oximeter` server and continue registration. In the
368374
// latter cases, we'll return early, since no metrics can be produced.
369375
let services_ = services.clone();
370-
let shutdown = Arc::new(Notify::new());
371-
let shutdown_ = Arc::clone(&shutdown);
376+
let (shutdown_tx, shutdown_rx) = oneshot::channel();
372377
let registration_task = tokio::task::spawn(async move {
373378
// Create the closure used to register with Nexus inside the backoff
374379
// loop.
@@ -427,7 +432,7 @@ async fn register_oximeter_in_background(
427432
}
428433
}
429434
}
430-
_ = shutdown_.notified() => {
435+
_ = shutdown_rx => {
431436
slog::debug!(
432437
log,
433438
"Nexus metric registration retry loop \
@@ -470,8 +475,9 @@ async fn register_oximeter_in_background(
470475
let old = state.stats.replace(stats);
471476
assert!(old.is_none());
472477
});
473-
let old =
474-
oximeter_state.registration_task.replace((shutdown, registration_task));
478+
let old = oximeter_state
479+
.registration_task
480+
.replace((shutdown_tx, registration_task));
475481
assert!(old.is_none());
476482
}
477483

‎bin/propolis-server/src/lib/stats/virtual_machine.rs

+110-13
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,33 @@ pub struct VcpuUsage {
202202
// for a definition of these states.
203203
fn kstat_microstate_to_state_name(ustate: &str) -> Option<&'static str> {
204204
match ustate {
205-
"time_emu_kern" | "time_emu_user" => Some("emulation"),
206-
"time_run" => Some("run"),
207-
"time_init" | "time_idle" => Some("idle"),
208-
"time_sched" => Some("waiting"),
205+
"time_emu_kern" | "time_emu_user" => Some(OXIMETER_EMULATION_STATE),
206+
"time_run" => Some(OXIMETER_RUN_STATE),
207+
"time_init" | "time_idle" => Some(OXIMETER_IDLE_STATE),
208+
"time_sched" => Some(OXIMETER_WAITING_STATE),
209209
_ => None,
210210
}
211211
}
212212

213+
// The definitions of each oximeter-level microstate we track.
214+
const OXIMETER_EMULATION_STATE: &str = "emulation";
215+
const OXIMETER_RUN_STATE: &str = "run";
216+
const OXIMETER_IDLE_STATE: &str = "idle";
217+
const OXIMETER_WAITING_STATE: &str = "waiting";
218+
const OXIMETER_STATES: [&str; 4] = [
219+
OXIMETER_EMULATION_STATE,
220+
OXIMETER_RUN_STATE,
221+
OXIMETER_IDLE_STATE,
222+
OXIMETER_WAITING_STATE,
223+
];
224+
225+
/// The number of expected vCPU microstates we track.
226+
///
227+
/// This is used to preallocate data structures for holding samples, and to
228+
/// limit the number of samples in the `KstatSampler`, if it is not pulled
229+
/// quickly enough by `oximeter`.
230+
pub(crate) const N_VCPU_MICROSTATES: u32 = OXIMETER_STATES.len() as _;
231+
213232
// The name of the kstat module containing virtual machine kstats.
214233
const VMM_KSTAT_MODULE_NAME: &str = "vmm";
215234

@@ -223,13 +242,6 @@ const VM_NAME_KSTAT: &str = "vm_name";
223242
// The name of kstat containing vCPU usage data.
224243
const VCPU_KSTAT_PREFIX: &str = "vcpu";
225244

226-
/// The number of expected vCPU microstates we track.
227-
///
228-
/// This is used to preallocate data structures for holding samples, and to
229-
/// limit the number of samples in the `KstatSampler`, if it is not pulled
230-
/// quickly enough by `oximeter`.
231-
pub(crate) const N_VCPU_MICROSTATES: u32 = 4;
232-
233245
#[cfg(all(not(test), target_os = "illumos"))]
234246
impl KstatTarget for VirtualMachine {
235247
// The VMM kstats are organized like so:
@@ -385,6 +397,7 @@ fn produce_vcpu_usage<'a>(
385397
#[cfg(test)]
386398
mod tests {
387399
use super::kstat_instance_from_instance_id;
400+
use super::kstat_microstate_to_state_name;
388401
use super::produce_vcpu_usage;
389402
use super::Data;
390403
use super::Kstat;
@@ -397,10 +410,16 @@ mod tests {
397410
use super::VMM_KSTAT_MODULE_NAME;
398411
use super::VM_KSTAT_NAME;
399412
use super::VM_NAME_KSTAT;
413+
use crate::stats::virtual_machine::N_VCPU_MICROSTATES;
414+
use crate::stats::virtual_machine::OXIMETER_EMULATION_STATE;
415+
use crate::stats::virtual_machine::OXIMETER_IDLE_STATE;
416+
use crate::stats::virtual_machine::OXIMETER_RUN_STATE;
417+
use crate::stats::virtual_machine::OXIMETER_WAITING_STATE;
400418
use oximeter::schema::SchemaSet;
401419
use oximeter::types::Cumulative;
402420
use oximeter::Datum;
403421
use oximeter::FieldValue;
422+
use std::collections::BTreeMap;
404423
use uuid::Uuid;
405424

406425
fn test_virtual_machine() -> VirtualMachine {
@@ -499,8 +518,10 @@ mod tests {
499518
2,
500519
"Should have samples for 'run' and 'idle' states"
501520
);
502-
for ((sample, state), x) in
503-
samples.iter().zip(["idle", "run"]).zip([2, 2])
521+
for ((sample, state), x) in samples
522+
.iter()
523+
.zip([OXIMETER_IDLE_STATE, OXIMETER_RUN_STATE])
524+
.zip([2, 2])
504525
{
505526
let st = sample
506527
.fields()
@@ -523,4 +544,80 @@ mod tests {
523544
assert_eq!(inner.value(), x);
524545
}
525546
}
547+
548+
// Sanity check that the mapping from lower-level `kstat` vCPU microstates
549+
// to the higher-level states we report to `oximeter` do not change.
550+
#[test]
551+
fn test_consistent_kstat_to_oximeter_microstate_mapping() {
552+
// Build our expected mapping from kstat-to-oximeter states.
553+
//
554+
// For each oximeter state, we pretend to have observed the kstat-level
555+
// microstates that go into it some number of times. We then check that
556+
// the number of actual observed mapped states (for each kstat-level
557+
// one) is matches our expectation.
558+
//
559+
// For example, the `time_emu_{kern,user}` states map to the
560+
// `"emulation"` state. If we observe 1 and 2 of those, respectively, we
561+
// should have a total of 3 observations of the `"emulation"` state.
562+
let mut expected_states = BTreeMap::new();
563+
expected_states.insert(
564+
OXIMETER_EMULATION_STATE,
565+
vec![("time_emu_kern", 1usize), ("time_emu_user", 2)],
566+
);
567+
expected_states.insert(
568+
OXIMETER_RUN_STATE,
569+
vec![("time_run", 4)], // Not equal to sum above
570+
);
571+
expected_states.insert(
572+
OXIMETER_IDLE_STATE,
573+
vec![("time_init", 5), ("time_idle", 6)],
574+
);
575+
expected_states.insert(OXIMETER_WAITING_STATE, vec![("time_sched", 7)]);
576+
assert_eq!(
577+
expected_states.len() as u32,
578+
N_VCPU_MICROSTATES,
579+
"Expected set of oximeter states does not match const",
580+
);
581+
582+
// "Observe" each kstat-level microstate a certain number of times, and
583+
// bump our counter of the oximeter state it maps to.
584+
let mut observed_states: BTreeMap<_, usize> = BTreeMap::new();
585+
for kstat_states in expected_states.values() {
586+
for (kstat_state, count) in kstat_states.iter() {
587+
let oximeter_state = kstat_microstate_to_state_name(
588+
kstat_state,
589+
)
590+
.unwrap_or_else(|| {
591+
panic!(
592+
"kstat state '{}' did not map to an oximeter state, \
593+
which it should have done. Did that state get \
594+
mapped to a new oximeter-level state?",
595+
kstat_state
596+
)
597+
});
598+
*observed_states.entry(oximeter_state).or_default() += count;
599+
}
600+
}
601+
602+
// Check that we've observed all the states correctly.
603+
assert_eq!(
604+
observed_states.len(),
605+
expected_states.len(),
606+
"Some oximeter-level states were not accounted for. \
607+
Did the set of oximeter states reported change?",
608+
);
609+
for (oximeter_state, count) in observed_states.iter() {
610+
let kstat_states = expected_states.get(oximeter_state).expect(
611+
"An unexpected oximeter state was produced. \
612+
Did the set of kstat or oximeter microstates \
613+
change?",
614+
);
615+
let expected_total: usize =
616+
kstat_states.iter().map(|(_, ct)| ct).sum();
617+
assert_eq!(
618+
*count, expected_total,
619+
"Some oximeter states were not accounted for",
620+
);
621+
}
622+
}
526623
}

0 commit comments

Comments
 (0)