Skip to content

Commit c7bb19d

Browse files
authored
Handle oximeter self-stat errors of all kinds (#7775)
Fixes #7255
1 parent 69eb54c commit c7bb19d

File tree

1 file changed

+28
-14
lines changed

1 file changed

+28
-14
lines changed

oximeter/collector/src/agent.rs

+28-14
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,7 @@ mod tests {
684684
use omicron_common::api::internal::nexus::ProducerKind;
685685
use omicron_test_utils::dev::test_setup_log;
686686
use oximeter::types::ProducerResults;
687+
use reqwest::StatusCode;
687688
use std::net::Ipv6Addr;
688689
use std::net::SocketAddr;
689690
use std::net::SocketAddrV6;
@@ -969,17 +970,36 @@ mod tests {
969970
.unwrap()
970971
.statistics()
971972
.await;
972-
let count = stats
973+
974+
// The collections _should_ always fail due to a 500, but it's possible
975+
// that we also get collections failing because there's already one in
976+
// progress. See
977+
// https://github.com/oxidecomputer/omicron/issues/7255#issuecomment-2711537164
978+
// for example.
979+
//
980+
// Sum over all the expected reasons to get the correct count. We should
981+
// never get anything but a 500, or possibly an in-progress error.
982+
dbg!(&stats.failed_collections);
983+
let count: usize = stats
973984
.failed_collections
974-
.get(&FailureReason::Other(
975-
reqwest::StatusCode::INTERNAL_SERVER_ERROR,
976-
))
977-
.unwrap()
978-
.datum
979-
.value() as usize;
985+
.iter()
986+
.map(|(reason, value)| {
987+
let value = match reason {
988+
FailureReason::CollectionsInProgress => value,
989+
FailureReason::Other(sc)
990+
if sc == &StatusCode::INTERNAL_SERVER_ERROR =>
991+
{
992+
value
993+
}
994+
_ => panic!("Unexpected failure reason: {reason:?}"),
995+
};
996+
value.datum.value() as usize
997+
})
998+
.sum();
999+
assert!(count != 0);
9801000

1001+
// In any case, we should never have a _successful_ collection.
9811002
assert_eq!(stats.collections.datum.value(), 0);
982-
assert!(count != 0);
9831003

9841004
// The server may have handled a request that we've not yet recorded on
9851005
// our collection task side, so we allow the server count to be greater
@@ -994,12 +1014,6 @@ mod tests {
9941014
task ({count}) differs from the number reported by the always-ded \
9951015
producer server itself ({server_count})"
9961016
);
997-
assert_eq!(
998-
stats.failed_collections.len(),
999-
1,
1000-
"unexpected failed_collections content: {:?}",
1001-
stats.failed_collections,
1002-
);
10031017
logctx.cleanup_successful();
10041018
}
10051019

0 commit comments

Comments
 (0)