Skip to content

Commit cf2dc26

Browse files
authored
lib: log vCPU diagnostics on triple fault and for some unhandled exit types (#795)
Add a `propolis::vcpu::Diagnostics` type that captures and pretty-prints the register state of a vCPU. Log this state of a vCPU triple-faults or (in propolis-server) if it raises a `Paging` or `InstEmul` exit that the binary does not handle. Formatting and logging register state increases the risk that a Propolis log will contain sensitive guest application data that happens to have been loaded into a register at the time the state was read. To help mitigate this, introduce a `GuestData` wrapper type that identifies this sort of guest application data. `GuestData`'s `Display` and `Debug` impls can be configured (using a library-level flag) to redact this data when it is formatted. `GuestData` is displayed by default in propolis-standalone and development builds of propolis-server and redacted in Omicron zone builds of propolis-server.
1 parent ff0b76d commit cf2dc26

File tree

14 files changed

+349
-50
lines changed

14 files changed

+349
-50
lines changed

bin/propolis-server/src/lib/migrate/source.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
use bitvec::prelude::{BitSlice, Lsb0};
66
use futures::{SinkExt, StreamExt};
7-
use propolis::common::{GuestAddr, PAGE_SIZE};
7+
use propolis::common::{GuestAddr, GuestData, PAGE_SIZE};
88
use propolis::migrate::{
99
MigrateCtx, MigrateStateError, Migrator, PayloadOutputs,
1010
};
@@ -657,9 +657,12 @@ impl<'vm, T: MigrateConn> RonV0Runner<'vm, T> {
657657
info!(self.log(), "ram_push: xfer RAM between {start:#x} and {end:#x}",);
658658
self.send_msg(memx::make_mem_xfer(start, end, bits)).await?;
659659
for addr in PageIter::new(start, end, bits) {
660-
let mut bytes = [0u8; PAGE_SIZE];
661-
self.read_guest_mem(GuestAddr(addr), &mut bytes).await?;
662-
self.send_msg(codec::Message::Page(bytes.into())).await?;
660+
let mut byte_buffer = [0u8; PAGE_SIZE];
661+
{
662+
let mut bytes = GuestData::from(byte_buffer.as_mut_slice());
663+
self.read_guest_mem(GuestAddr(addr), &mut bytes).await?;
664+
}
665+
self.send_msg(codec::Message::Page(byte_buffer.into())).await?;
663666
probes::migrate_xfer_ram_page!(|| (addr, PAGE_SIZE as u64));
664667
}
665668
Ok(())
@@ -919,7 +922,7 @@ impl<'vm, T: MigrateConn> RonV0Runner<'vm, T> {
919922
async fn read_guest_mem(
920923
&mut self,
921924
addr: GuestAddr,
922-
buf: &mut [u8],
925+
buf: &mut GuestData<&mut [u8]>,
923926
) -> Result<(), MigrateError> {
924927
let objects = self.vm.lock_shared().await;
925928
let memctx = objects.access_mem().unwrap();

bin/propolis-server/src/lib/vcpu_tasks.rs

+78-1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ impl VcpuTasks {
159159
VmEntry::Run
160160
}
161161
VmExitKind::Suspended(SuspendDetail { kind, when }) => {
162+
use propolis::vcpu::Diagnostics;
162163
match kind {
163164
exits::Suspend::Halt => {
164165
event_handler.suspend_halt_event(when);
@@ -167,6 +168,13 @@ impl VcpuTasks {
167168
event_handler.suspend_reset_event(when);
168169
}
169170
exits::Suspend::TripleFault(vcpuid) => {
171+
slog::info!(
172+
&log,
173+
"triple fault on vcpu {}",
174+
vcpu.id;
175+
"state" => %Diagnostics::capture(vcpu)
176+
);
177+
170178
if vcpuid == -1 || vcpuid == vcpu.id {
171179
event_handler
172180
.suspend_triple_fault_event(vcpu.id, when);
@@ -187,7 +195,76 @@ impl VcpuTasks {
187195
task.force_hold();
188196
VmEntry::Run
189197
}
190-
_ => {
198+
VmExitKind::InstEmul(inst) => {
199+
let diag = propolis::vcpu::Diagnostics::capture(vcpu);
200+
error!(log,
201+
"instruction emulation exit on vCPU {}",
202+
vcpu.id;
203+
"context" => ?inst,
204+
"vcpu_state" => %diag);
205+
206+
event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
207+
VmEntry::Run
208+
}
209+
VmExitKind::Unknown(code) => {
210+
error!(log,
211+
"unrecognized exit code on vCPU {}",
212+
vcpu.id;
213+
"code" => code);
214+
215+
event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
216+
VmEntry::Run
217+
}
218+
// Bhyve emits the `Bogus` exit kind when there is no actual
219+
// guest exit for user space to handle, but circumstances
220+
// nevertheless dictate that the kernel VMM should exit to
221+
// user space (e.g. a caller requested that all vCPUs be
222+
// forced to exit to user space so their threads can
223+
// rendezvous there).
224+
//
225+
// `process_vmexit` should always successfully handle this
226+
// exit, since it never entails any work that could fail to
227+
// be completed.
228+
VmExitKind::Bogus => {
229+
unreachable!(
230+
"propolis-lib always handles VmExitKind::Bogus"
231+
);
232+
}
233+
VmExitKind::Debug => {
234+
error!(log,
235+
"lib returned debug exit from vCPU {}",
236+
vcpu.id);
237+
238+
event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
239+
VmEntry::Run
240+
}
241+
VmExitKind::VmxError(detail) => {
242+
error!(log,
243+
"unclassified VMX exit on vCPU {}",
244+
vcpu.id;
245+
"detail" => ?detail);
246+
247+
event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
248+
VmEntry::Run
249+
}
250+
VmExitKind::SvmError(detail) => {
251+
error!(log,
252+
"unclassified SVM exit on vCPU {}",
253+
vcpu.id;
254+
"detail" => ?detail);
255+
256+
event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
257+
VmEntry::Run
258+
}
259+
VmExitKind::Paging(gpa, fault_type) => {
260+
let diag = propolis::vcpu::Diagnostics::capture(vcpu);
261+
error!(log,
262+
"unhandled paging exit on vCPU {}",
263+
vcpu.id;
264+
"gpa" => gpa,
265+
"fault_type" => fault_type,
266+
"vcpu_state" => %diag);
267+
191268
event_handler.unhandled_vm_exit(vcpu.id, exit.kind);
192269
VmEntry::Run
193270
}

bin/propolis-server/src/main.rs

+7
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ fn run_server(
136136
Err(e).context("API version checks")?;
137137
}
138138

139+
// If this is a development image being run outside of an Omicron zone,
140+
// enable the display (in logs, panic messages, and the like) of diagnostic
141+
// data that may have originated in the guest.
142+
#[cfg(not(feature = "omicron-build"))]
143+
propolis::common::DISPLAY_GUEST_DATA
144+
.store(true, std::sync::atomic::Ordering::SeqCst);
145+
139146
let use_reservoir = config::reservoir_decide(&log);
140147

141148
let context = server::DropshotEndpointContext::new(

bin/propolis-standalone/src/main.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1421,6 +1421,9 @@ fn main() -> anyhow::Result<ExitCode> {
14211421
// Check that vmm and viona device version match what we expect
14221422
api_version_checks(&log).context("API version checks")?;
14231423

1424+
propolis::common::DISPLAY_GUEST_DATA
1425+
.store(true, std::sync::atomic::Ordering::SeqCst);
1426+
14241427
// Load/parse the config first, since it's required to size the tokio runtime
14251428
// used to run the instance.
14261429
let config = if restore {

lib/propolis/src/common.rs

+79
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,88 @@
55
use std::ops::{Add, BitAnd};
66
use std::ops::{Bound::*, RangeBounds};
77
use std::slice::SliceIndex;
8+
use std::sync::atomic::{AtomicBool, Ordering};
89

910
use crate::vmm::SubMapping;
1011

12+
/// Controls whether items wrapped in a [`GuestData`] are displayed or redacted
13+
/// when the wrappers are printed via their `Display` or `Debug` impls.
14+
//
15+
// The Propolis server binary should only link the Propolis lib once (any
16+
// structure that links the lib multiple times means something is very odd about
17+
// its dependency graph), so there should never be any ambiguity about what
18+
// `DISPLAY_GUEST_DATA` refers to when linking. But to be maximally cautious,
19+
// label this static as `no_mangle` so that pulling in multiple Propolis
20+
// libraries will break the build instead of possibly resolving ambiguously.
21+
#[no_mangle]
22+
pub static DISPLAY_GUEST_DATA: AtomicBool = AtomicBool::new(false);
23+
24+
/// A wrapper type denoting that the contained `T` was obtained from the guest
25+
/// (e.g. by reading the guest's memory). This type implements various traits
26+
/// (`Deref`, `DerefMut`, and `Borrow`) that allow it to be treated in most
27+
/// cases as just another instance of a `T`. The main difference is that this
28+
/// wrapper has custom `Display` and `Debug` implementations that redact the
29+
/// wrapped value unless the program has set the [`DISPLAY_GUEST_DATA`] flag.
30+
///
31+
/// NOTE: This wrapper type is not airtight: owners of a wrapper can always
32+
/// dereference it and invoke the Display/Debug impls directly on the resulting
33+
/// reference to the wrapped value. If `T` is `Clone`, they can also clone the
34+
/// dereferenced value and display the clone. (This comes with the territory
35+
/// here: users need to be able to get at the wrapped value to be able to do
36+
/// anything useful with it!)
37+
///
38+
/// NOTE: This type does not provide any other security guarantees (e.g. it does
39+
/// not ensure that the wrapped memory will be zeroed on drop).
40+
#[derive(Clone, Copy)]
41+
#[repr(transparent)]
42+
pub struct GuestData<T: ?Sized>(T);
43+
44+
impl<T: std::fmt::Display> std::fmt::Display for GuestData<T> {
45+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46+
if DISPLAY_GUEST_DATA.load(Ordering::Relaxed) {
47+
write!(f, "{}", self.0)
48+
} else {
49+
write!(f, "<guest data redacted>")
50+
}
51+
}
52+
}
53+
54+
impl<T: std::fmt::Debug> std::fmt::Debug for GuestData<T> {
55+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56+
if DISPLAY_GUEST_DATA.load(Ordering::Relaxed) {
57+
write!(f, "{:?}", self.0)
58+
} else {
59+
write!(f, "<guest data redacted>")
60+
}
61+
}
62+
}
63+
64+
impl<T> std::ops::Deref for GuestData<T> {
65+
type Target = T;
66+
67+
fn deref(&self) -> &Self::Target {
68+
&self.0
69+
}
70+
}
71+
72+
impl<T> std::ops::DerefMut for GuestData<T> {
73+
fn deref_mut(&mut self) -> &mut Self::Target {
74+
&mut self.0
75+
}
76+
}
77+
78+
impl<T> From<T> for GuestData<T> {
79+
fn from(value: T) -> Self {
80+
Self(value)
81+
}
82+
}
83+
84+
impl<T> std::borrow::Borrow<T> for GuestData<T> {
85+
fn borrow(&self) -> &T {
86+
&self.0
87+
}
88+
}
89+
1190
fn numeric_bounds(
1291
bound: impl RangeBounds<usize>,
1392
len: usize,

lib/propolis/src/exits.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ use bhyve_api::{
1212
vm_suspend_how,
1313
};
1414

15+
use crate::common::GuestData;
16+
1517
/// Describes the reason for exiting execution of a vCPU.
1618
pub struct VmExit {
1719
/// The instruction pointer of the guest at the time of exit.
@@ -95,17 +97,19 @@ impl From<&bhyve_api::vm_exit_vmx> for VmxDetail {
9597

9698
#[derive(Copy, Clone, Debug)]
9799
pub struct InstEmul {
98-
pub inst_data: [u8; 15],
100+
pub inst_data: GuestData<[u8; 15]>,
99101
pub len: u8,
100102
}
103+
101104
impl InstEmul {
102105
pub fn bytes(&self) -> &[u8] {
103106
&self.inst_data[..usize::min(self.inst_data.len(), self.len as usize)]
104107
}
105108
}
106109
impl From<&bhyve_api::vm_inst_emul> for InstEmul {
107110
fn from(raw: &bhyve_api::vm_inst_emul) -> Self {
108-
let mut res = Self { inst_data: [0u8; 15], len: raw.num_valid };
111+
let mut res =
112+
Self { inst_data: GuestData::from([0u8; 15]), len: raw.num_valid };
109113
assert!(res.len as usize <= res.inst_data.len());
110114
res.inst_data.copy_from_slice(&raw.inst[..]);
111115

lib/propolis/src/hw/nvme/cmds.rs

+14-10
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,14 @@ pub enum AdminCmd {
5454
/// Asynchronous Event Request Command
5555
AsyncEventReq,
5656
/// An unknown admin command
57-
Unknown(#[allow(dead_code)] SubmissionQueueEntry),
57+
Unknown(#[allow(dead_code)] GuestData<SubmissionQueueEntry>),
5858
}
5959

6060
impl AdminCmd {
6161
/// Try to parse an `AdminCmd` out of a raw Submission Entry.
62-
pub fn parse(raw: SubmissionQueueEntry) -> Result<Self, ParseErr> {
62+
pub fn parse(
63+
raw: GuestData<SubmissionQueueEntry>,
64+
) -> Result<Self, ParseErr> {
6365
let cmd = match raw.opcode() {
6466
bits::ADMIN_OPC_DELETE_IO_SQ => {
6567
AdminCmd::DeleteIOSubQ(raw.cdw10 as u16)
@@ -645,12 +647,14 @@ pub enum NvmCmd {
645647
/// Read data and metadata
646648
Read(ReadCmd),
647649
/// An unknown NVM command
648-
Unknown(SubmissionQueueEntry),
650+
Unknown(GuestData<SubmissionQueueEntry>),
649651
}
650652

651653
impl NvmCmd {
652654
/// Try to parse an `NvmCmd` out of a raw Submission Entry.
653-
pub fn parse(raw: SubmissionQueueEntry) -> Result<Self, ParseErr> {
655+
pub fn parse(
656+
raw: GuestData<SubmissionQueueEntry>,
657+
) -> Result<Self, ParseErr> {
654658
let _fuse = match (raw.cdw0 >> 8) & 0b11 {
655659
0b00 => Ok(()), // Normal (non-fused) operation
656660
0b01 => Err(ParseErr::Fused), // First fused op
@@ -882,29 +886,29 @@ impl PrpIter<'_> {
882886
PrpNext::List(base, idx) => {
883887
assert!(idx <= PRP_LIST_MAX);
884888
let entry_addr = base + u64::from(idx) * 8;
885-
let entry: u64 = self
889+
let entry: GuestData<u64> = self
886890
.mem
887891
.read(GuestAddr(entry_addr))
888892
.ok_or_else(|| "Unable to read PRP list entry")?;
889893
probes::nvme_prp_entry!(
890894
|| (self as *const Self as u64, entry,)
891895
);
892896

893-
if entry & PAGE_OFFSET as u64 != 0 {
897+
if *entry & PAGE_OFFSET as u64 != 0 {
894898
return Err("Inappropriate PRP list entry offset");
895899
}
896900

897901
if self.remain <= PAGE_SIZE as u64 {
898-
(entry, self.remain, PrpNext::Done)
902+
(*entry, self.remain, PrpNext::Done)
899903
} else if idx != PRP_LIST_MAX {
900-
(entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1))
904+
(*entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1))
901905
} else {
902906
// The last PRP in this list chains to another
903907
// (page-aligned) list with the next PRP.
904-
self.next = PrpNext::List(entry, 0);
908+
self.next = PrpNext::List(*entry, 0);
905909
probes::nvme_prp_list!(|| (
906910
self as *const Self as u64,
907-
entry,
911+
*entry,
908912
0,
909913
));
910914
return self.get_next();

lib/propolis/src/hw/nvme/queue.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ impl SubQueue {
429429
pub fn pop(
430430
self: &Arc<SubQueue>,
431431
mem: &MemCtx,
432-
) -> Option<(SubmissionQueueEntry, Permit, u16)> {
432+
) -> Option<(GuestData<SubmissionQueueEntry>, Permit, u16)> {
433433
// Attempt to reserve an entry on the Completion Queue
434434
let permit = self.cq.reserve_entry(&self)?;
435435
let mut state = self.state.lock();

lib/propolis/src/hw/qemu/fwcfg.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ impl FwCfg {
599599

600600
let mem_guard = self.acc_mem.access().expect("usable mem accessor");
601601
let mem = mem_guard.deref();
602-
let req: FwCfgDmaAccess =
602+
let req: GuestData<FwCfgDmaAccess> =
603603
mem.read(GuestAddr(addr)).ok_or(FwCfgErr::BadAddr)?;
604604

605605
fn dma_write_result(
@@ -632,7 +632,7 @@ impl FwCfg {
632632
fn dma_operation(
633633
&self,
634634
state: &mut MutexGuard<State>,
635-
req: FwCfgDmaAccess,
635+
req: GuestData<FwCfgDmaAccess>,
636636
mem: &MemCtx,
637637
) -> Result<(), FwCfgErr> {
638638
let opts = FwCfgDmaCtrl::from_bits_truncate(req.ctrl.get());
@@ -995,9 +995,9 @@ mod test {
995995
submit_dma_req(&dev, req_addr);
996996

997997
// DMA should have successfully completed now
998-
assert_eq!(mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
998+
assert_eq!(*mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
999999
let data = mem.read::<[u8; 4]>(GuestAddr(dma_addr)).unwrap();
1000-
assert_eq!(&data, "QEMU".as_bytes());
1000+
assert_eq!(&*data, "QEMU".as_bytes());
10011001
}
10021002

10031003
#[test]
@@ -1019,9 +1019,9 @@ mod test {
10191019
submit_dma_req(&dev, req_addr);
10201020

10211021
// DMA should have successfully completed now
1022-
assert_eq!(mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
1022+
assert_eq!(*mem.read::<u32>(GuestAddr(req_addr)).unwrap(), 0);
10231023
let data = mem.read::<[u8; 4]>(GuestAddr(dma_addr)).unwrap();
1024-
assert_eq!(data, [0u8; 4]);
1024+
assert_eq!(*data, [0u8; 4]);
10251025
}
10261026

10271027
#[test]

0 commit comments

Comments
 (0)