Skip to content

Commit 4ab0189

Browse files
authoredFeb 18, 2025
DTrace script to inspect VM exit reasons (#859)
This script reports the SVM exit codes for vCPUs as well as some information for select exit codes that include additional information (port IO accesses and NPF/nested #PF). The script also describes causes of Bogus exits out to the VMM. Other VM_EXITCODE_* causes are probably interesting too, but I'm not immediately sure what I'd want to know there. The output looks something like this, from a mildly loaded `propolis-standalone` that happens to use the ACPI PM timer (seen as accesses to IO port 0xb008): ``` === Exit summary, one second ending at 2025 Feb 14 23:47:33 === SVM code Count 78 16 64 514 60 806 400 1065 7b 2611 IOIO SVM exits: Port Op OpSz Addr Count b008 in 32b 64b 2611 NPF SVM exits: Guest PA Guest RIP #PF flags Count 80011008 ffffffffc02521dd D-UW- 0 8001100c ffffffffc02549dc D-UW- 0 fee00300 ffffffffa8065029 D-UR- 8 fee00300 ffffffffa806504c D-UW- 8 fee00310 ffffffffa8065038 D-UW- 8 fee000b0 ffffffffa806b472 D-UW- 516 fee00380 ffffffffa806b472 D-UW- 525 vm_run() VM_EXITCODE_BOGUS reasons: early_bailout 108 ```
1 parent c849bab commit 4ab0189

File tree

2 files changed

+203
-0
lines changed

2 files changed

+203
-0
lines changed
 

‎scripts/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ individual files for details.
1111
- `nvme_trace.d`: Measure propolis-emulated NVMe read/write latency.
1212
- `time_adjustments.d`: Observe guest timing data adjustments on the target host
1313
of a live migration.
14+
- `vm_exit_codes.d`: Measure VM exits and information about them both for
15+
#VMEXIT events and returns to Propolis.

‎scripts/vm-exit-codes.d

+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#pragma D option quiet
2+
3+
/*
4+
* Report reasons why a VM's vCPU has exited, and exits out to the VMM.
5+
*
6+
* A vCPU's exits are often handled entirely in bhyve, with the VMM unaware, and
7+
* an exit to VMM may not be the direct consequence of a VM exit. An exit to VMM
8+
* likely will only occur after at least one VMEXIT, though, when other
9+
* conditions are re-evaluated before resuming the vCPU. This script measures
10+
* and reports both SVM VMEXIT and exits to VMM because while separate they are
11+
* likely related in some way.
12+
*/
13+
14+
/*
15+
* From AMD APM Volume 2 Appendix C: "SVM Intercept Exit Codes"
16+
*
17+
* Subset of exit codes this script is particularly interested in.
18+
*/
19+
enum svm_exitcode {
20+
VMEXIT_INTR = 0x60,
21+
VMEXIT_VINTR = 0x64,
22+
VMEXIT_CPUID = 0x72,
23+
VMEXIT_IOIO = 0x7b,
24+
VMEXIT_NPF = 0x400
25+
};
26+
27+
/*
28+
* Exit codes that a VMM may receive after running a vCPU, taken from
29+
* illumos' `intel/sys/vmm.h`.
30+
*/
31+
enum vm_exitcode {
32+
VM_EXITCODE_INOUT,
33+
VM_EXITCODE_VMX,
34+
VM_EXITCODE_BOGUS,
35+
VM_EXITCODE_RDMSR,
36+
VM_EXITCODE_WRMSR,
37+
VM_EXITCODE_HLT,
38+
VM_EXITCODE_MTRAP,
39+
VM_EXITCODE_PAUSE,
40+
VM_EXITCODE_PAGING,
41+
VM_EXITCODE_INST_EMUL,
42+
VM_EXITCODE_RUN_STATE,
43+
VM_EXITCODE_MMIO_EMUL,
44+
VM_EXITCODE_DEPRECATED, /* formerly RUNBLOCK */
45+
VM_EXITCODE_IOAPIC_EOI,
46+
VM_EXITCODE_SUSPENDED,
47+
VM_EXITCODE_MMIO,
48+
VM_EXITCODE_TASK_SWITCH,
49+
VM_EXITCODE_MONITOR,
50+
VM_EXITCODE_MWAIT,
51+
VM_EXITCODE_SVM,
52+
VM_EXITCODE_DEPRECATED2, /* formerly REQIDLE */
53+
VM_EXITCODE_DEBUG,
54+
VM_EXITCODE_VMINSN,
55+
VM_EXITCODE_BPT,
56+
VM_EXITCODE_HT,
57+
VM_EXITCODE_MAX
58+
};
59+
60+
BEGIN {
61+
misunderstood_exits = 0;
62+
}
63+
64+
fbt::vm_run:entry {
65+
/*
66+
* Some functions of interest here are only interesting when called
67+
* under vm_run, but may be called elsewhere as well. Keep track of
68+
* if we're in vm_run to scope other probes correspondingly.
69+
*/
70+
self->in_vm_run = 1;
71+
/*
72+
* Assuming we'll exit vm_run at some point, presume we don't know
73+
* why that exit occurred. We'll flip this to true in cases the script
74+
* knows about. Any exits that are not understood are a sign the script
75+
* is stale, the kernel has changed, or both.
76+
*/
77+
self->exit_understood = 0;
78+
self->next_exit_reason = "unknown";
79+
}
80+
81+
fbt::svm_launch:return {
82+
self->vcpu = (struct svm_vcpu *)NULL;
83+
}
84+
85+
fbt::svm_vmexit:entry {
86+
self->vcpu = &((struct svm_softc*)arg0)->vcpu[arg1];
87+
self->ctrl = self->vcpu->vmcb.ctrl;
88+
self->state = self->vcpu->vmcb.state;
89+
self->vmexit = (struct vm_exit*)arg2;
90+
91+
@exits[self->ctrl.exitcode] = count();
92+
93+
if (self->ctrl.exitcode == VMEXIT_IOIO) {
94+
this->opsz = (self->ctrl.exitinfo1 >> 4) & 7;
95+
this->addrsz = (self->ctrl.exitinfo1 >> 7) & 7;
96+
@io_info[
97+
self->ctrl.exitinfo1 >> 16,
98+
self->ctrl.exitinfo1 & 1 == 0 ? "out" : "in",
99+
this->opsz == 1 ? "8b" :
100+
this->opsz == 2 ? "16b" :
101+
this->opsz == 4 ? "32b" : "bogus",
102+
this->addrsz == 1 ? "16b" :
103+
this->addrsz == 2 ? "32b" :
104+
this->addrsz == 4 ? "64b" : "bogus"
105+
] = count();
106+
}
107+
108+
if (self->ctrl.exitcode == VMEXIT_NPF) {
109+
@npf_info[
110+
self->ctrl.exitinfo2,
111+
self->state.rip,
112+
/*
113+
* Instruction/Data access
114+
*/
115+
(self->ctrl.exitinfo1 >> 4) & 1 ? "I" : "D",
116+
/*
117+
* Processor read 1 in a PTE's reserved bits
118+
*/
119+
(self->ctrl.exitinfo1 >> 3) & 1 ? "R" : "-",
120+
/*
121+
* User/Supervisor (CPL=3 or not 3)
122+
*/
123+
(self->ctrl.exitinfo1 >> 2) & 1 ? "U" : "S",
124+
/*
125+
* Access is write or read
126+
*/
127+
(self->ctrl.exitinfo1 >> 1) & 1 ? "W" : "R",
128+
/*
129+
* Page is present or not
130+
*/
131+
(self->ctrl.exitinfo1 >> 0) & 1 ? "P" : "-"
132+
] = count();
133+
}
134+
}
135+
136+
fbt::vcpu_entry_bailout_checks:return / self->in_vm_run == 1 /{
137+
if (arg1 != 0) {
138+
self->exit_understood = 1;
139+
self->next_exit_reason = "early_bailout";
140+
}
141+
}
142+
143+
fbt::vcpu_run_state_pending:return / self->in_vm_run == 1 /{
144+
if (arg1 != 0) {
145+
self->exit_understood = 1;
146+
self->next_exit_reason = "run_state_pending";
147+
}
148+
}
149+
150+
fbt::vm_run:return / self->in_vm_run == 1 && self->vmexit != NULL / {
151+
self->in_vm_run = 0;
152+
153+
if (!self->exit_understood) {
154+
misunderstood_exits += 1;
155+
}
156+
if (self->vmexit->exitcode == VM_EXITCODE_BOGUS) {
157+
@bogus_reasons[self->next_exit_reason] = count();
158+
}
159+
}
160+
161+
tick-1s {
162+
printf("=== Exit summary, one second ending at %Y ===\n",
163+
walltimestamp);
164+
printf(" %8s %s\n", "SVM code", "Count");
165+
printa(" %8x %@8u\n", @exits);
166+
167+
printf("IOIO SVM exits:\n");
168+
printf(" %4s %3s %4s %4s %5s\n",
169+
"Port", "Op", "OpSz", "Addr", "Count");
170+
printa(" %4x %3s %4s %4s %@5d\n", @io_info);
171+
172+
printf("NPF SVM exits:\n");
173+
printf(" %-16s %-16s %5s %s\n",
174+
"Guest PA", "Guest RIP", "#PF flags", "Count");
175+
printa(" %16x %16x %5s%s%s%s%s %@8u\n", @npf_info);
176+
177+
printf("vm_run() VM_EXITCODE_BOGUS reasons:\n");
178+
printa(" %20s %@8u\n", @bogus_reasons);
179+
180+
if (misunderstood_exits > 0) {
181+
printf("Exits this script did not understand: %d\n",
182+
misunderstood_exits);
183+
misunderstood_exits = 0;
184+
}
185+
186+
printf("\n");
187+
188+
/*
189+
* Clear all accumulated data, but keep the most common keys to churn a
190+
* little less if there is relatively little activity and a key flips
191+
* from zero to non-zero counts regularly.
192+
*/
193+
trunc(@exits, 10);
194+
clear(@exits);
195+
trunc(@io_info, 10);
196+
clear(@io_info);
197+
trunc(@npf_info, 10);
198+
clear(@npf_info);
199+
trunc(@bogus_reasons, 10);
200+
clear(@bogus_reasons);
201+
}

0 commit comments

Comments
 (0)