Skip to content

Commit 9e823ec

Browse files
committed
DTrace script to inspect VM exit reasons
This script reports the SVM exit codes for vCPUs as well as some information for select exit codes that include additional information (port IO accesses and NPF/nested #PF). The script also describes causes of Bogus exits out to the VMM. Other VM_EXITCODE_* causes are probably interesting too, but I'm not immediately sure what I'd want to know there. The output looks something lke this, from a mildly loaded `propolis-standalone` that happens to use the ACPI PM timer (seen as accesses to IO port 0xb008): ``` === Exit summary, one second ending at 2025 Feb 14 23:47:33 === SVM code Count 78 16 64 514 60 806 400 1065 7b 2611 IOIO SVM exits: Port Op OpSz Addr Count b008 in 32b 64b 2611 NPF SVM exits: Guest PA Guest RIP #PF flags Count 80011008 ffffffffc02521dd D-UW- 0 8001100c ffffffffc02549dc D-UW- 0 fee00300 ffffffffa8065029 D-UR- 8 fee00300 ffffffffa806504c D-UW- 8 fee00310 ffffffffa8065038 D-UW- 8 fee000b0 ffffffffa806b472 D-UW- 516 fee00380 ffffffffa806b472 D-UW- 525 Unhandled by svm_vmexit(): SVM code Count 78 16 400 1065 7b 2611 vmrun() VM_EXITCODE_BOGUS reasons: early_bailout 108 ```
1 parent c849bab commit 9e823ec

File tree

2 files changed

+218
-0
lines changed

2 files changed

+218
-0
lines changed

scripts/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ individual files for details.
1111
- `nvme_trace.d`: Measure propolis-emulated NVMe read/write latency.
1212
- `time_adjustments.d`: Observe guest timing data adjustments on the target host
1313
of a live migration.
14+
- `vm_exit_codes.d`: Measure VM exits and information about them both for
15+
#VMEXIT events and returns to Propolis.

scripts/vm-exit-codes.d

+216
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#pragma D option quiet
2+
3+
/*
4+
* Report reasons why a VM's vCPU has exited, and exits out to the VMM.
5+
*
6+
* A vCPU's exits are often handled entirely in bhyve, with the VMM unaware, and
7+
* an exit to VMM may not be the direct consequence of a VM exit. An exit to VMM
8+
* likely will only occur after at least one VMEXIT, though, when other
9+
* conditions are re-evaluated before resuming the vCPU. This script measures
10+
* and reports both SVM VMEXIT and exits to VMM because while separate they are
11+
* likely related in some way.
12+
*/
13+
14+
/*
15+
* From AMD APM Volume 2 Appendix C: "SVM Intercept Exit Codes"
16+
*
17+
* Subset of exit codes this script is particularly interested in.
18+
*/
19+
enum svm_exitcode {
20+
VMEXIT_INTR = 0x60,
21+
VMEXIT_VINTR = 0x64,
22+
VMEXIT_CPUID = 0x72,
23+
VMEXIT_IOIO = 0x7b,
24+
VMEXIT_NPF = 0x400
25+
};
26+
27+
/*
28+
* Exit codes that a VMM may receive after running a vCPU, taken from
29+
* illumos' `intel/sys/vmm.h`.
30+
*/
31+
enum vm_exitcode {
32+
VM_EXITCODE_INOUT,
33+
VM_EXITCODE_VMX,
34+
VM_EXITCODE_BOGUS,
35+
VM_EXITCODE_RDMSR,
36+
VM_EXITCODE_WRMSR,
37+
VM_EXITCODE_HLT,
38+
VM_EXITCODE_MTRAP,
39+
VM_EXITCODE_PAUSE,
40+
VM_EXITCODE_PAGING,
41+
VM_EXITCODE_INST_EMUL,
42+
VM_EXITCODE_RUN_STATE,
43+
VM_EXITCODE_MMIO_EMUL,
44+
VM_EXITCODE_DEPRECATED, /* formerly RUNBLOCK */
45+
VM_EXITCODE_IOAPIC_EOI,
46+
VM_EXITCODE_SUSPENDED,
47+
VM_EXITCODE_MMIO,
48+
VM_EXITCODE_TASK_SWITCH,
49+
VM_EXITCODE_MONITOR,
50+
VM_EXITCODE_MWAIT,
51+
VM_EXITCODE_SVM,
52+
VM_EXITCODE_DEPRECATED2, /* formerly REQIDLE */
53+
VM_EXITCODE_DEBUG,
54+
VM_EXITCODE_VMINSN,
55+
VM_EXITCODE_BPT,
56+
VM_EXITCODE_HT,
57+
VM_EXITCODE_MAX
58+
};
59+
60+
BEGIN {
61+
misunderstood_exits = 0;
62+
}
63+
64+
fbt::svm_vmrun:entry {
65+
/*
66+
* Some functions of interest here are only interesting when called
67+
* under svm_vmrun, but may be called elsewhere as well. Keep track of
68+
* if we're in svm_vmrun to scope other probes correspondingly.
69+
*/
70+
self->in_vmrun = 1;
71+
/*
72+
* Assuming we'll exit svm_vmrun at some point, presume we don't know
73+
* why that exit occurred. We'll flip this to true in cases the script
74+
* knows about. Any exits that are not understood are a sign the script
75+
* is stale, the kernel has changed, or both.
76+
*/
77+
self->exit_understood = 0;
78+
self->next_exit_reason = "unknown";
79+
}
80+
81+
fbt::svm_launch:return {
82+
self->vcpu = (struct svm_vcpu *)NULL;
83+
}
84+
85+
fbt::svm_vmexit:entry {
86+
self->vcpu = &((struct svm_softc*)arg0)->vcpu[arg1];
87+
self->ctrl = self->vcpu->vmcb.ctrl;
88+
self->state = self->vcpu->vmcb.state;
89+
self->vmexit = (struct vm_exit*)arg2;
90+
91+
@exits[self->ctrl.exitcode] = count();
92+
93+
if (self->ctrl.exitcode == VMEXIT_IOIO) {
94+
this->opsz = (self->ctrl.exitinfo1 >> 4) & 7;
95+
this->addrsz = (self->ctrl.exitinfo1 >> 7) & 7;
96+
@io_info[
97+
self->ctrl.exitinfo1 >> 16,
98+
self->ctrl.exitinfo1 & 1 == 0 ? "out" : "in",
99+
this->opsz == 1 ? "8b" :
100+
this->opsz == 2 ? "16b" :
101+
this->opsz == 4 ? "32b" : "bogus",
102+
this->addrsz == 1 ? "16b" :
103+
this->addrsz == 2 ? "32b" :
104+
this->addrsz == 4 ? "64b" : "bogus"
105+
] = count();
106+
}
107+
108+
if (self->ctrl.exitcode == VMEXIT_NPF) {
109+
@npf_info[
110+
self->ctrl.exitinfo2,
111+
self->state.rip,
112+
/*
113+
* Instruction/Data access
114+
*/
115+
(self->ctrl.exitinfo1 >> 4) & 1 ? "I" : "D",
116+
/*
117+
* Processor read 1 in a PTE's reserved bits
118+
*/
119+
(self->ctrl.exitinfo1 >> 3) & 1 ? "R" : "-",
120+
/*
121+
* User/Supervisor (CPL=3 or not 3)
122+
*/
123+
(self->ctrl.exitinfo1 >> 2) & 1 ? "U" : "S",
124+
/*
125+
* Access is write or read
126+
*/
127+
(self->ctrl.exitinfo1 >> 1) & 1 ? "W" : "R",
128+
/*
129+
* Page is present or not
130+
*/
131+
(self->ctrl.exitinfo1 >> 0) & 1 ? "P" : "-"
132+
] = count();
133+
}
134+
}
135+
136+
fbt::svm_vmexit:return {
137+
if (arg1 == 0) {
138+
self->exit_understood = 1;
139+
self->next_exit_reason = "unhandled_vmexit";
140+
@unhandled_vmexits[self->ctrl.exitcode] = count();
141+
any_unhandled_exits = 1;
142+
}
143+
}
144+
145+
fbt::vcpu_entry_bailout_checks:return / self->in_vmrun == 1 /{
146+
if (arg1 != 0) {
147+
self->exit_understood = 1;
148+
self->next_exit_reason = "early_bailout";
149+
}
150+
}
151+
152+
fbt::vcpu_run_state_pending:return / self->in_vmrun == 1 /{
153+
if (arg1 != 0) {
154+
self->exit_understood = 1;
155+
self->next_exit_reason = "run_state_pending";
156+
}
157+
}
158+
159+
fbt::svm_vmrun:return / self->in_vmrun == 1 && self->vmexit != NULL / {
160+
self->in_vmrun = 0;
161+
162+
if (!self->exit_understood) {
163+
misunderstood_exits += 1;
164+
}
165+
if (self->vmexit->exitcode == 2) {
166+
@bogus_reasons[self->next_exit_reason] = count();
167+
}
168+
}
169+
170+
tick-1s {
171+
printf("=== Exit summary, one second ending at %Y ===\n",
172+
walltimestamp);
173+
printf(" %8s %s\n", "SVM code", "Count");
174+
printa(" %8x %@8u\n", @exits);
175+
176+
printf("IOIO SVM exits:\n");
177+
printf(" %4s %3s %4s %4s %5s\n",
178+
"Port", "Op", "OpSz", "Addr", "Count");
179+
printa(" %4x %3s %4s %4s %@5d\n", @io_info);
180+
181+
printf("NPF SVM exits:\n");
182+
printf(" %-16s %-16s %5s %s\n",
183+
"Guest PA", "Guest RIP", "#PF flags", "Count");
184+
printa(" %16x %16x %5s%s%s%s%s %@8u\n", @npf_info);
185+
186+
printf("Unhandled by svm_vmexit():\n");
187+
printf(" %8s %s\n", "SVM code", "Count");
188+
printa(" %8x %@8u\n", @unhandled_vmexits);
189+
190+
printf("vmrun() VM_EXITCODE_BOGUS reasons:\n");
191+
printa(" %20s %@8u\n", @bogus_reasons);
192+
193+
if (misunderstood_exits > 0) {
194+
printf("Exits this script did not understand: %d\n",
195+
misunderstood_exits);
196+
misunderstood_exits = 0;
197+
}
198+
199+
printf("\n");
200+
201+
/*
202+
* Clear all accumulated data, but keep the most common keys to churn a
203+
* little less if there is relatively little activity and a key flips
204+
* from zero to non-zero counts regularly.
205+
*/
206+
trunc(@exits, 10);
207+
clear(@exits);
208+
trunc(@io_info, 10);
209+
clear(@io_info);
210+
trunc(@npf_info, 10);
211+
clear(@npf_info);
212+
trunc(@bogus_reasons, 10);
213+
clear(@bogus_reasons);
214+
trunc(@unhandled_vmexits, 10);
215+
clear(@unhandled_vmexits);
216+
}

0 commit comments

Comments
 (0)