@@ -6,6 +6,8 @@ use std::collections::HashMap;
use std::str::FromStr;
use std::time::Duration;

+ use propolis_client::types::HyperVFeatureFlag;
+
use phd_framework::test_vm::{FakeOximeterSampler, MetricsLocation};

use chrono::{DateTime, Utc};
@@ -35,7 +37,7 @@ struct VcpuUsageMetric {
///
/// Oximeter producers produce a series of lists of samples, where each list
/// of samples is conceptually distinct but may still be interesting to
- /// tset. In `propolis-server`, the first list of samples will be
+ /// test. In `propolis-server`, the first list of samples will be
/// `virtual_machine:vcpu_usage`, which may be blank if kstats have not been
/// sampled since the last producer poll. The second list of samples
/// will be `virtual_machine:reset`.
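As a rough sketch of the shape this doc comment describes (the `Sample` type and its fields below are illustrative stand-ins, not the real oximeter or phd_framework types): each producer poll yields several conceptually distinct sample lists, and a test interested only in vCPU usage can filter on the timeseries name and tolerate an empty result.

    // Illustrative stand-in; the real samples come from oximeter.
    struct Sample {
        timeseries_name: String,
        value: u64,
    }

    // One producer poll is a series of sample lists. Keep only the
    // `virtual_machine:vcpu_usage` samples; the list may be empty if kstats
    // have not been sampled since the last poll.
    fn vcpu_usage_samples(poll: Vec<Vec<Sample>>) -> Vec<Sample> {
        poll.into_iter()
            .flatten()
            .filter(|s| s.timeseries_name == "virtual_machine:vcpu_usage")
            .collect()
    }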
@@ -215,6 +217,18 @@ async fn instance_vcpu_stats(ctx: &Framework) {
env.metrics(Some(MetricsLocation::Local));

let mut vm_config = ctx.vm_config_builder("instance_vcpu_stats");
+ vm_config.guest_hv_interface(
+     propolis_client::types::GuestHypervisorInterface::HyperV {
+         features: vec![HyperVFeatureFlag::ReferenceTsc],
+     },
+ );
+ // Having one CPU simplifies the math for time expectations later in the
+ // test. One CPU means one second per second of time across all one vCPU's
+ // microstates, and if we have caused guest load, the guest vCPU should be
+ // in "run" essentially the whole time until the load completes.
+ //
+ // Using the (configurable!) default "could" work, but this lets us avoid
+ // having to account for additional probably-idle CPUs.
vm_config.cpus(1);

let mut vm = ctx.spawn_vm(&vm_config, Some(&env)).await?;
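To make the comment's arithmetic concrete, here is a minimal sketch (the helper and the state names are illustrative, not part of phd_framework): with a single vCPU, the per-microstate deltas over a sampling window should sum to roughly the wall-clock length of that window, and under load nearly all of it should fall in "run".

    use std::time::Duration;

    // One vCPU accrues one second of microstate time per wall-clock second,
    // so the per-state deltas over a window should add up to about the
    // window itself (with some slack for sampling jitter).
    fn deltas_account_for_window(
        run: Duration,
        idle: Duration,
        waiting: Duration,
        emulation: Duration,
        window: Duration,
    ) -> bool {
        let total = run + idle + waiting + emulation;
        total >= window.mul_f64(0.9) && total <= window.mul_f64(1.1)
    }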
@@ -367,26 +381,31 @@ async fn instance_vcpu_stats(ctx: &Framework) {
    - start_metrics.vcpu_state_total(&VcpuState::Emulation))
    as u128;

- // Pick 100ms as a comically high upper bound for how much time might have
- // been spent emulating instructions on the guest's behalf. Anecdotally the
- // this is on the order of 8ms between the two samples. This should be very
- // low; the workload is almost entirely guest user mode execution.
+ // Theoretically, 100ms would be a comically high upper bound for how much
+ // time we've spent emulating instructions on the guest's behalf during this
+ // test. In reality, the situation is more subtle. Guest OSes can be
+ // surprisingly heavy on the APIC, and if they believe the TSC is
+ // unreliable, heavy on the ACPI PM timer too. We've at least set up guest
+ // enlightenments to present a reliable TSC, so as long as the guest picks up
+ // that enlightenment and does not fall back to the ACPI PM timer, that
+ // source of instruction emulation activity is quashed.
+ //
+ // So, it's hard to make a universal statement about how much time should be
+ // spent emulating instructions here. Instead, only check this if we know
+ // the guest will produce predictable times. Specifically: expect that Linux
+ // doesn't use the APIC much while idle and trusts the HyperV TSC
+ // enlightenment, and so is a candidate for reliable assertions on
+ // instruction emulation time.
if vm.guest_os_kind().is_linux() {
-     // Unfortunately, the above is currently too optimistic in the general
-     // case of arbitrary guest OSes - if a guest OS has idle services, and
-     // those idle services involve checking the current time, and the guest
-     // has determined the TSC is unreliable, we may count substantial
-     // emulation time due to emulating guest accesses to the ACPI PM timer.
-     //
-     // Linux guests are known to not (currently?) consult the current time
-     // if fully idle, so we can be more precise about emulation time
-     // assertions.
-     const LIMIT: u128 = Duration::from_millis(100).as_nanos();
+     const EMUL_LIMIT: u128 = Duration::from_millis(100).as_nanos();
+     // As of writing this test, `full_emul_delta` is around 12-13ms with an
+     // Alpine guest. 100ms is hopefully plenty of margin for slower or
+     // busier test systems, or reasonable implementation changes.
    assert!(
-         full_emul_delta < LIMIT,
+         full_emul_delta < EMUL_LIMIT,
        "full emul delta was above threshold: {} > {}",
        full_emul_delta,
-         LIMIT
+         EMUL_LIMIT
    );
} else {
    warn!(
@@ -400,12 +419,12 @@ async fn instance_vcpu_stats(ctx: &Framework) {
// short duration, and on my workstation this is around 400 microseconds.
// Again, test against a significantly larger threshold in case CI is
// extremely slow.
- const LIMIT: u128 = Duration::from_millis(20).as_nanos();
+ const WAIT_LIMIT: u128 = Duration::from_millis(20).as_nanos();
assert!(
-     full_waiting_delta < LIMIT,
+     full_waiting_delta < WAIT_LIMIT,
    "full waiting delta was above threshold: {} > {}",
    full_waiting_delta,
-     LIMIT
+     WAIT_LIMIT
);

trace!("run: {}", full_run_delta);
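For reference, both limit checks in these hunks share the same shape; a condensed, self-contained sketch of that pattern (the `VcpuTotals` struct here is a stand-in for the sampled metrics, not a type from the test):

    use std::time::Duration;

    // Stand-in for the cumulative per-state totals sampled before and after
    // the workload; the real test reads these out of oximeter samples.
    struct VcpuTotals {
        waiting_ns: u64,
    }

    // Difference of two cumulative totals, compared against a limit expressed
    // as a Duration converted to nanoseconds.
    fn waiting_delta_ok(start: &VcpuTotals, end: &VcpuTotals, limit: Duration) -> bool {
        let delta = (end.waiting_ns - start.waiting_ns) as u128;
        delta < limit.as_nanos()
    }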