90
90
//! [`ensure`]: crate::vm::ensure
91
91
92
92
use std:: {
93
- collections:: VecDeque ,
94
93
sync:: { Arc , Mutex } ,
95
94
time:: Duration ,
96
95
} ;
@@ -142,6 +141,12 @@ pub(super) enum VmStartReason {
142
141
ExplicitRequest ,
143
142
}
144
143
144
+ enum VmStartOutcome {
145
+ Succeeded ,
146
+ Failed ,
147
+ Aborted ,
148
+ }
149
+
145
150
/// A kind of event the state driver can handle.
146
151
#[ derive( Debug ) ]
147
152
enum InputQueueEvent {
@@ -154,10 +159,6 @@ struct InputQueueInner {
154
159
/// State change requests from the external API.
155
160
external_requests : request_queue:: ExternalRequestQueue ,
156
161
157
- /// State change requests from the external API that were previously read
158
- /// but not handled immediately.
159
- self_request : VecDeque < ExternalRequest > ,
160
-
161
162
/// State change requests from the VM's components. These take precedence
162
163
/// over external state change requests.
163
164
guest_events : super :: guest_event:: GuestEventQueue ,
@@ -170,7 +171,6 @@ impl InputQueueInner {
170
171
log, auto_start,
171
172
) ,
172
173
guest_events : super :: guest_event:: GuestEventQueue :: default ( ) ,
173
- self_request : Default :: default ( ) ,
174
174
}
175
175
}
176
176
}
@@ -209,9 +209,6 @@ impl InputQueue {
209
209
/// - Guest events: These are signals raised from the VM's vCPUs and
210
210
/// devices (e.g. a request to reboot or halt the VM arising from a vCPU
211
211
/// asserting a virtual chipset signal).
212
- /// - Self-requests: The state driver may buffer external requests for
213
- /// later processing by pushing them to the self-request queue. See
214
- /// [`Self::push_self_request`].
215
212
/// - External requests: These are state change requests received via the
216
213
/// server API. See [`super::request_queue`] for more details about how
217
214
/// these requests are queued.
@@ -228,8 +225,6 @@ impl InputQueue {
228
225
let mut guard = self . inner . lock ( ) . unwrap ( ) ;
229
226
if let Some ( guest_event) = guard. guest_events . pop_front ( ) {
230
227
return InputQueueEvent :: GuestEvent ( guest_event) ;
231
- } else if let Some ( req) = guard. self_request . pop_front ( ) {
232
- return InputQueueEvent :: ExternalRequest ( req) ;
233
228
} else if let Some ( req) = guard. external_requests . pop_front ( ) {
234
229
return InputQueueEvent :: ExternalRequest ( req) ;
235
230
}
@@ -246,19 +241,6 @@ impl InputQueue {
246
241
}
247
242
}
248
243
249
- /// Pushes a self-requested state change request to this queue.
250
- ///
251
- /// This routine may only be called from the state driver task.
252
- fn push_self_request ( & self , req : ExternalRequest ) {
253
- let mut guard = self . inner . lock ( ) . unwrap ( ) ;
254
- guard. self_request . push_back ( req) ;
255
-
256
- // Since this routine is only called from the state driver task, the
257
- // driver is by definition not waiting for a new event at this point, so
258
- // it's not necessary to signal the notify here (the next call to
259
- // dequeue an event will always pick this event up).
260
- }
261
-
262
244
/// Notifies the external request queue that the state driver has completed
263
245
/// a request from that queue.
264
246
fn notify_request_completed ( & self , state : CompletedRequest ) {
@@ -508,8 +490,9 @@ impl StateDriver {
508
490
509
491
let final_state = if migrated_in {
510
492
match self . start_vm ( VmStartReason :: MigratedIn ) . await {
511
- Ok ( ( ) ) => self . event_loop ( ) . await ,
512
- Err ( _) => InstanceState :: Failed ,
493
+ VmStartOutcome :: Succeeded => self . event_loop ( ) . await ,
494
+ VmStartOutcome :: Failed => InstanceState :: Failed ,
495
+ VmStartOutcome :: Aborted => InstanceState :: Destroyed ,
513
496
}
514
497
} else {
515
498
self . event_loop ( ) . await
@@ -552,7 +535,7 @@ impl StateDriver {
552
535
async fn start_vm (
553
536
& mut self ,
554
537
start_reason : VmStartReason ,
555
- ) -> anyhow :: Result < ( ) > {
538
+ ) -> VmStartOutcome {
556
539
info ! ( self . log, "starting instance" ; "reason" => ?start_reason) ;
557
540
558
541
// Tell listeners that the VM's components are now starting up and not
@@ -591,9 +574,9 @@ impl StateDriver {
591
574
for ( name, dev) in objects. device_map ( ) {
592
575
info ! ( self . log, "sending start request to {}" , name) ;
593
576
let res = dev. start ( ) ;
594
- if let Err ( e) = & res {
577
+ if let Err ( e) = res {
595
578
error ! ( self . log, "startup failed for {}: {:?}" , name, e) ;
596
- return res ;
579
+ return VmStartOutcome :: Failed ;
597
580
}
598
581
}
599
582
@@ -642,82 +625,85 @@ impl StateDriver {
642
625
// the VM while it was being started. If such a request is seen, abandon
643
626
// startup, move the VM to Stopped, and return an "aborted" outcome so that
644
627
// the caller can tear down the VM's components.
645
- let mut stopped_while_starting = false ;
628
+ enum Selection {
629
+ BackendFuture ( anyhow:: Result < ( ) > ) ,
630
+ Event ( InputQueueEvent ) ,
631
+ }
646
632
loop {
647
- let event = tokio:: select! {
633
+ let selection = tokio:: select! {
648
634
// If the VM successfully starts, return immediately and let
649
635
// the caller process any events that may happen to be on the
650
636
// queue.
651
637
biased;
652
638
653
639
res = & mut block_backend_fut => {
654
- // If the VM started up successfully, publish that it is
655
- // running and queue up any external requests that were
656
- // deferred while startup was ongoing.
657
- //
658
- // If startup failed, just return the error without changing
659
- // any state or processing any additional requests. The
660
- // caller will move the instance to the appropriate terminal
661
- // state and clean up the VM as needed.
662
- if res. is_ok( ) {
663
- let objects = & self . objects;
664
- objects. lock_exclusive( ) . await . resume_vcpus( ) ;
665
- self . external_state
666
- . update( ExternalStateUpdate :: Instance ( InstanceState :: Running ) ) ;
667
-
668
- self . input_queue. notify_request_completed(
669
- CompletedRequest :: Start { succeeded: true } ,
670
- ) ;
671
-
672
- if stopped_while_starting {
673
- self . input_queue. push_self_request(
674
- ExternalRequest :: stop( )
675
- ) ;
676
- }
677
-
678
- info!( & self . log, "VM successfully started" ) ;
679
- } else {
680
- self . input_queue. notify_request_completed(
681
- CompletedRequest :: Start { succeeded: false } ,
682
- ) ;
683
- }
684
-
685
- return res;
640
+ Selection :: BackendFuture ( res)
686
641
}
687
642
688
- dequeued = self . input_queue. wait_for_next_event( ) => {
689
- dequeued
643
+ event = self . input_queue. wait_for_next_event( ) => {
644
+ Selection :: Event ( event )
690
645
}
691
646
} ;
692
647
693
- // The VM's vCPUs only start when the block backend startup future
694
- // resolves and is selected above. If control reached that point,
695
- // that branch wasn't selected, so the vCPUs should still be paused,
696
- // which means the dequeued event should not be a guest event.
697
- let InputQueueEvent :: ExternalRequest ( req) = event else {
698
- unreachable ! ( "can't get guest events before vCPUs start" ) ;
648
+ let req: ExternalRequest = match selection {
649
+ Selection :: BackendFuture ( Ok ( ( ) ) ) => {
650
+ let objects = & self . objects ;
651
+ objects. lock_exclusive ( ) . await . resume_vcpus ( ) ;
652
+ self . external_state . update ( ExternalStateUpdate :: Instance (
653
+ InstanceState :: Running ,
654
+ ) ) ;
655
+
656
+ self . input_queue . notify_request_completed (
657
+ CompletedRequest :: Start { succeeded : true } ,
658
+ ) ;
659
+
660
+ info ! ( & self . log, "VM successfully started" ) ;
661
+ return VmStartOutcome :: Succeeded ;
662
+ }
663
+
664
+ Selection :: BackendFuture ( Err ( e) ) => {
665
+ info ! ( & self . log, "VM startup failed: {e}" ) ;
666
+ self . input_queue . notify_request_completed (
667
+ CompletedRequest :: Start { succeeded : false } ,
668
+ ) ;
669
+
670
+ return VmStartOutcome :: Failed ;
671
+ }
672
+
673
+ // The VM's vCPUs only start when the block backend startup
674
+ // future resolves and is selected above. If control reached
675
+ // that point, that branch wasn't selected, so the vCPUs should
676
+ // still be paused, which means the dequeued event should not be
677
+ // a guest event.
678
+ Selection :: Event ( InputQueueEvent :: GuestEvent ( _) ) => {
679
+ unreachable ! ( "can't get guest events before vCPUs start" )
680
+ }
681
+
682
+ Selection :: Event ( InputQueueEvent :: ExternalRequest ( req) ) => req,
699
683
} ;
700
684
701
- // Handle requests to reconfigure one of the existing Crucible
702
- // volumes inline, but buffer other requests so that they can be
703
- // handled after the VM has finished starting.
704
- //
705
- // Buffering some requests and servicing others can theoretically
706
- // change the order in which those requests are retired. That's not
707
- // a problem here because the request queue will stop accepting new
708
- // VCR change requests once a request to stop has been queued.
709
685
match req {
710
686
ExternalRequest :: State ( StateChangeRequest :: Stop ) => {
711
687
info ! (
712
688
& self . log,
713
689
"got request to stop while still starting"
714
690
) ;
715
691
716
- // Remember that the VM should stop once it has started.
717
- // It's not safe to issue a self-request here because the
718
- // next loop iteration will simply pop the self-request back
719
- // off the queue and reach this path once more.
720
- stopped_while_starting = true ;
692
+ // Don't send any pause/halt notifications here, since
693
+ // (depending on what async work was in flight when this
694
+ // notification was received) there may be a
695
+ // partially-started component that is not prepared to be
696
+ // paused and halted. Instead, simply move the VM to
697
+ // Stopped, return an "aborted" status, and let the caller
698
+ // arrange to drop all the VM's components. (Note that no
699
+ // vCPUs have started yet, so no guest work is in flight at
700
+ // this point.)
701
+ self . external_state . update ( ExternalStateUpdate :: Instance (
702
+ InstanceState :: Stopped ,
703
+ ) ) ;
704
+
705
+ self . input_queue . notify_stopped ( ) ;
706
+ return VmStartOutcome :: Aborted ;
721
707
}
722
708
ExternalRequest :: Component (
723
709
ComponentChangeRequest :: ReconfigureCrucibleVolume {
@@ -810,10 +796,13 @@ impl StateDriver {
810
796
match request {
811
797
ExternalRequest :: State ( StateChangeRequest :: Start ) => {
812
798
match self . start_vm ( VmStartReason :: ExplicitRequest ) . await {
813
- Ok ( ( ) ) => HandleEventOutcome :: Continue ,
814
- Err ( _ ) => HandleEventOutcome :: Exit {
799
+ VmStartOutcome :: Succeeded => HandleEventOutcome :: Continue ,
800
+ VmStartOutcome :: Failed => HandleEventOutcome :: Exit {
815
801
final_state : InstanceState :: Failed ,
816
802
} ,
803
+ VmStartOutcome :: Aborted => HandleEventOutcome :: Exit {
804
+ final_state : InstanceState :: Destroyed ,
805
+ } ,
817
806
}
818
807
}
819
808
ExternalRequest :: State ( StateChangeRequest :: MigrateAsSource {
0 commit comments