Commit 9abd890

Add per-client backpressure
1 parent bf25e38 commit 9abd890

3 files changed (+86 −2 lines)

upstairs/src/client.rs (+51 −2)

@@ -10,15 +10,22 @@ use crate::{
 use crucible_common::x509::TLSContext;
 use crucible_protocol::{ReconciliationId, CRUCIBLE_MESSAGE_VERSION};
 
-use std::{collections::BTreeSet, net::SocketAddr, sync::Arc};
+use std::{
+    collections::BTreeSet,
+    net::SocketAddr,
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc,
+    },
+};
 
 use futures::StreamExt;
 use slog::{debug, error, info, o, warn, Logger};
 use tokio::{
     io::AsyncWriteExt,
     net::{TcpSocket, TcpStream},
     sync::{mpsc, oneshot},
-    time::sleep_until,
+    time::{sleep_until, Duration},
 };
 use tokio_util::codec::{Encoder, FramedRead};
 use uuid::Uuid;
@@ -216,6 +223,9 @@ pub(crate) struct DownstairsClient {
 
     /// Session ID for a clients connection to a downstairs.
     connection_id: ConnectionId,
+
+    /// Per-client delay, shared with the [`DownstairsClient`]
+    client_delay_us: Arc<AtomicU64>,
 }
 
 impl DownstairsClient {
@@ -226,6 +236,7 @@ impl DownstairsClient {
         log: Logger,
         tls_context: Option<Arc<crucible_common::x509::TLSContext>>,
     ) -> Self {
+        let client_delay_us = Arc::new(AtomicU64::new(0));
         Self {
             cfg,
             client_task: Self::new_io_task(
@@ -234,6 +245,7 @@ impl DownstairsClient {
                 false, // do not start the task until GoActive
                 client_id,
                 tls_context.clone(),
+                client_delay_us.clone(),
                 &log,
             ),
             client_id,
@@ -253,6 +265,7 @@ impl DownstairsClient {
             repair_info: None,
             io_state_count: ClientIOStateCount::new(),
             connection_id: ConnectionId(0),
+            client_delay_us,
         }
     }
 
@@ -262,6 +275,7 @@ impl DownstairsClient {
     /// client will disappear into the void.
     #[cfg(test)]
     fn test_default() -> Self {
+        let client_delay_us = Arc::new(AtomicU64::new(0));
         let cfg = Arc::new(UpstairsConfig {
             encryption_context: None,
             upstairs_id: Uuid::new_v4(),
@@ -290,6 +304,7 @@ impl DownstairsClient {
             repair_info: None,
             io_state_count: ClientIOStateCount::new(),
             connection_id: ConnectionId(0),
+            client_delay_us,
         }
     }
 
@@ -652,6 +667,7 @@ impl DownstairsClient {
             connect,
             self.client_id,
             self.tls_context.clone(),
+            self.client_delay_us.clone(),
             &self.log,
         );
     }
@@ -662,6 +678,7 @@ impl DownstairsClient {
         connect: bool,
         client_id: ClientId,
         tls_context: Option<Arc<TLSContext>>,
+        client_delay_us: Arc<AtomicU64>,
         log: &Logger,
     ) -> ClientTaskHandle {
         #[cfg(test)]
@@ -672,6 +689,7 @@ impl DownstairsClient {
                 connect,
                 client_id,
                 tls_context,
+                client_delay_us,
                 log,
             )
         } else {
@@ -685,6 +703,7 @@ impl DownstairsClient {
                 connect,
                 client_id,
                 tls_context,
+                client_delay_us,
                 log,
             )
         }
@@ -695,6 +714,7 @@ impl DownstairsClient {
         connect: bool,
         client_id: ClientId,
         tls_context: Option<Arc<TLSContext>>,
+        client_delay_us: Arc<AtomicU64>,
         log: &Logger,
     ) -> ClientTaskHandle {
         // These channels must support at least MAX_ACTIVE_COUNT messages;
@@ -730,6 +750,7 @@ impl DownstairsClient {
                     log: log.clone(),
                 },
                 delay,
+                client_delay_us,
                 log,
             };
             c.run().await
@@ -1242,6 +1263,8 @@ impl DownstairsClient {
             return false;
         }
 
+        job.reply_time[self.client_id] = Some(std::time::Instant::now());
+
         let mut jobs_completed_ok = job.state_count().completed_ok();
         let mut ackable = false;
 
@@ -2220,6 +2243,11 @@ impl DownstairsClient {
             None
         }
     }
+
+    /// Sets the per-client delay
+    pub(crate) fn set_delay_us(&self, delay: u64) {
+        self.client_delay_us.store(delay, Ordering::Relaxed);
+    }
 }
 
 /// How to handle "promote to active" requests
@@ -2420,6 +2448,9 @@ struct ClientIoTask {
     /// Handle for the rx task
     recv_task: ClientRxTask,
 
+    /// Shared handle to receive per-client backpressure delay
+    client_delay_us: Arc<AtomicU64>,
+
     log: Logger,
 }
 
@@ -2692,6 +2723,24 @@ impl ClientIoTask {
             + std::marker::Send
             + 'static,
     {
+        // Delay communication with this client based on backpressure, to keep
+        // the three clients relatively in sync with each other.
+        //
+        // We don't need to delay writes, because they're already constrained by
+        // the global backpressure system and cannot build up an unbounded
+        // queue.  This is admittedly quite subtle; see crucible#1167 for
+        // discussions and graphs.
+        if !matches!(
+            m,
+            ClientRequest::Message(Message::Write { .. })
+                | ClientRequest::RawMessage(RawMessage::Write { .. }, ..)
+        ) {
+            let d = self.client_delay_us.load(Ordering::Relaxed);
+            if d > 0 {
+                tokio::time::sleep(Duration::from_micros(d)).await;
+            }
+        }
+
         // There's some duplication between this function and `cmd_loop` above,
         // but it's not obvious whether there's a cleaner way to organize stuff.
         tokio::select! {
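
The moving parts above reduce to one pattern: the upstairs owns an Arc<AtomicU64> per client, publishes a delay into it, and that client's IO task reads the value before each non-write send. A minimal, self-contained sketch of the pattern (throttled_send is an illustrative name, not a function from this crate):

use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};
use tokio::time::Duration;

// Reader side: check the advisory delay before sending.  Relaxed ordering
// suffices because the value is advisory and synchronizes no other data.
async fn throttled_send(delay_us: &AtomicU64 /* , msg: ... */) {
    let d = delay_us.load(Ordering::Relaxed);
    if d > 0 {
        tokio::time::sleep(Duration::from_micros(d)).await;
    }
    // ... frame and send the message here ...
}

#[tokio::main]
async fn main() {
    let delay = Arc::new(AtomicU64::new(0));
    // Writer side (upstairs): ask this client to pause 500 µs per message.
    delay.store(500, Ordering::Relaxed);
    // IO-task side: every subsequent send observes and applies the delay.
    throttled_send(&delay).await;
}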

upstairs/src/downstairs.rs (+32)

@@ -1326,6 +1326,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: noop_ioop,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -1449,6 +1450,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: repair_ioop,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -1589,6 +1591,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: reopen_ioop,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -1662,6 +1665,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: aread,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -1732,6 +1736,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: awrite,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -1762,6 +1767,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: close_ioop,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -2087,6 +2093,7 @@ impl Downstairs {
             guest_id: gw_id,
             work: flush,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -2211,6 +2218,7 @@ impl Downstairs {
             guest_id,
             work: aread,
             state: ClientData::new(IOState::New),
+            reply_time: ClientData::new(None),
             acked: false,
             replay: false,
             data: None,
@@ -3166,6 +3174,30 @@ impl Downstairs {
             self.ackable_work.insert(ds_id);
         }
 
+        if job.reply_time.iter().all(Option::is_some) && !job.replay {
+            let fastest_id = ClientId::iter()
+                .min_by_key(|i| job.reply_time[*i].unwrap())
+                .unwrap();
+            let slowest_time = *job.reply_time.iter().flatten().max().unwrap();
+
+            // Apply a delay to the fastest client, and clear the delay for the
+            // other two clients.
+            for i in ClientId::iter() {
+                let delay_time_us: u64 = if i == fastest_id {
+                    // 0 delay below 10ms of lead time, then linearly increasing
+                    // to a maximum of 10 ms.  These are all roughly-eyeballed
+                    // numbers!
+                    let dt = slowest_time - job.reply_time[i].unwrap();
+                    let lead_time_us: u64 = dt.as_micros().try_into().unwrap();
+                    (lead_time_us.saturating_sub(10_000) / 100).min(10_000)
+                } else {
+                    0
+                };
+
+                self.clients[i].set_delay_us(delay_time_us);
+            }
+        }
+
         /*
          * If all 3 jobs are done, we can check here to see if we can
          * remove this job from the DS list.  If we have completed the ack
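
In the hunk above, lead_time_us measures how far the fastest reply beat the slowest one. The ramp is zero up to 10 ms of lead, then grows by 1 µs of delay per 100 µs of extra lead, reaching its 10 ms cap once the lead passes roughly one second. A standalone restatement with spot checks (the function name is mine, not the crate's):

/// Delay ramp from the hunk above, all values in microseconds:
/// no delay below 10 ms of lead, then (lead - 10 ms) / 100, capped at 10 ms.
fn delay_for_lead_time_us(lead_time_us: u64) -> u64 {
    (lead_time_us.saturating_sub(10_000) / 100).min(10_000)
}

fn main() {
    assert_eq!(delay_for_lead_time_us(5_000), 0); // 5 ms lead: no delay
    assert_eq!(delay_for_lead_time_us(10_000), 0); // exactly 10 ms: still 0
    assert_eq!(delay_for_lead_time_us(110_000), 1_000); // 110 ms lead: 1 ms
    assert_eq!(delay_for_lead_time_us(2_000_000), 10_000); // capped at 10 ms
}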

upstairs/src/lib.rs (+3)

@@ -889,6 +889,9 @@ struct DownstairsIO {
     /// Map of work status, tracked on a per-client basis
     state: ClientData<IOState>,
 
+    /// At what time did we hear a reply?
+    reply_time: ClientData<Option<std::time::Instant>>,
+
     /*
      * Has this been acked to the guest yet?
      */
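
Taken together, the three files close a feedback loop: each reply stamps reply_time[client_id] (client.rs), and once all three slots are filled the fastest client is throttled through set_delay_us (downstairs.rs). A toy model of that bookkeeping, assuming ClientData<T> behaves like a three-slot per-client array (the struct below is a simplified stand-in, not the crate's type):

use std::time::{Duration, Instant};

// Simplified stand-in for Crucible's per-client storage.
struct ClientData<T>([T; 3]);

fn main() {
    let start = Instant::now();
    // Pretend the three replies arrived 1 ms, 2 ms, and 30 ms after send.
    let reply_time = ClientData([
        Some(start + Duration::from_millis(1)),
        Some(start + Duration::from_millis(2)),
        Some(start + Duration::from_millis(30)),
    ]);

    if reply_time.0.iter().all(Option::is_some) {
        let fastest = (0..3usize)
            .min_by_key(|&i| reply_time.0[i].unwrap())
            .unwrap();
        let slowest = *reply_time.0.iter().flatten().max().unwrap();
        let lead_us: u64 = (slowest - reply_time.0[fastest].unwrap())
            .as_micros()
            .try_into()
            .unwrap();
        // Client 0 leads by 29 ms: (29_000 - 10_000) / 100 = 190 µs of delay.
        assert_eq!(fastest, 0);
        assert_eq!((lead_us.saturating_sub(10_000) / 100).min(10_000), 190);
    }
}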
