Skip to content

Commit d4d9cae

Browse files
authored
Move more backpressure stuff into module (#1434)
(Staged on top of #1431)

This is purely reorganization, with no functional changes:

- Move backpressure config into `backpressure` module
- Since the config consists of two independent channels (bytes and queue length), add a new `struct BackpressureChannelConfig` and use two copies of it
1 parent 45e24ee commit d4d9cae

File tree

2 files changed

+128
-121
lines changed

2 files changed

+128
-121
lines changed

upstairs/src/backpressure.rs

+122-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
// Copyright 2024 Oxide Computer Company
22

3-
use crate::{ClientId, DownstairsIO, IOop};
3+
use crate::{
4+
ClientId, DownstairsIO, IOop, IO_OUTSTANDING_MAX_BYTES,
5+
IO_OUTSTANDING_MAX_JOBS,
6+
};
7+
use std::time::Duration;
48

59
/// Helper struct to contain a count of backpressure bytes
610
#[derive(Debug)]
@@ -43,3 +47,120 @@ impl BackpressureBytes {
4347
}
4448
}
4549
}
50+
51+
/// Configuration for host-side backpressure
52+
///
53+
/// Backpressure adds an artificial delay to host write messages (which are
54+
/// otherwise acked immediately, before actually being complete). The delay is
55+
/// varied based on two metrics:
56+
///
57+
/// - number of write bytes outstanding
58+
/// - queue length (in jobs)
59+
///
60+
/// We compute backpressure delay based on both metrics, then pick the larger of
61+
/// the two delays.
62+
#[derive(Copy, Clone, Debug)]
63+
pub struct BackpressureConfig {
64+
pub bytes: BackpressureChannelConfig,
65+
pub queue: BackpressureChannelConfig,
66+
}
67+
68+
impl Default for BackpressureConfig {
69+
fn default() -> BackpressureConfig {
70+
BackpressureConfig {
71+
// Byte-based backpressure
72+
bytes: BackpressureChannelConfig {
73+
start: 50 * 1024u64.pow(2), // 50 MiB
74+
max: IO_OUTSTANDING_MAX_BYTES * 2,
75+
scale: Duration::from_millis(100),
76+
},
77+
78+
// Queue-based backpressure
79+
queue: BackpressureChannelConfig {
80+
start: 500,
81+
max: IO_OUTSTANDING_MAX_JOBS as u64 * 2,
82+
scale: Duration::from_millis(5),
83+
},
84+
}
85+
}
86+
}
87+
88+
#[derive(Copy, Clone, Debug)]
89+
pub struct BackpressureChannelConfig {
90+
/// When should backpressure start
91+
pub start: u64,
92+
/// Value at which backpressure goes to infinity
93+
pub max: u64,
94+
/// Scale of backpressure
95+
pub scale: Duration,
96+
}
97+
98+
impl BackpressureChannelConfig {
99+
fn get_backpressure(&self, value: u64) -> Duration {
100+
// Saturate at 1 hour per job, which is basically infinite
101+
if value >= self.max {
102+
return Duration::from_secs(60 * 60);
103+
}
104+
105+
// This ratio starts at 0 (at `start`) and hits 1 (at `max`), where
106+
// backpressure should be infinite.
107+
let frac = value.saturating_sub(self.start) as f64
108+
/ (self.max - self.start) as f64;
109+
110+
// Delay should be 0 at frac = 0, and infinite at frac = 1
111+
let frac = frac * 2.0;
112+
let v = if frac < 1.0 {
113+
frac
114+
} else {
115+
1.0 / (1.0 - (frac - 1.0))
116+
};
117+
self.scale.mul_f64(v.powi(2))
118+
}
119+
}
120+
121+
impl BackpressureConfig {
122+
pub fn get_backpressure_us(&self, bytes: u64, jobs: u64) -> u64 {
123+
let bp_bytes = self.bytes.get_backpressure(bytes).as_micros() as u64;
124+
let bp_queue = self.queue.get_backpressure(jobs).as_micros() as u64;
125+
bp_bytes.max(bp_queue)
126+
}
127+
}
128+
129+
#[cfg(test)]
130+
mod test {
131+
use super::*;
132+
133+
#[test]
134+
fn check_max_backpressure() {
135+
let cfg = BackpressureConfig::default();
136+
let t = cfg.get_backpressure_us(
137+
IO_OUTSTANDING_MAX_BYTES * 2 - 1024u64.pow(2),
138+
0,
139+
);
140+
let timeout = Duration::from_micros(t);
141+
println!(
142+
"max byte-based delay: {}",
143+
humantime::format_duration(timeout)
144+
);
145+
assert!(
146+
timeout > Duration::from_secs(60 * 60),
147+
"max byte-based backpressure delay is too low;
148+
expected > 1 hr, got {}",
149+
humantime::format_duration(timeout)
150+
);
151+
152+
let t =
153+
cfg.get_backpressure_us(0, IO_OUTSTANDING_MAX_JOBS as u64 * 2 - 1);
154+
let timeout = Duration::from_micros(t);
155+
println!(
156+
"max job-based delay: {}",
157+
humantime::format_duration(timeout)
158+
);
159+
assert!(
160+
timeout > Duration::from_secs(60 * 60),
161+
"max job-based backpressure delay is too low;
162+
expected > 1 hr, got {}",
163+
humantime::format_duration(timeout)
164+
);
165+
}
166+
}

upstairs/src/guest.rs

+6-120
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ use std::{
1010
};
1111

1212
use crate::{
13-
BlockIO, BlockOp, BlockOpWaiter, BlockRes, Buffer, JobId, RawReadResponse,
14-
ReplaceResult, UpstairsAction, IO_OUTSTANDING_MAX_BYTES,
15-
IO_OUTSTANDING_MAX_JOBS,
13+
backpressure::BackpressureConfig, BlockIO, BlockOp, BlockOpWaiter,
14+
BlockRes, Buffer, JobId, RawReadResponse, ReplaceResult, UpstairsAction,
1615
};
1716
use crucible_common::{build_logger, Block, BlockIndex, CrucibleError};
1817
use crucible_protocol::SnapshotDetails;
@@ -297,71 +296,6 @@ pub struct Guest {
297296
log: Logger,
298297
}
299298

300-
/// Configuration for host-side backpressure
301-
///
302-
/// Backpressure adds an artificial delay to host write messages (which are
303-
/// otherwise acked immediately, before actually being complete). The delay is
304-
/// varied based on two metrics:
305-
///
306-
/// - number of write bytes outstanding (as a fraction of max)
307-
/// - queue length as a fraction (where 1.0 is full)
308-
///
309-
/// These two metrics are used for quadratic backpressure, picking the larger of
310-
/// the two delays.
311-
#[derive(Copy, Clone, Debug)]
312-
struct BackpressureConfig {
313-
/// When should backpressure start, in units of bytes
314-
bytes_start: u64,
315-
/// Maximum number of bytes (i.e. backpressure goes to infinity)
316-
bytes_max: u64,
317-
/// Scale of bytes-based backpressure
318-
bytes_scale: Duration,
319-
320-
/// When should backpressure start, in units of jobs
321-
queue_start: u64,
322-
/// Maximum number of jobs (i.e. backpressure goes to infinity)
323-
queue_max: u64,
324-
/// Scale of queue-based delay
325-
queue_scale: Duration,
326-
}
327-
328-
impl BackpressureConfig {
329-
// Our chosen backpressure curve is quadratic for 1/2 of its range, then
330-
// goes to infinity in the second half. This gives C0 + C1 continuity.
331-
fn curve(frac: f64, scale: Duration) -> Duration {
332-
// Remap from 0-1 to 0-1.5 for ease of calculation
333-
let frac = frac * 2.0;
334-
let v = if frac < 1.0 {
335-
frac
336-
} else {
337-
1.0 / (1.0 - (frac - 1.0))
338-
};
339-
scale.mul_f64(v.powi(2))
340-
}
341-
342-
fn get_backpressure_us(&self, bytes: u64, jobs: u64) -> u64 {
343-
// Saturate at 1 hour per job, which is basically infinite
344-
if bytes >= self.bytes_max || jobs >= self.queue_max {
345-
return Duration::from_secs(60 * 60).as_micros() as u64;
346-
}
347-
348-
// These ratios start at 0 (at *_start) and hit 1 when backpressure
349-
// should be infinite.
350-
let jobs_frac = jobs.saturating_sub(self.queue_start) as f64
351-
/ (self.queue_max - self.queue_start) as f64;
352-
let bytes_frac = bytes.saturating_sub(self.bytes_start) as f64
353-
/ (self.bytes_max - self.bytes_start) as f64;
354-
355-
// Delay should be 0 at frac = 0, and infinite at frac = 1
356-
let delay_bytes =
357-
Self::curve(bytes_frac, self.bytes_scale).as_micros() as u64;
358-
let delay_jobs =
359-
Self::curve(jobs_frac, self.queue_scale).as_micros() as u64;
360-
361-
delay_bytes.max(delay_jobs)
362-
}
363-
}
364-
365299
/*
366300
* These methods are how to add or check for new work on the Guest struct
367301
*/
@@ -404,7 +338,7 @@ impl Guest {
404338
iop_tokens: 0,
405339
bw_tokens: 0,
406340
backpressure_us: backpressure_us.clone(),
407-
backpressure_config: Self::default_backpressure_config(),
341+
backpressure_config: BackpressureConfig::default(),
408342
log: log.clone(),
409343
};
410344
let guest = Guest {
@@ -419,20 +353,6 @@ impl Guest {
419353
(guest, io)
420354
}
421355

422-
fn default_backpressure_config() -> BackpressureConfig {
423-
BackpressureConfig {
424-
// Byte-based backpressure
425-
bytes_start: 50 * 1024u64.pow(2), // 50 MiB
426-
bytes_max: IO_OUTSTANDING_MAX_BYTES * 2,
427-
bytes_scale: Duration::from_millis(100),
428-
429-
// Queue-based backpressure
430-
queue_start: 500,
431-
queue_max: IO_OUTSTANDING_MAX_JOBS as u64 * 2,
432-
queue_scale: Duration::from_millis(5),
433-
}
434-
}
435-
436356
/*
437357
* This is used to submit a new BlockOp IO request to Crucible.
438358
*
@@ -941,17 +861,17 @@ impl GuestIoHandle {
941861

942862
#[cfg(test)]
943863
pub fn disable_queue_backpressure(&mut self) {
944-
self.backpressure_config.queue_scale = Duration::ZERO;
864+
self.backpressure_config.queue.scale = Duration::ZERO;
945865
}
946866

947867
#[cfg(test)]
948868
pub fn disable_byte_backpressure(&mut self) {
949-
self.backpressure_config.bytes_scale = Duration::ZERO;
869+
self.backpressure_config.bytes.scale = Duration::ZERO;
950870
}
951871

952872
#[cfg(test)]
953873
pub fn is_queue_backpressure_disabled(&self) -> bool {
954-
self.backpressure_config.queue_scale == Duration::ZERO
874+
self.backpressure_config.queue.scale == Duration::ZERO
955875
}
956876

957877
/// Set `self.backpressure_us` based on outstanding IO ratio
@@ -1460,38 +1380,4 @@ mod test {
14601380

14611381
Ok(())
14621382
}
1463-
1464-
#[test]
1465-
fn check_max_backpressure() {
1466-
let cfg = Guest::default_backpressure_config();
1467-
let t = cfg.get_backpressure_us(
1468-
IO_OUTSTANDING_MAX_BYTES * 2 - 1024u64.pow(2),
1469-
0,
1470-
);
1471-
let timeout = Duration::from_micros(t);
1472-
println!(
1473-
"max byte-based delay: {}",
1474-
humantime::format_duration(timeout)
1475-
);
1476-
assert!(
1477-
timeout > Duration::from_secs(60 * 60),
1478-
"max byte-based backpressure delay is too low;
1479-
expected > 1 hr, got {}",
1480-
humantime::format_duration(timeout)
1481-
);
1482-
1483-
let t =
1484-
cfg.get_backpressure_us(0, IO_OUTSTANDING_MAX_JOBS as u64 * 2 - 1);
1485-
let timeout = Duration::from_micros(t);
1486-
println!(
1487-
"max job-based delay: {}",
1488-
humantime::format_duration(timeout)
1489-
);
1490-
assert!(
1491-
timeout > Duration::from_secs(60 * 60),
1492-
"max job-based backpressure delay is too low;
1493-
expected > 1 hr, got {}",
1494-
humantime::format_duration(timeout)
1495-
);
1496-
}
14971383
}

0 commit comments

Comments
 (0)