Skip to content

Commit 0b00580

Browse files
authored
Add repair server dynamometer (#1618)
Here's an example usage on dogfood, measuring a region snapshot's downstairs on another sled:

    # /tmp/jwm-downstairs repair-dynamometer \
        --clone-source '[fd00:1122:3344:107::c]:23009'
    using http://[fd00:1122:3344:107::c]:23009
    The source RegionDefinition is: RegionDefinition { block_size: 512, extent_size: Block { value: 131072, shift: 9 }, extent_count: 16, uuid: 31fb7fa7-0432-409e-8bec-571b6613b188, encrypted: true, database_read_version: 1, database_write_version: 1 }
    The source mode is: true
    Repair extent 0
    eid:0 Found repair files: ["000", "000.db", "000.db-shm", "000.db-wal"]
    Repair extent 1
    eid:1 Found repair files: ["001", "001.db", "001.db-shm", "001.db-wal"]
    Repair extent 2
    eid:2 Found repair files: ["002", "002.db", "002.db-shm", "002.db-wal"]
    Repair extent 3
    eid:3 Found repair files: ["003", "003.db", "003.db-shm", "003.db-wal"]
    bytes per second: 222755540
    Repair extent 4
    eid:4 Found repair files: ["004", "004.db", "004.db-shm", "004.db-wal"]
    Repair extent 5
    eid:5 Found repair files: ["005", "005.db", "005.db-shm", "005.db-wal"]
    Repair extent 6
    eid:6 Found repair files: ["006", "006.db", "006.db-shm", "006.db-wal"]
    bytes per second: 224481150
    Repair extent 7
    eid:7 Found repair files: ["007", "007.db", "007.db-shm", "007.db-wal"]
    Repair extent 8
    eid:8 Found repair files: ["008", "008.db", "008.db-shm", "008.db-wal"]
    Repair extent 9
    eid:9 Found repair files: ["009", "009.db", "009.db-shm", "009.db-wal"]
    Repair extent 10
    eid:10 Found repair files: ["00A", "00A.db", "00A.db-shm", "00A.db-wal"]
    bytes per second: 227660750
    Repair extent 11
    eid:11 Found repair files: ["00B", "00B.db", "00B.db-shm", "00B.db-wal"]
    Repair extent 12
    eid:12 Found repair files: ["00C", "00C.db", "00C.db-shm", "00C.db-wal"]
    Repair extent 13
    eid:13 Found repair files: ["00D", "00D.db", "00D.db-shm", "00D.db-wal"]
    bytes per second: 228354910
    Repair extent 14
    eid:14 Found repair files: ["00E", "00E.db", "00E.db-shm", "00E.db-wal"]
    Repair extent 15
    eid:15 Found repair files: ["00F", "00F.db", "00F.db-shm", "00F.db-wal"]
    B/s: [222755540.0, 224481150.0, 227660750.0, 228354910.0]
    B/S mean 225813090 stddev 2645588
    B/s min 222755540 max 228354910

Bytes per second looks like about 215.4 MiB/s here.
1 parent d2d8f8a commit 0b00580

File tree

2 files changed

+114
-2
lines changed

2 files changed

+114
-2
lines changed

downstairs/src/dynamometer.rs

+105
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright 2023 Oxide Computer Company
22
use super::*;
33

4+
use futures::TryStreamExt;
5+
46
pub enum DynoFlushConfig {
57
FlushPerIops(usize),
68
FlushPerBlocks(usize),
@@ -187,3 +189,106 @@ pub fn dynamometer(
187189

188190
Ok(())
189191
}
192+
193+
pub async fn repair_dynamometer(clone_source: SocketAddr) -> Result<()> {
194+
let mut bytes_received = 0;
195+
let mut measurement_time = Instant::now();
196+
let mut bytes_per_second: Vec<f32> = vec![];
197+
198+
let url = format!("http://{:?}", clone_source);
199+
println!("using {url}");
200+
let repair = repair_client::Client::new(&url);
201+
202+
let source_def = match repair.get_region_info().await {
203+
Ok(def) => def.into_inner(),
204+
Err(e) => {
205+
bail!("Failed to get source region definition: {e}");
206+
}
207+
};
208+
209+
println!("The source RegionDefinition is: {:?}", source_def);
210+
211+
let source_ro_mode = match repair.get_region_mode().await {
212+
Ok(ro) => ro.into_inner(),
213+
Err(e) => {
214+
bail!("Failed to get source mode: {e}");
215+
}
216+
};
217+
218+
println!("The source mode is: {:?}", source_ro_mode);
219+
if !source_ro_mode {
220+
bail!("Source downstairs is not read only");
221+
}
222+
223+
for eid in (0..source_def.extent_count()).map(ExtentId) {
224+
println!("Repair extent {eid}");
225+
226+
let mut repair_files = match repair.get_files_for_extent(eid.0).await {
227+
Ok(f) => f.into_inner(),
228+
Err(e) => {
229+
bail!("Failed to get repair files: {:?}", e,);
230+
}
231+
};
232+
233+
repair_files.sort();
234+
println!("eid:{} Found repair files: {:?}", eid, repair_files);
235+
236+
let mut stream = match repair
237+
.get_extent_file(eid.0, repair_client::types::FileType::Data)
238+
.await
239+
{
240+
Ok(rs) => rs,
241+
Err(e) => {
242+
bail!("Failed to get extent {} db file: {:?}", eid, e,);
243+
}
244+
};
245+
246+
loop {
247+
match stream.try_next().await {
248+
Ok(Some(bytes)) => {
249+
bytes_received += bytes.len();
250+
251+
let elapsed = measurement_time.elapsed();
252+
253+
if elapsed > Duration::from_secs(1) {
254+
let fractional_seconds: f32 = elapsed.as_secs() as f32
255+
+ (elapsed.subsec_nanos() as f32 / 1e9);
256+
257+
println!(
258+
"bytes per second: {}",
259+
bytes_received as f32 / fractional_seconds
260+
);
261+
bytes_per_second
262+
.push(bytes_received as f32 / fractional_seconds);
263+
bytes_received = 0;
264+
measurement_time = Instant::now();
265+
}
266+
}
267+
268+
Ok(None) => break,
269+
270+
Err(e) => {
271+
bail!("repair stream error: {:?}", e);
272+
}
273+
}
274+
}
275+
}
276+
277+
println!("B/s: {:?}", bytes_per_second);
278+
println!(
279+
"B/S mean {} stddev {}",
280+
statistical::mean(&bytes_per_second),
281+
statistical::standard_deviation(&bytes_per_second, None),
282+
);
283+
284+
bytes_per_second
285+
.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
286+
287+
println!(
288+
"B/s min {} max {}",
289+
bytes_per_second.first().unwrap(),
290+
bytes_per_second.last().unwrap(),
291+
);
292+
293+
Ok(())
294+
}

downstairs/src/main.rs

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// Copyright 2023 Oxide Computer Company
2-
32
use std::net::{IpAddr, SocketAddr};
43
use std::path::PathBuf;
54
use std::time::Duration;
@@ -221,7 +220,7 @@ enum Args {
221220
bind_addr: SocketAddr,
222221
},
223222
Version,
224-
/// Measure an isolated downstairs
223+
/// Measure an isolated downstairs' disk usage
225224
Dynamometer {
226225
#[clap(long, default_value_t = 512)]
227226
block_size: u64,
@@ -258,6 +257,11 @@ enum Args {
258257
#[clap(long, value_parser = parse_duration, conflicts_with_all = ["flush_per_iops", "flush_per_blocks"])]
259258
flush_per_ms: Option<Duration>,
260259
},
260+
/// Measure a downstairs' repair server
261+
RepairDynamometer {
262+
#[clap(long, value_name = "SOURCE", action)]
263+
clone_source: SocketAddr,
264+
},
261265
}
262266

263267
fn parse_duration(arg: &str) -> Result<Duration, std::num::ParseIntError> {
@@ -499,5 +503,8 @@ async fn main() -> Result<()> {
499503

500504
dynamometer(region, num_writes, samples, flush_config)
501505
}
506+
Args::RepairDynamometer { clone_source } => {
507+
repair_dynamometer(clone_source).await
508+
}
502509
}
503510
}

0 commit comments

Comments
 (0)