Skip to content

Commit 25d375a

Browse files
authored
Use RangeSet instead of tracking every complete job (#1663)
Right now, we store each completed `JobId` and search linearly through the list when checking for dependencies. This means that a flush, which is dependent on all previous jobs, takes `O(N^2)` time to check its dependencies. This PR switches to a [`RangeSet`](https://docs.rs/rangemap/latest/rangemap/set/struct.RangeSet.html), which provides `O(log N)` lookup. In practice, we expect it to be `O(1)`: jobs are completed in order, so the `RangeSet` will only contain a single contiguous range. I did a quick audit of the `rangemap` crate, and it seems reasonable: - No dependencies - `proptest` based testing, Github CI - Consistent docstrings There are two small behavior changes: - `completed()` now allocates, expanding the `RangeSet` into a `Vec<JobId>` (instead of a `&[JobId]`). It's only used in unit tests and our debug work-printing function, so I think that's fine. - The output of `completed()` is now sorted. This required changing one test assertion.
1 parent 1b3558c commit 25d375a

File tree

5 files changed

+25
-12
lines changed

5 files changed

+25
-12
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ rayon = "1.10.0"
7878
rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] }
7979
rand_chacha = "0.3.1"
8080
reedline = "0.38.0"
81+
rangemap = "1.5.1"
8182
reqwest = { version = "0.12", features = ["default", "blocking", "json", "stream"] }
8283
ringbuffer = "0.15.0"
8384
rusqlite = { version = "0.32" }

downstairs/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ opentelemetry.workspace = true
3131
oximeter-producer.workspace = true
3232
oximeter.workspace = true
3333
rand.workspace = true
34+
rangemap.workspace = true
3435
rayon.workspace = true
3536
repair-client.workspace = true
3637
reqwest.workspace = true

downstairs/src/complete_jobs.rs

+13-9
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
use crate::JobId;
2+
use rangemap::RangeSet;
23

34
/// Stores a set of complete jobs
45
#[derive(Debug)]
56
pub struct CompletedJobs {
6-
completed: Vec<JobId>,
7+
completed: RangeSet<JobId>,
78
}
89

910
impl CompletedJobs {
1011
pub fn new(last_flush: Option<JobId>) -> Self {
1112
Self {
12-
completed: last_flush.into_iter().collect(),
13+
completed: last_flush
14+
.into_iter()
15+
.map(|id| id..JobId(id.0 + 1))
16+
.collect(),
1317
}
1418
}
1519

@@ -20,7 +24,7 @@ impl CompletedJobs {
2024

2125
/// Records a new complete job in the list
2226
pub fn push(&mut self, id: JobId) {
23-
self.completed.push(id);
27+
self.completed.insert(id..JobId(id.0 + 1));
2428
}
2529

2630
/// Resets the data structure, given a new barrier operation
@@ -29,20 +33,20 @@ impl CompletedJobs {
2933
/// oldest complete job.
3034
pub fn reset(&mut self, id: JobId) {
3135
self.completed.clear();
32-
self.completed.push(id);
36+
self.completed.insert(id..JobId(id.0 + 1));
3337
}
3438

3539
/// Checks whether the given job is complete
3640
///
3741
/// A job is complete if it is listed in the set of complete jobs.
3842
pub fn is_complete(&self, id: JobId) -> bool {
39-
// We deliberately reverse the `completed` list because new jobs are at
40-
// the back and are more likely to be what we care about
41-
self.completed.iter().rev().any(|j| *j == id)
43+
self.completed.contains(&id)
4244
}
4345

4446
/// Returns the list of completed jobs
45-
pub fn completed(&self) -> &[JobId] {
46-
&self.completed
47+
pub fn completed(&self) -> impl Iterator<Item = JobId> + use<'_> {
48+
self.completed
49+
.iter()
50+
.flat_map(|r| (r.start.0..r.end.0).map(JobId))
4751
}
4852
}

downstairs/src/lib.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -3288,8 +3288,8 @@ impl Work {
32883288
}
32893289
}
32903290

3291-
fn completed(&self) -> &[JobId] {
3292-
self.completed.completed()
3291+
fn completed(&self) -> Vec<JobId> {
3292+
self.completed.completed().collect()
32933293
}
32943294

32953295
/// Pushes a new job to the back of the queue
@@ -4947,7 +4947,7 @@ mod test {
49474947
test_do_work(&mut work, next_jobs);
49484948
assert_eq!(
49494949
work.completed(),
4950-
vec![JobId(1000), JobId(2000), JobId(1001), JobId(2001)]
4950+
vec![JobId(1000), JobId(1001), JobId(2000), JobId(2001)]
49514951
);
49524952

49534953
let next_jobs = test_push_next_jobs(&mut work);

0 commit comments

Comments
 (0)