Skip to content

Commit 3f0e6cf

Browse files
committed
Use FIOFFS instead of fsync (on supported platforms)
1 parent 5951c65 commit 3f0e6cf

File tree

7 files changed

+282
-104
lines changed

7 files changed

+282
-104
lines changed

Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ itertools = "0.12.1"
6262
libc = "0.2"
6363
mime_guess = "2.0.4"
6464
nbd = "0.3.1"
65-
nix = { version = "0.28", features = [ "feature", "uio" ] }
65+
nix = { version = "0.28", features = [ "feature", "uio", "ioctl" ] }
6666
num_enum = "0.7"
6767
num-derive = "0.4"
6868
num-traits = "0.2"

downstairs/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,4 @@ asm = ["usdt/asm"]
7171
default = []
7272
zfs_snapshot = []
7373
integration-tests = [] # Enables creating SQLite volumes
74+
omicron-build = [] # Uses FIOFFS for flushes instead of fsync

downstairs/src/extent.rs

+72-6
Original file line number | Diff line number | Diff line change
@@ -36,13 +36,45 @@ pub(crate) trait ExtentInner: Send + Sync + Debug {
3636
fn flush_number(&self) -> Result<u64, CrucibleError>;
3737
fn dirty(&self) -> Result<bool, CrucibleError>;
3838

39-
fn flush(
39+
/// Performs any metadata updates needed before a flush
40+
fn pre_flush(
41+
&mut self,
42+
new_flush: u64,
43+
new_gen: u64,
44+
job_id: JobOrReconciliationId,
45+
) -> Result<(), CrucibleError>;
46+
47+
/// Syncs all relevant data to persistent storage
48+
fn flush_inner(
49+
&mut self,
50+
job_id: JobOrReconciliationId,
51+
) -> Result<(), CrucibleError>;
52+
53+
/// Performs any metadata updates after syncing data to persistent storage
54+
fn post_flush(
4055
&mut self,
4156
new_flush: u64,
4257
new_gen: u64,
4358
job_id: JobOrReconciliationId,
4459
) -> Result<(), CrucibleError>;
4560

61+
/// Performs a full flush (pre/inner/post)
62+
///
63+
/// This is only exposed for the sake of unit testing; normal code should
64+
/// use the fine-grained functions and be forced to consider performance.
65+
#[cfg(test)]
66+
fn flush(
67+
&mut self,
68+
new_flush: u64,
69+
new_gen: u64,
70+
job_id: JobOrReconciliationId,
71+
) -> Result<(), CrucibleError> {
72+
self.pre_flush(new_flush, new_gen, job_id)?;
73+
self.flush_inner(job_id)?;
74+
self.post_flush(new_flush, new_gen, job_id)?;
75+
Ok(())
76+
}
77+
4678
fn read(
4779
&mut self,
4880
job_id: JobId,
@@ -578,22 +610,25 @@ impl Extent {
578610
Ok(())
579611
}
580612

581-
#[instrument]
582-
pub(crate) fn flush<I: Into<JobOrReconciliationId> + Debug>(
613+
/// Prepares for a flush
614+
///
615+
/// Returns `false` if we should skip the flush (because this extent is not
616+
/// dirty), or `true` if we should proceed.
617+
pub(crate) fn pre_flush<I: Into<JobOrReconciliationId> + Debug>(
583618
&mut self,
584619
new_flush: u64,
585620
new_gen: u64,
586621
id: I, // only used for logging
587622
log: &Logger,
588-
) -> Result<(), CrucibleError> {
623+
) -> Result<bool, CrucibleError> {
589624
let job_id: JobOrReconciliationId = id.into();
590625

591626
if !self.inner.dirty()? {
592627
/*
593628
* If we have made no writes to this extent since the last flush,
594629
* we do not need to update the extent on disk
595630
*/
596-
return Ok(());
631+
return Ok(false);
597632
}
598633

599634
// Read only extents should never have the dirty bit set. If they do,
@@ -604,7 +639,38 @@ impl Extent {
604639
crucible_bail!(ModifyingReadOnlyRegion);
605640
}
606641

607-
self.inner.flush(new_flush, new_gen, job_id)
642+
self.inner.pre_flush(new_flush, new_gen, job_id)?;
643+
Ok(true)
644+
}
645+
646+
/// Performs post-flush cleanup
647+
pub(crate) fn post_flush<I: Into<JobOrReconciliationId> + Debug>(
648+
&mut self,
649+
new_flush: u64,
650+
new_gen: u64,
651+
id: I, // only used for logging
652+
) -> Result<(), CrucibleError> {
653+
let job_id: JobOrReconciliationId = id.into();
654+
self.inner.post_flush(new_flush, new_gen, job_id)
655+
}
656+
657+
/// Flushes this extent if it is dirty
658+
#[instrument]
659+
pub(crate) fn flush<
660+
I: Into<JobOrReconciliationId> + Debug + Copy + Clone,
661+
>(
662+
&mut self,
663+
new_flush: u64,
664+
new_gen: u64,
665+
id: I, // only used for logging
666+
log: &Logger,
667+
) -> Result<(), CrucibleError> {
668+
if !self.pre_flush(new_flush, new_gen, id, log)? {
669+
return Ok(());
670+
}
671+
self.inner.flush_inner(id.into())?;
672+
self.post_flush(new_flush, new_gen, id)?;
673+
Ok(())
608674
}
609675

610676
pub fn get_meta_info(&self) -> ExtentMeta {

downstairs/src/extent_inner_raw.rs

+19-14
Original file line number | Diff line number | Diff line change
@@ -378,20 +378,12 @@ impl ExtentInner for RawInner {
378378
Ok(ExtentReadResponse { data: buf, blocks })
379379
}
380380

381-
fn flush(
381+
fn pre_flush(
382382
&mut self,
383383
new_flush: u64,
384384
new_gen: u64,
385385
job_id: JobOrReconciliationId,
386386
) -> Result<(), CrucibleError> {
387-
if !self.dirty()? {
388-
/*
389-
* If we have made no writes to this extent since the last flush,
390-
* we do not need to update the extent on disk
391-
*/
392-
return Ok(());
393-
}
394-
395387
cdt::extent__flush__start!(|| {
396388
(job_id.get(), self.extent_number.0, 0)
397389
});
@@ -400,10 +392,17 @@ impl ExtentInner for RawInner {
400392
// operation atomic.
401393
self.set_flush_number(new_flush, new_gen)?;
402394

395+
Ok(())
396+
}
397+
398+
fn flush_inner(
399+
&mut self,
400+
job_id: JobOrReconciliationId,
401+
) -> Result<(), CrucibleError> {
403402
// Now, we fsync to ensure data is flushed to disk. It's okay to crash
404403
// before this point, because setting the flush number is atomic.
405404
cdt::extent__flush__file__start!(|| {
406-
(job_id.get(), self.extent_number.0, 0)
405+
(job_id.get(), self.extent_number.0)
407406
});
408407
if let Err(e) = self.file.sync_all() {
409408
/*
@@ -416,9 +415,17 @@ impl ExtentInner for RawInner {
416415
}
417416
self.context_slot_dirty.fill(0);
418417
cdt::extent__flush__file__done!(|| {
419-
(job_id.get(), self.extent_number.0, 0)
418+
(job_id.get(), self.extent_number.0)
420419
});
420+
Ok(())
421+
}
421422

423+
fn post_flush(
424+
&mut self,
425+
_new_flush: u64,
426+
_new_gen: u64,
427+
job_id: JobOrReconciliationId,
428+
) -> Result<(), CrucibleError> {
422429
// Check for fragmentation in the context slots leading to worse
423430
// performance, and defragment if that's the case.
424431
let extra_syscalls_per_rw = self
@@ -433,9 +440,7 @@ impl ExtentInner for RawInner {
433440
Ok(())
434441
};
435442

436-
cdt::extent__flush__done!(|| {
437-
(job_id.get(), self.extent_number.0, 0)
438-
});
443+
cdt::extent__flush__done!(|| { (job_id.get(), self.extent_number.0) });
439444

440445
r
441446
}

downstairs/src/extent_inner_sqlite.rs

+46-13
Original file line number | Diff line number | Diff line change
@@ -36,13 +36,32 @@ impl ExtentInner for SqliteInner {
3636
self.0.lock().unwrap().dirty()
3737
}
3838

39-
fn flush(
39+
fn pre_flush(
4040
&mut self,
4141
new_flush: u64,
4242
new_gen: u64,
4343
job_id: JobOrReconciliationId,
4444
) -> Result<(), CrucibleError> {
45-
self.0.lock().unwrap().flush(new_flush, new_gen, job_id)
45+
self.0.lock().unwrap().pre_flush(new_flush, new_gen, job_id)
46+
}
47+
48+
fn flush_inner(
49+
&mut self,
50+
job_id: JobOrReconciliationId,
51+
) -> Result<(), CrucibleError> {
52+
self.0.lock().unwrap().flush_inner(job_id)
53+
}
54+
55+
fn post_flush(
56+
&mut self,
57+
new_flush: u64,
58+
new_gen: u64,
59+
job_id: JobOrReconciliationId,
60+
) -> Result<(), CrucibleError> {
61+
self.0
62+
.lock()
63+
.unwrap()
64+
.post_flush(new_flush, new_gen, job_id)
4665
}
4766

4867
fn read(
@@ -194,10 +213,10 @@ impl SqliteMoreInner {
194213
Ok(self.dirty.get())
195214
}
196215

197-
fn flush(
216+
fn pre_flush(
198217
&mut self,
199-
new_flush: u64,
200-
new_gen: u64,
218+
_new_flush: u64,
219+
_new_gen: u64,
201220
job_id: JobOrReconciliationId,
202221
) -> Result<(), CrucibleError> {
203222
// Used for profiling
@@ -207,12 +226,19 @@ impl SqliteMoreInner {
207226
(job_id.get(), self.extent_number.0, n_dirty_blocks)
208227
});
209228

229+
Ok(())
230+
}
231+
232+
fn flush_inner(
233+
&mut self,
234+
job_id: JobOrReconciliationId,
235+
) -> Result<(), CrucibleError> {
210236
/*
211237
* We must first fsync to get any outstanding data written to disk.
212238
* This must be done before we update the flush number.
213239
*/
214240
cdt::extent__flush__file__start!(|| {
215-
(job_id.get(), self.extent_number.0, n_dirty_blocks)
241+
(job_id.get(), self.extent_number.0)
216242
});
217243
if let Err(e) = self.file.sync_all() {
218244
/*
@@ -225,9 +251,18 @@ impl SqliteMoreInner {
225251
);
226252
}
227253
cdt::extent__flush__file__done!(|| {
228-
(job_id.get(), self.extent_number.0, n_dirty_blocks)
254+
(job_id.get(), self.extent_number.0)
229255
});
230256

257+
Ok(())
258+
}
259+
260+
fn post_flush(
261+
&mut self,
262+
new_flush: u64,
263+
new_gen: u64,
264+
job_id: JobOrReconciliationId,
265+
) -> Result<(), CrucibleError> {
231266
// Clear old block contexts. In order to be crash consistent, only
232267
// perform this after the extent fsync is done. For each block
233268
// written since the last flush, remove all block context rows where
@@ -237,7 +272,7 @@ impl SqliteMoreInner {
237272
// file is rehashed, since in that case we don't have that luxury.
238273

239274
cdt::extent__flush__collect__hashes__start!(|| {
240-
(job_id.get(), self.extent_number.0, n_dirty_blocks)
275+
(job_id.get(), self.extent_number.0)
241276
});
242277

243278
// Rehash any parts of the file that we *may have written* data to since
@@ -250,7 +285,7 @@ impl SqliteMoreInner {
250285
});
251286

252287
cdt::extent__flush__sqlite__insert__start!(|| {
253-
(job_id.get(), self.extent_number.0, n_dirty_blocks)
288+
(job_id.get(), self.extent_number.0)
254289
});
255290

256291
// We put all of our metadb updates into a single transaction to
@@ -265,7 +300,7 @@ impl SqliteMoreInner {
265300
)?;
266301

267302
cdt::extent__flush__sqlite__insert__done!(|| {
268-
(job_id.get(), self.extent_number.0, n_dirty_blocks)
303+
(job_id.get(), self.extent_number.0)
269304
});
270305

271306
self.set_flush_number(new_flush, new_gen)?;
@@ -275,9 +310,7 @@ impl SqliteMoreInner {
275310
// Finally, reset the file's seek offset to 0
276311
self.file.seek(SeekFrom::Start(0))?;
277312

278-
cdt::extent__flush__done!(|| {
279-
(job_id.get(), self.extent_number.0, n_dirty_blocks)
280-
});
313+
cdt::extent__flush__done!(|| { (job_id.get(), self.extent_number.0) });
281314
Ok(())
282315
}
283316

downstairs/src/lib.rs

+8-28
Original file line number | Diff line number | Diff line change
@@ -737,44 +737,24 @@ pub mod cdt {
737737
fn submit__writeunwritten__done(_: u64) {}
738738
fn submit__write__done(_: u64) {}
739739
fn submit__flush__done(_: u64) {}
740-
fn extent__flush__start(job_id: u64, extent_id: u32, extent_size: u64) {}
741-
fn extent__flush__done(job_id: u64, extent_id: u32, extent_size: u64) {}
742-
fn extent__flush__file__start(
740+
fn extent__flush__start(
743741
job_id: u64,
744742
extent_id: u32,
745-
extent_size: u64,
746-
) {
747-
}
748-
fn extent__flush__file__done(
749-
job_id: u64,
750-
extent_id: u32,
751-
extent_size: u64,
752-
) {
753-
}
754-
fn extent__flush__collect__hashes__start(
755-
job_id: u64,
756-
extent_id: u32,
757-
num_dirty: u64,
743+
num_dirty_blocks: u64,
758744
) {
759745
}
746+
fn extent__flush__done(job_id: u64, extent_id: u32) {}
747+
fn extent__flush__file__start(job_id: u64, extent_id: u32) {}
748+
fn extent__flush__file__done(job_id: u64, extent_id: u32) {}
749+
fn extent__flush__collect__hashes__start(job_id: u64, extent_id: u32) {}
760750
fn extent__flush__collect__hashes__done(
761751
job_id: u64,
762752
extent_id: u32,
763753
num_rehashed: u64,
764754
) {
765755
}
766-
fn extent__flush__sqlite__insert__start(
767-
job_id: u64,
768-
extent_id: u32,
769-
extent_size: u64,
770-
) {
771-
}
772-
fn extent__flush__sqlite__insert__done(
773-
_job_id: u64,
774-
_extent_id: u32,
775-
extent_size: u64,
776-
) {
777-
}
756+
fn extent__flush__sqlite__insert__start(job_id: u64, extent_id: u32) {}
757+
fn extent__flush__sqlite__insert__done(job_id: u64, extent_id: u32) {}
778758
fn extent__write__start(job_id: u64, extent_id: u32, n_blocks: u64) {}
779759
fn extent__write__done(job_id: u64, extent_id: u32, n_blocks: u64) {}
780760
fn extent__write__get__hashes__start(

0 commit comments

Comments
 (0)