Skip to content

Commit 07cb9ed

Browse files
authored
Simplify block context for reads (#1391)
Right now, the `ReadResponse` sends back a `Vec<Option<BlockContext>>`, i.e. an `Option<BlockContext>` for each block. The `BlockContext` in turn contains ```rust pub struct BlockContext { pub hash: u64, pub encryption_context: Option<EncryptionContext>, } ``` If `encryption_context` is populated, then sending `hash` to the Upstairs is unnecessary: we are using authenticated encryption, so we know whether the block data + context is valid based on whether it decrypted successfully. This PR removes `hash` from the `ReadResponse` message in favor of a new `enum`: ```rust #[derive(Debug, PartialEq, Copy, Clone, Serialize, Deserialize)] pub enum ReadBlockContext { Empty, Encrypted { ctx: EncryptionContext }, Unencrypted { hash: u64 }, } ``` This does not change the on-disk format or write message format, which both continue to use hashes: - When **sending** data to the Downstairs, the hash (in `BlockContext`) lets us detect corruption in transit. We can't use the encryption context here, because the Downstairs isn't allowed to have encryption keys - When recovering from a bad write, `on_disk_hash` is used to figure out which context slot is valid. `on_disk_hash` is never sent over the network¹ This PR is a step towards [RFD 490 § Metadata reduction](https://rfd.shared.oxide.computer/rfd/490#_metadata_reduction), which proposes to **not** store block hashes for encrypted data on disk. If in the future we don't store block hashes for encrypted data, we would not be able to send them over the network; this PR removes that future hurdle. However, the PR stands alone as a small optimization (39 → 32 bytes per block) that simplifies program behavior (no need to think about what happens if encryption fails but the hash matches, or vice versa). ¹ `on_disk_hash` is also _technically_ superfluous if we already have `hash` (see #1161), but this PR doesn't change it
1 parent 5951c65 commit 07cb9ed

14 files changed

+286
-261
lines changed

downstairs/src/dump.rs

+20-22
Original file line numberDiff line numberDiff line change
@@ -699,8 +699,10 @@ fn show_extent_block(
699699
for (dir_index, r) in dvec.iter().enumerate() {
700700
print!("{:^24} ", dir_index);
701701

702-
max_nonce_depth =
703-
std::cmp::max(max_nonce_depth, r.encryption_contexts(0).len());
702+
max_nonce_depth = std::cmp::max(
703+
max_nonce_depth,
704+
r.encryption_contexts(0).is_some() as usize,
705+
);
704706
}
705707
if !only_show_differences {
706708
print!(" {:<5}", "DIFF");
@@ -722,17 +724,14 @@ fn show_extent_block(
722724
let mut all_same_len = true;
723725
let mut nonces = Vec::with_capacity(dir_count);
724726
for r in dvec.iter() {
725-
let ctxs = r.encryption_contexts(0);
727+
// TODO this can only be 0 or 1
728+
let ctxs =
729+
r.encryption_contexts(0).into_iter().collect::<Vec<_>>();
726730
print!(
727731
"{:^24} ",
728732
if depth < ctxs.len() {
729-
if let Some(ec) = ctxs[depth] {
730-
nonces.push(&ec.nonce);
731-
hex::encode(ec.nonce)
732-
} else {
733-
all_same_len = false;
734-
"".to_string()
735-
}
733+
nonces.push(ctxs[depth].nonce);
734+
hex::encode(ctxs[depth].nonce)
736735
} else {
737736
all_same_len = false;
738737
"".to_string()
@@ -756,8 +755,10 @@ fn show_extent_block(
756755
for (dir_index, r) in dvec.iter().enumerate() {
757756
print!("{:^32} ", dir_index);
758757

759-
max_tag_depth =
760-
std::cmp::max(max_tag_depth, r.encryption_contexts(0).len());
758+
max_tag_depth = std::cmp::max(
759+
max_tag_depth,
760+
r.encryption_contexts(0).is_some() as usize,
761+
);
761762
}
762763
if !only_show_differences {
763764
print!(" {:<5}", "DIFF");
@@ -779,17 +780,13 @@ fn show_extent_block(
779780
let mut all_same_len = true;
780781
let mut tags = Vec::with_capacity(dir_count);
781782
for r in dvec.iter() {
782-
let ctxs = r.encryption_contexts(0);
783+
let ctxs =
784+
r.encryption_contexts(0).into_iter().collect::<Vec<_>>();
783785
print!(
784786
"{:^32} ",
785787
if depth < ctxs.len() {
786-
if let Some(ec) = ctxs[depth] {
787-
tags.push(&ec.tag);
788-
hex::encode(ec.tag)
789-
} else {
790-
all_same_len = false;
791-
"".to_string()
792-
}
788+
tags.push(ctxs[depth].tag);
789+
hex::encode(ctxs[depth].tag)
793790
} else {
794791
all_same_len = false;
795792
"".to_string()
@@ -820,7 +817,8 @@ fn show_extent_block(
820817
for (dir_index, r) in dvec.iter().enumerate() {
821818
print!("{:^16} ", dir_index);
822819

823-
max_hash_depth = std::cmp::max(max_hash_depth, r.hashes(0).len());
820+
max_hash_depth =
821+
std::cmp::max(max_hash_depth, r.hashes(0).is_some() as usize);
824822
}
825823
if !only_show_differences {
826824
print!(" {:<5}", "DIFF");
@@ -842,7 +840,7 @@ fn show_extent_block(
842840
let mut all_same_len = true;
843841
let mut hashes = Vec::with_capacity(dir_count);
844842
for r in dvec.iter() {
845-
let block_hashes = r.hashes(0);
843+
let block_hashes = r.hashes(0).into_iter().collect::<Vec<_>>();
846844
print!(
847845
"{:^16} ",
848846
if depth < block_hashes.len() {

downstairs/src/extent_inner_raw.rs

+24-5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::{
1414
ExtentWrite, JobId, RegionDefinition,
1515
};
1616
use crucible_common::ExtentId;
17+
use crucible_protocol::ReadBlockContext;
1718

1819
use itertools::Itertools;
1920
use serde::{Deserialize, Serialize};
@@ -322,7 +323,15 @@ impl ExtentInner for RawInner {
322323
let blocks = self.get_block_contexts_inner(
323324
req.offset.0,
324325
num_blocks,
325-
|ctx, _block| ctx.map(|c| c.block_context),
326+
|ctx, _block| match ctx {
327+
None => ReadBlockContext::Empty,
328+
Some(c) => match c.block_context.encryption_context {
329+
Some(ctx) => ReadBlockContext::Encrypted { ctx },
330+
None => ReadBlockContext::Unencrypted {
331+
hash: c.block_context.hash,
332+
},
333+
},
334+
},
326335
)?;
327336
cdt::extent__read__get__contexts__done!(|| {
328337
(job_id.0, self.extent_number.0, num_blocks)
@@ -1604,6 +1613,7 @@ mod test {
16041613
}],
16051614
};
16061615
inner.write(JobId(10), &write, false, IOV_MAX_TEST)?;
1616+
let prev_hash = hash;
16071617

16081618
// The context should be in place, though we haven't flushed yet
16091619

@@ -1629,8 +1639,11 @@ mod test {
16291639
};
16301640
let resp = inner.read(JobId(21), read, IOV_MAX_TEST)?;
16311641

1632-
// We should not get back our data, because block 0 was written.
1633-
assert_ne!(resp.blocks, vec![Some(block_context)]);
1642+
// We should get back our old data, because block 0 was written.
1643+
assert_eq!(
1644+
resp.blocks,
1645+
vec![ReadBlockContext::Unencrypted { hash: prev_hash }]
1646+
);
16341647
assert_ne!(resp.data, BytesMut::from(data.as_ref()));
16351648
}
16361649

@@ -1656,7 +1669,10 @@ mod test {
16561669
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;
16571670

16581671
// We should get back our data! Block 1 was never written.
1659-
assert_eq!(resp.blocks, vec![Some(block_context)]);
1672+
assert_eq!(
1673+
resp.blocks,
1674+
vec![ReadBlockContext::Unencrypted { hash }]
1675+
);
16601676
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
16611677
}
16621678

@@ -1883,7 +1899,10 @@ mod test {
18831899
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;
18841900

18851901
// We should get back our data! Block 1 was never written.
1886-
assert_eq!(resp.blocks, vec![Some(block_context)]);
1902+
assert_eq!(
1903+
resp.blocks,
1904+
vec![ReadBlockContext::Unencrypted { hash }]
1905+
);
18871906
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
18881907
}
18891908

downstairs/src/extent_inner_sqlite.rs

+29-9
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::{
99
ExtentWrite, JobId, RegionDefinition,
1010
};
1111
use crucible_common::{crucible_bail, ExtentId};
12-
use crucible_protocol::EncryptionContext;
12+
use crucible_protocol::{EncryptionContext, ReadBlockContext};
1313

1414
use anyhow::{bail, Result};
1515
use itertools::Itertools;
@@ -301,10 +301,18 @@ impl SqliteMoreInner {
301301
cdt::extent__read__get__contexts__done!(|| {
302302
(job_id.0, self.extent_number.0, num_blocks)
303303
});
304-
// Convert from DownstairsBlockContext -> BlockContext
304+
// Convert from DownstairsBlockContext -> ReadBlockContext
305305
let blocks = block_contexts
306306
.into_iter()
307-
.map(|bs| bs.map(|b| b.block_context))
307+
.map(|bs| match bs {
308+
None => ReadBlockContext::Empty,
309+
Some(b) => match b.block_context.encryption_context {
310+
Some(ctx) => ReadBlockContext::Encrypted { ctx },
311+
None => ReadBlockContext::Unencrypted {
312+
hash: b.block_context.hash,
313+
},
314+
},
315+
})
308316
.collect();
309317

310318
// To avoid a `memset`, we're reading directly into uninitialized
@@ -1542,6 +1550,7 @@ mod test {
15421550

15431551
// We haven't flushed, but this should leave our context in place.
15441552
inner.fully_rehash_and_clean_all_stale_contexts(false)?;
1553+
let prev_hash = hash;
15451554

15461555
// Therefore, we expect that write_unwritten to the first block won't
15471556
// do anything.
@@ -1565,8 +1574,11 @@ mod test {
15651574
};
15661575
let resp = inner.read(JobId(21), read, IOV_MAX_TEST)?;
15671576

1568-
// We should not get back our data, because block 0 was written.
1569-
assert_ne!(resp.blocks, vec![Some(block_context)]);
1577+
// We should get back our old data, because block 0 was written.
1578+
assert_eq!(
1579+
resp.blocks,
1580+
vec![ReadBlockContext::Unencrypted { hash: prev_hash }]
1581+
);
15701582
assert_ne!(resp.data, BytesMut::from(data.as_ref()));
15711583
}
15721584

@@ -1592,7 +1604,10 @@ mod test {
15921604
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;
15931605

15941606
// We should get back our data! Block 1 was never written.
1595-
assert_eq!(resp.blocks, vec![Some(block_context)]);
1607+
assert_eq!(
1608+
resp.blocks,
1609+
vec![ReadBlockContext::Unencrypted { hash }]
1610+
);
15961611
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
15971612
}
15981613

@@ -1653,7 +1668,10 @@ mod test {
16531668
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;
16541669

16551670
// We should get back our data! Block 1 was never written.
1656-
assert_eq!(resp.blocks, vec![Some(block_context)]);
1671+
assert_eq!(
1672+
resp.blocks,
1673+
vec![ReadBlockContext::Unencrypted { hash }]
1674+
);
16571675
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
16581676
}
16591677

@@ -1694,8 +1712,10 @@ mod test {
16941712
data: BytesMut::with_capacity(512 * 2),
16951713
};
16961714
let resp = inner.read(JobId(31), read(), IOV_MAX_TEST)?;
1697-
let expected_blocks: Vec<_> =
1698-
block_contexts.iter().map(|b| Some(*b)).collect();
1715+
let expected_blocks: Vec<_> = block_contexts
1716+
.iter()
1717+
.map(|b| ReadBlockContext::Unencrypted { hash: b.hash })
1718+
.collect();
16991719
assert_eq!(resp.blocks, expected_blocks);
17001720
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
17011721

downstairs/src/lib.rs

+24-18
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ use crucible_common::{
2020
};
2121
use crucible_protocol::{
2222
BlockContext, CrucibleDecoder, JobId, Message, MessageWriter,
23-
ReconciliationId, SnapshotDetails, CRUCIBLE_MESSAGE_VERSION,
23+
ReadBlockContext, ReconciliationId, SnapshotDetails,
24+
CRUCIBLE_MESSAGE_VERSION,
2425
};
2526
use repair_client::Client;
2627

@@ -204,7 +205,7 @@ impl IntoIterator for RegionReadRequest {
204205
/// Do not derive `Clone` on this type; it will be expensive and tempting to
205206
/// call by accident!
206207
pub(crate) struct RegionReadResponse {
207-
blocks: Vec<Option<BlockContext>>,
208+
blocks: Vec<ReadBlockContext>,
208209
data: BytesMut,
209210
uninit: BytesMut,
210211
}
@@ -281,24 +282,27 @@ impl RegionReadResponse {
281282
assert!(was_empty || prev_ptr == self.data.as_ptr());
282283
}
283284

284-
/// Returns hashes for the given response
285-
///
286-
/// This is expensive and should only be used for debugging
287-
pub fn hashes(&self, i: usize) -> Vec<u64> {
288-
self.blocks[i].iter().map(|ctx| ctx.hash).collect()
285+
/// Returns the hash for the given response (if unencrypted)
286+
pub fn hashes(&self, i: usize) -> Option<u64> {
287+
match self.blocks[i] {
288+
ReadBlockContext::Empty | ReadBlockContext::Encrypted { .. } => {
289+
None
290+
}
291+
ReadBlockContext::Unencrypted { hash } => Some(hash),
292+
}
289293
}
290294

291295
/// Returns encryption contexts for the given response
292-
///
293-
/// This is expensive and should only be used for debugging
294296
pub fn encryption_contexts(
295297
&self,
296298
i: usize,
297-
) -> Vec<Option<&crucible_protocol::EncryptionContext>> {
298-
self.blocks[i]
299-
.iter()
300-
.map(|ctx| ctx.encryption_context.as_ref())
301-
.collect()
299+
) -> Option<crucible_protocol::EncryptionContext> {
300+
match self.blocks[i] {
301+
ReadBlockContext::Empty | ReadBlockContext::Unencrypted { .. } => {
302+
None
303+
}
304+
ReadBlockContext::Encrypted { ctx } => Some(ctx),
305+
}
302306
}
303307
}
304308

@@ -307,7 +311,7 @@ impl RegionReadResponse {
307311
/// Do not derive `Clone` on this type; it will be expensive and tempting to
308312
/// call by accident!
309313
pub(crate) struct ExtentReadResponse {
310-
blocks: Vec<Option<BlockContext>>,
314+
blocks: Vec<ReadBlockContext>,
311315
/// At this point, the buffer must be fully initialized
312316
data: BytesMut,
313317
}
@@ -5444,9 +5448,11 @@ mod test {
54445448

54455449
assert_eq!(response.blocks.len(), 1);
54465450

5447-
let hashes = response.hashes(0);
5448-
assert_eq!(hashes.len(), 1);
5449-
assert_eq!(integrity_hash(&[&response.data[..]]), hashes[0],);
5451+
let hash = response.hashes(0);
5452+
assert_eq!(
5453+
integrity_hash(&[&response.data[..]]),
5454+
hash.unwrap()
5455+
);
54505456

54515457
read_data.extend_from_slice(&response.data[..]);
54525458
}

0 commit comments

Comments
 (0)