Simplify block context for reads (#1391)
Right now, the `ReadResponse` sends back a `Vec<Option<BlockContext>>`,
i.e. an `Option<BlockContext>` for each block. The `BlockContext` in
turn contains

```rust
pub struct BlockContext {
    pub hash: u64,
    pub encryption_context: Option<EncryptionContext>,
}
```

If `encryption_context` is populated, then sending `hash` to the
Upstairs is unnecessary: we are using authenticated encryption, so we
know whether the block data + context is valid based on whether it
decrypted successfully.
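
As a rough illustration of why the hash is redundant here (this is a sketch, not
Crucible's actual encryption code; the cipher choice, the detached nonce/tag
layout, and the function name are assumptions):

```rust
use aes_gcm::{AeadInPlace, Aes256Gcm, Key, KeyInit, Nonce, Tag};

/// Hypothetical helper: returns true iff the block decrypts cleanly.
/// Any bit flip in `block`, `nonce`, or `tag` makes the GCM tag check fail,
/// so successful decryption already proves integrity.
fn validate_encrypted_block(
    key: &Key<Aes256Gcm>,
    nonce: &[u8; 12],
    tag: &[u8; 16],
    block: &mut [u8],
) -> bool {
    Aes256Gcm::new(key)
        .decrypt_in_place_detached(
            Nonce::from_slice(nonce), // 96-bit nonce
            b"",                      // no associated data in this sketch
            block,                    // ciphertext, decrypted in place
            Tag::from_slice(tag),     // 128-bit authentication tag
        )
        .is_ok()
}
```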

This PR removes `hash` from the `ReadResponse` message in favor of a new
`enum`:
```rust
#[derive(Debug, PartialEq, Copy, Clone, Serialize, Deserialize)]
pub enum ReadBlockContext {
    Empty,
    Encrypted { ctx: EncryptionContext },
    Unencrypted { hash: u64 },
}
```
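
For illustration, a consumer of this enum picks exactly one validation path per
block; `block_is_valid` and its closure parameters are hypothetical stand-ins,
not the Upstairs' actual API:

```rust
use crucible_protocol::{EncryptionContext, ReadBlockContext};

/// Sketch: which integrity check applies is determined entirely by the
/// context variant, so "hash matches but decryption fails" (or vice versa)
/// can no longer occur.
fn block_is_valid(
    ctx: &ReadBlockContext,
    data: &mut [u8],
    decrypt: impl FnOnce(&EncryptionContext, &mut [u8]) -> bool,
    hash: impl FnOnce(&[u8]) -> u64,
) -> bool {
    match ctx {
        // Never-written block: there is no integrity data to check.
        ReadBlockContext::Empty => true,
        // Authenticated encryption: decryption success *is* the check.
        ReadBlockContext::Encrypted { ctx } => decrypt(ctx, data),
        // Unencrypted block: fall back to comparing the transmitted hash.
        ReadBlockContext::Unencrypted { hash: expected } => {
            hash(&data[..]) == *expected
        }
    }
}
```

With the old `Vec<Option<BlockContext>>`, a block could carry both a hash and an
encryption context, and the caller had to decide which check was authoritative.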

This does not change the on-disk format or write message format, which
both continue to use hashes:

- When **sending** data to the Downstairs, the hash (in `BlockContext`)
lets us detect corruption in transit (a rough sketch of this check follows
the list). We can't use the encryption context here, because the Downstairs
isn't allowed to have encryption keys.
- When recovering from a bad write, `on_disk_hash` is used to figure out
which context slot is valid. `on_disk_hash` is never sent over the
network.¹
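
That transit check might look roughly like this; the hasher and names below are
stand-ins, and Crucible's real `integrity_hash` may differ in both algorithm and
inputs:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher;

/// Stand-in hash (std's SipHash), used here purely for illustration.
fn transit_hash(data: &[u8]) -> u64 {
    let mut h = DefaultHasher::new();
    h.write(data);
    h.finish()
}

/// The Downstairs can reject a write whose payload was corrupted in transit
/// by recomputing the hash and comparing it against the one the Upstairs
/// sent in `BlockContext`, without ever holding an encryption key.
fn write_is_intact(sent_hash: u64, data: &[u8]) -> bool {
    transit_hash(data) == sent_hash
}
```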

This PR is a step towards [RFD 490 § Metadata
reduction](https://rfd.shared.oxide.computer/rfd/490#_metadata_reduction),
which proposes to **not** store block hashes for encrypted data on disk.
If in the future we don't store block hashes for encrypted data, we
would not be able to send them over the network; this PR removes that
future hurdle.

However, the PR stands alone as a small optimization (39 → 32 bytes per
block) that simplifies program behavior (no need to think about what
happens if encryption fails but the hash matches, or vice versa).

¹ `on_disk_hash` is also _technically_ superfluous if we already have
`hash` (see #1161), but this PR doesn't change it.
mkeeter authored Jul 31, 2024
1 parent 5951c65 commit 07cb9ed
Showing 14 changed files with 286 additions and 261 deletions.
42 changes: 20 additions & 22 deletions downstairs/src/dump.rs
@@ -699,8 +699,10 @@ fn show_extent_block(
for (dir_index, r) in dvec.iter().enumerate() {
print!("{:^24} ", dir_index);

max_nonce_depth =
std::cmp::max(max_nonce_depth, r.encryption_contexts(0).len());
max_nonce_depth = std::cmp::max(
max_nonce_depth,
r.encryption_contexts(0).is_some() as usize,
);
}
if !only_show_differences {
print!(" {:<5}", "DIFF");
@@ -722,17 +724,14 @@ fn show_extent_block(
let mut all_same_len = true;
let mut nonces = Vec::with_capacity(dir_count);
for r in dvec.iter() {
let ctxs = r.encryption_contexts(0);
// TODO this can only be 0 or 1
let ctxs =
r.encryption_contexts(0).into_iter().collect::<Vec<_>>();
print!(
"{:^24} ",
if depth < ctxs.len() {
if let Some(ec) = ctxs[depth] {
nonces.push(&ec.nonce);
hex::encode(ec.nonce)
} else {
all_same_len = false;
"".to_string()
}
nonces.push(ctxs[depth].nonce);
hex::encode(ctxs[depth].nonce)
} else {
all_same_len = false;
"".to_string()
@@ -756,8 +755,10 @@ fn show_extent_block(
for (dir_index, r) in dvec.iter().enumerate() {
print!("{:^32} ", dir_index);

max_tag_depth =
std::cmp::max(max_tag_depth, r.encryption_contexts(0).len());
max_tag_depth = std::cmp::max(
max_tag_depth,
r.encryption_contexts(0).is_some() as usize,
);
}
if !only_show_differences {
print!(" {:<5}", "DIFF");
@@ -779,17 +780,13 @@ fn show_extent_block(
let mut all_same_len = true;
let mut tags = Vec::with_capacity(dir_count);
for r in dvec.iter() {
let ctxs = r.encryption_contexts(0);
let ctxs =
r.encryption_contexts(0).into_iter().collect::<Vec<_>>();
print!(
"{:^32} ",
if depth < ctxs.len() {
if let Some(ec) = ctxs[depth] {
tags.push(&ec.tag);
hex::encode(ec.tag)
} else {
all_same_len = false;
"".to_string()
}
tags.push(ctxs[depth].tag);
hex::encode(ctxs[depth].tag)
} else {
all_same_len = false;
"".to_string()
@@ -820,7 +817,8 @@ fn show_extent_block(
for (dir_index, r) in dvec.iter().enumerate() {
print!("{:^16} ", dir_index);

max_hash_depth = std::cmp::max(max_hash_depth, r.hashes(0).len());
max_hash_depth =
std::cmp::max(max_hash_depth, r.hashes(0).is_some() as usize);
}
if !only_show_differences {
print!(" {:<5}", "DIFF");
@@ -842,7 +840,7 @@ fn show_extent_block(
let mut all_same_len = true;
let mut hashes = Vec::with_capacity(dir_count);
for r in dvec.iter() {
let block_hashes = r.hashes(0);
let block_hashes = r.hashes(0).into_iter().collect::<Vec<_>>();
print!(
"{:^16} ",
if depth < block_hashes.len() {
29 changes: 24 additions & 5 deletions downstairs/src/extent_inner_raw.rs
@@ -14,6 +14,7 @@ use crate::{
ExtentWrite, JobId, RegionDefinition,
};
use crucible_common::ExtentId;
use crucible_protocol::ReadBlockContext;

use itertools::Itertools;
use serde::{Deserialize, Serialize};
@@ -322,7 +323,15 @@ impl ExtentInner for RawInner {
let blocks = self.get_block_contexts_inner(
req.offset.0,
num_blocks,
|ctx, _block| ctx.map(|c| c.block_context),
|ctx, _block| match ctx {
None => ReadBlockContext::Empty,
Some(c) => match c.block_context.encryption_context {
Some(ctx) => ReadBlockContext::Encrypted { ctx },
None => ReadBlockContext::Unencrypted {
hash: c.block_context.hash,
},
},
},
)?;
cdt::extent__read__get__contexts__done!(|| {
(job_id.0, self.extent_number.0, num_blocks)
@@ -1604,6 +1613,7 @@ mod test {
}],
};
inner.write(JobId(10), &write, false, IOV_MAX_TEST)?;
let prev_hash = hash;

// The context should be in place, though we haven't flushed yet

@@ -1629,8 +1639,11 @@
};
let resp = inner.read(JobId(21), read, IOV_MAX_TEST)?;

// We should not get back our data, because block 0 was written.
assert_ne!(resp.blocks, vec![Some(block_context)]);
// We should get back our old data, because block 0 was written.
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash: prev_hash }]
);
assert_ne!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1656,7 +1669,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1883,7 +1899,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

38 changes: 29 additions & 9 deletions downstairs/src/extent_inner_sqlite.rs
@@ -9,7 +9,7 @@ use crate::{
ExtentWrite, JobId, RegionDefinition,
};
use crucible_common::{crucible_bail, ExtentId};
use crucible_protocol::EncryptionContext;
use crucible_protocol::{EncryptionContext, ReadBlockContext};

use anyhow::{bail, Result};
use itertools::Itertools;
@@ -301,10 +301,18 @@ impl SqliteMoreInner {
cdt::extent__read__get__contexts__done!(|| {
(job_id.0, self.extent_number.0, num_blocks)
});
// Convert from DownstairsBlockContext -> BlockContext
// Convert from DownstairsBlockContext -> ReadBlockContext
let blocks = block_contexts
.into_iter()
.map(|bs| bs.map(|b| b.block_context))
.map(|bs| match bs {
None => ReadBlockContext::Empty,
Some(b) => match b.block_context.encryption_context {
Some(ctx) => ReadBlockContext::Encrypted { ctx },
None => ReadBlockContext::Unencrypted {
hash: b.block_context.hash,
},
},
})
.collect();

// To avoid a `memset`, we're reading directly into uninitialized
@@ -1542,6 +1550,7 @@ mod test {

// We haven't flushed, but this should leave our context in place.
inner.fully_rehash_and_clean_all_stale_contexts(false)?;
let prev_hash = hash;

// Therefore, we expect that write_unwritten to the first block won't
// do anything.
@@ -1565,8 +1574,11 @@
};
let resp = inner.read(JobId(21), read, IOV_MAX_TEST)?;

// We should not get back our data, because block 0 was written.
assert_ne!(resp.blocks, vec![Some(block_context)]);
// We should get back our old data, because block 0 was written.
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash: prev_hash }]
);
assert_ne!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1592,7 +1604,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1653,7 +1668,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1694,8 +1712,10 @@
data: BytesMut::with_capacity(512 * 2),
};
let resp = inner.read(JobId(31), read(), IOV_MAX_TEST)?;
let expected_blocks: Vec<_> =
block_contexts.iter().map(|b| Some(*b)).collect();
let expected_blocks: Vec<_> = block_contexts
.iter()
.map(|b| ReadBlockContext::Unencrypted { hash: b.hash })
.collect();
assert_eq!(resp.blocks, expected_blocks);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));

42 changes: 24 additions & 18 deletions downstairs/src/lib.rs
@@ -20,7 +20,8 @@ use crucible_common::{
};
use crucible_protocol::{
BlockContext, CrucibleDecoder, JobId, Message, MessageWriter,
ReconciliationId, SnapshotDetails, CRUCIBLE_MESSAGE_VERSION,
ReadBlockContext, ReconciliationId, SnapshotDetails,
CRUCIBLE_MESSAGE_VERSION,
};
use repair_client::Client;

@@ -204,7 +205,7 @@ impl IntoIterator for RegionReadRequest {
/// Do not derive `Clone` on this type; it will be expensive and tempting to
/// call by accident!
pub(crate) struct RegionReadResponse {
blocks: Vec<Option<BlockContext>>,
blocks: Vec<ReadBlockContext>,
data: BytesMut,
uninit: BytesMut,
}
@@ -281,24 +282,27 @@ impl RegionReadResponse {
assert!(was_empty || prev_ptr == self.data.as_ptr());
}

/// Returns hashes for the given response
///
/// This is expensive and should only be used for debugging
pub fn hashes(&self, i: usize) -> Vec<u64> {
self.blocks[i].iter().map(|ctx| ctx.hash).collect()
/// Returns the hash for the given response (if unencrypted)
pub fn hashes(&self, i: usize) -> Option<u64> {
match self.blocks[i] {
ReadBlockContext::Empty | ReadBlockContext::Encrypted { .. } => {
None
}
ReadBlockContext::Unencrypted { hash } => Some(hash),
}
}

/// Returns encryption contexts for the given response
///
/// This is expensive and should only be used for debugging
pub fn encryption_contexts(
&self,
i: usize,
) -> Vec<Option<&crucible_protocol::EncryptionContext>> {
self.blocks[i]
.iter()
.map(|ctx| ctx.encryption_context.as_ref())
.collect()
) -> Option<crucible_protocol::EncryptionContext> {
match self.blocks[i] {
ReadBlockContext::Empty | ReadBlockContext::Unencrypted { .. } => {
None
}
ReadBlockContext::Encrypted { ctx } => Some(ctx),
}
}
}

@@ -307,7 +311,7 @@ impl RegionReadResponse {
/// Do not derive `Clone` on this type; it will be expensive and tempting to
/// call by accident!
pub(crate) struct ExtentReadResponse {
blocks: Vec<Option<BlockContext>>,
blocks: Vec<ReadBlockContext>,
/// At this point, the buffer must be fully initialized
data: BytesMut,
}
@@ -5444,9 +5448,11 @@ mod test {

assert_eq!(response.blocks.len(), 1);

let hashes = response.hashes(0);
assert_eq!(hashes.len(), 1);
assert_eq!(integrity_hash(&[&response.data[..]]), hashes[0],);
let hash = response.hashes(0);
assert_eq!(
integrity_hash(&[&response.data[..]]),
hash.unwrap()
);

read_data.extend_from_slice(&response.data[..]);
}