Simplify block context for reads (#1391)
Right now, the `ReadResponse` sends back a `Vec<Option<BlockContext>>`,
i.e. an `Option<BlockContext>` for each block. The `BlockContext` in
turn contains

```rust
pub struct BlockContext {
    pub hash: u64,
    pub encryption_context: Option<EncryptionContext>,
}
```

If `encryption_context` is populated, then sending `hash` to the
Upstairs is unnecessary: we are using authenticated encryption, so we
know whether the block data + context is valid based on whether it
decrypted successfully.
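
As a rough illustration of why the hash is redundant here (this is a sketch, not
Crucible's actual encryption code; the cipher choice, the detached nonce/tag
layout, and the function name are assumptions):

```rust
use aes_gcm::{AeadInPlace, Aes256Gcm, Key, KeyInit, Nonce, Tag};

/// Hypothetical helper: returns true iff the block decrypts cleanly.
/// Any bit flip in `block`, `nonce`, or `tag` makes the GCM tag check fail,
/// so successful decryption already proves integrity.
fn validate_encrypted_block(
    key: &Key<Aes256Gcm>,
    nonce: &[u8; 12],
    tag: &[u8; 16],
    block: &mut [u8],
) -> bool {
    Aes256Gcm::new(key)
        .decrypt_in_place_detached(
            Nonce::from_slice(nonce), // 96-bit nonce
            b"",                      // no associated data in this sketch
            block,                    // ciphertext, decrypted in place
            Tag::from_slice(tag),     // 128-bit authentication tag
        )
        .is_ok()
}
```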

This PR removes `hash` from the `ReadResponse` message in favor of a new
`enum`:
```rust
#[derive(Debug, PartialEq, Copy, Clone, Serialize, Deserialize)]
pub enum ReadBlockContext {
    Empty,
    Encrypted { ctx: EncryptionContext },
    Unencrypted { hash: u64 },
}
```
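
For illustration, a consumer of this enum picks exactly one validation path per
block; `block_is_valid` and its closure parameters are hypothetical stand-ins,
not the Upstairs' actual API:

```rust
use crucible_protocol::{EncryptionContext, ReadBlockContext};

/// Sketch: which integrity check applies is determined entirely by the
/// context variant, so "hash matches but decryption fails" (or vice versa)
/// can no longer occur.
fn block_is_valid(
    ctx: &ReadBlockContext,
    data: &mut [u8],
    decrypt: impl FnOnce(&EncryptionContext, &mut [u8]) -> bool,
    hash: impl FnOnce(&[u8]) -> u64,
) -> bool {
    match ctx {
        // Never-written block: there is no integrity data to check.
        ReadBlockContext::Empty => true,
        // Authenticated encryption: decryption success *is* the check.
        ReadBlockContext::Encrypted { ctx } => decrypt(ctx, data),
        // Unencrypted block: fall back to comparing the transmitted hash.
        ReadBlockContext::Unencrypted { hash: expected } => {
            hash(&data[..]) == *expected
        }
    }
}
```

With the old `Vec<Option<BlockContext>>`, a block could carry both a hash and an
encryption context, and the caller had to decide which check was authoritative.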

This does not change the on-disk format or write message format, which
both continue to use hashes:

- When **sending** data to the Downstairs, the hash (in `BlockContext`)
lets us detect corruption in transit (a rough sketch of this check follows
the list). We can't use the encryption context here, because the Downstairs
isn't allowed to have encryption keys.
- When recovering from a bad write, `on_disk_hash` is used to figure out
which context slot is valid. `on_disk_hash` is never sent over the
network.¹
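
That transit check might look roughly like this; the hasher and names below are
stand-ins, and Crucible's real `integrity_hash` may differ in both algorithm and
inputs:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher;

/// Stand-in hash (std's SipHash), used here purely for illustration.
fn transit_hash(data: &[u8]) -> u64 {
    let mut h = DefaultHasher::new();
    h.write(data);
    h.finish()
}

/// The Downstairs can reject a write whose payload was corrupted in transit
/// by recomputing the hash and comparing it against the one the Upstairs
/// sent in `BlockContext`, without ever holding an encryption key.
fn write_is_intact(sent_hash: u64, data: &[u8]) -> bool {
    transit_hash(data) == sent_hash
}
```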

This PR is a step towards [RFD 490 § Metadata
reduction](https://rfd.shared.oxide.computer/rfd/490#_metadata_reduction),
which proposes to **not** store block hashes for encrypted data on disk.
If in the future we don't store block hashes for encrypted data, we
would not be able to send them over the network; this PR removes that
future hurdle.

However, the PR stands alone as a small optimization (39 → 32 bytes per
block) that simplifies program behavior (no need to think about what
happens if encryption fails but the hash matches, or vice versa).

¹ `on_disk_hash` is also _technically_ superfluous if we already have
`hash` (see #1161), but this PR doesn't change it.
mkeeter authored Jul 31, 2024
1 parent 5951c65 commit 07cb9ed
Showing 14 changed files with 286 additions and 261 deletions.
42 changes: 20 additions & 22 deletions downstairs/src/dump.rs
@@ -699,8 +699,10 @@ fn show_extent_block(
for (dir_index, r) in dvec.iter().enumerate() {
print!("{:^24} ", dir_index);

max_nonce_depth =
std::cmp::max(max_nonce_depth, r.encryption_contexts(0).len());
max_nonce_depth = std::cmp::max(
max_nonce_depth,
r.encryption_contexts(0).is_some() as usize,
);
}
if !only_show_differences {
print!(" {:<5}", "DIFF");
@@ -722,17 +724,14 @@ fn show_extent_block(
let mut all_same_len = true;
let mut nonces = Vec::with_capacity(dir_count);
for r in dvec.iter() {
let ctxs = r.encryption_contexts(0);
// TODO this can only be 0 or 1
let ctxs =
r.encryption_contexts(0).into_iter().collect::<Vec<_>>();
print!(
"{:^24} ",
if depth < ctxs.len() {
if let Some(ec) = ctxs[depth] {
nonces.push(&ec.nonce);
hex::encode(ec.nonce)
} else {
all_same_len = false;
"".to_string()
}
nonces.push(ctxs[depth].nonce);
hex::encode(ctxs[depth].nonce)
} else {
all_same_len = false;
"".to_string()
@@ -756,8 +755,10 @@ fn show_extent_block(
for (dir_index, r) in dvec.iter().enumerate() {
print!("{:^32} ", dir_index);

max_tag_depth =
std::cmp::max(max_tag_depth, r.encryption_contexts(0).len());
max_tag_depth = std::cmp::max(
max_tag_depth,
r.encryption_contexts(0).is_some() as usize,
);
}
if !only_show_differences {
print!(" {:<5}", "DIFF");
@@ -779,17 +780,13 @@ fn show_extent_block(
let mut all_same_len = true;
let mut tags = Vec::with_capacity(dir_count);
for r in dvec.iter() {
let ctxs = r.encryption_contexts(0);
let ctxs =
r.encryption_contexts(0).into_iter().collect::<Vec<_>>();
print!(
"{:^32} ",
if depth < ctxs.len() {
if let Some(ec) = ctxs[depth] {
tags.push(&ec.tag);
hex::encode(ec.tag)
} else {
all_same_len = false;
"".to_string()
}
tags.push(ctxs[depth].tag);
hex::encode(ctxs[depth].tag)
} else {
all_same_len = false;
"".to_string()
@@ -820,7 +817,8 @@ fn show_extent_block(
for (dir_index, r) in dvec.iter().enumerate() {
print!("{:^16} ", dir_index);

max_hash_depth = std::cmp::max(max_hash_depth, r.hashes(0).len());
max_hash_depth =
std::cmp::max(max_hash_depth, r.hashes(0).is_some() as usize);
}
if !only_show_differences {
print!(" {:<5}", "DIFF");
@@ -842,7 +840,7 @@ fn show_extent_block(
let mut all_same_len = true;
let mut hashes = Vec::with_capacity(dir_count);
for r in dvec.iter() {
let block_hashes = r.hashes(0);
let block_hashes = r.hashes(0).into_iter().collect::<Vec<_>>();
print!(
"{:^16} ",
if depth < block_hashes.len() {
29 changes: 24 additions & 5 deletions downstairs/src/extent_inner_raw.rs
@@ -14,6 +14,7 @@ use crate::{
ExtentWrite, JobId, RegionDefinition,
};
use crucible_common::ExtentId;
use crucible_protocol::ReadBlockContext;

use itertools::Itertools;
use serde::{Deserialize, Serialize};
@@ -322,7 +323,15 @@ impl ExtentInner for RawInner {
let blocks = self.get_block_contexts_inner(
req.offset.0,
num_blocks,
|ctx, _block| ctx.map(|c| c.block_context),
|ctx, _block| match ctx {
None => ReadBlockContext::Empty,
Some(c) => match c.block_context.encryption_context {
Some(ctx) => ReadBlockContext::Encrypted { ctx },
None => ReadBlockContext::Unencrypted {
hash: c.block_context.hash,
},
},
},
)?;
cdt::extent__read__get__contexts__done!(|| {
(job_id.0, self.extent_number.0, num_blocks)
@@ -1604,6 +1613,7 @@ mod test {
}],
};
inner.write(JobId(10), &write, false, IOV_MAX_TEST)?;
let prev_hash = hash;

// The context should be in place, though we haven't flushed yet

@@ -1629,8 +1639,11 @@
};
let resp = inner.read(JobId(21), read, IOV_MAX_TEST)?;

// We should not get back our data, because block 0 was written.
assert_ne!(resp.blocks, vec![Some(block_context)]);
// We should get back our old data, because block 0 was written.
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash: prev_hash }]
);
assert_ne!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1656,7 +1669,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1883,7 +1899,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

38 changes: 29 additions & 9 deletions downstairs/src/extent_inner_sqlite.rs
@@ -9,7 +9,7 @@ use crate::{
ExtentWrite, JobId, RegionDefinition,
};
use crucible_common::{crucible_bail, ExtentId};
use crucible_protocol::EncryptionContext;
use crucible_protocol::{EncryptionContext, ReadBlockContext};

use anyhow::{bail, Result};
use itertools::Itertools;
@@ -301,10 +301,18 @@ impl SqliteMoreInner {
cdt::extent__read__get__contexts__done!(|| {
(job_id.0, self.extent_number.0, num_blocks)
});
// Convert from DownstairsBlockContext -> BlockContext
// Convert from DownstairsBlockContext -> ReadBlockContext
let blocks = block_contexts
.into_iter()
.map(|bs| bs.map(|b| b.block_context))
.map(|bs| match bs {
None => ReadBlockContext::Empty,
Some(b) => match b.block_context.encryption_context {
Some(ctx) => ReadBlockContext::Encrypted { ctx },
None => ReadBlockContext::Unencrypted {
hash: b.block_context.hash,
},
},
})
.collect();

// To avoid a `memset`, we're reading directly into uninitialized
@@ -1542,6 +1550,7 @@ mod test {

// We haven't flushed, but this should leave our context in place.
inner.fully_rehash_and_clean_all_stale_contexts(false)?;
let prev_hash = hash;

// Therefore, we expect that write_unwritten to the first block won't
// do anything.
@@ -1565,8 +1574,11 @@
};
let resp = inner.read(JobId(21), read, IOV_MAX_TEST)?;

// We should not get back our data, because block 0 was written.
assert_ne!(resp.blocks, vec![Some(block_context)]);
// We should get back our old data, because block 0 was written.
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash: prev_hash }]
);
assert_ne!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1592,7 +1604,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1653,7 +1668,10 @@
let resp = inner.read(JobId(31), read, IOV_MAX_TEST)?;

// We should get back our data! Block 1 was never written.
assert_eq!(resp.blocks, vec![Some(block_context)]);
assert_eq!(
resp.blocks,
vec![ReadBlockContext::Unencrypted { hash }]
);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));
}

@@ -1694,8 +1712,10 @@
data: BytesMut::with_capacity(512 * 2),
};
let resp = inner.read(JobId(31), read(), IOV_MAX_TEST)?;
let expected_blocks: Vec<_> =
block_contexts.iter().map(|b| Some(*b)).collect();
let expected_blocks: Vec<_> = block_contexts
.iter()
.map(|b| ReadBlockContext::Unencrypted { hash: b.hash })
.collect();
assert_eq!(resp.blocks, expected_blocks);
assert_eq!(resp.data, BytesMut::from(data.as_ref()));

42 changes: 24 additions & 18 deletions downstairs/src/lib.rs
@@ -20,7 +20,8 @@ use crucible_common::{
};
use crucible_protocol::{
BlockContext, CrucibleDecoder, JobId, Message, MessageWriter,
ReconciliationId, SnapshotDetails, CRUCIBLE_MESSAGE_VERSION,
ReadBlockContext, ReconciliationId, SnapshotDetails,
CRUCIBLE_MESSAGE_VERSION,
};
use repair_client::Client;

@@ -204,7 +205,7 @@ impl IntoIterator for RegionReadRequest {
/// Do not derive `Clone` on this type; it will be expensive and tempting to
/// call by accident!
pub(crate) struct RegionReadResponse {
blocks: Vec<Option<BlockContext>>,
blocks: Vec<ReadBlockContext>,
data: BytesMut,
uninit: BytesMut,
}
@@ -281,24 +282,27 @@ impl RegionReadResponse {
assert!(was_empty || prev_ptr == self.data.as_ptr());
}

/// Returns hashes for the given response
///
/// This is expensive and should only be used for debugging
pub fn hashes(&self, i: usize) -> Vec<u64> {
self.blocks[i].iter().map(|ctx| ctx.hash).collect()
/// Returns the hash for the given response (if unencrypted)
pub fn hashes(&self, i: usize) -> Option<u64> {
match self.blocks[i] {
ReadBlockContext::Empty | ReadBlockContext::Encrypted { .. } => {
None
}
ReadBlockContext::Unencrypted { hash } => Some(hash),
}
}

/// Returns encryption contexts for the given response
///
/// This is expensive and should only be used for debugging
pub fn encryption_contexts(
&self,
i: usize,
) -> Vec<Option<&crucible_protocol::EncryptionContext>> {
self.blocks[i]
.iter()
.map(|ctx| ctx.encryption_context.as_ref())
.collect()
) -> Option<crucible_protocol::EncryptionContext> {
match self.blocks[i] {
ReadBlockContext::Empty | ReadBlockContext::Unencrypted { .. } => {
None
}
ReadBlockContext::Encrypted { ctx } => Some(ctx),
}
}
}

@@ -307,7 +311,7 @@ impl RegionReadResponse {
/// Do not derive `Clone` on this type; it will be expensive and tempting to
/// call by accident!
pub(crate) struct ExtentReadResponse {
blocks: Vec<Option<BlockContext>>,
blocks: Vec<ReadBlockContext>,
/// At this point, the buffer must be fully initialized
data: BytesMut,
}
@@ -5444,9 +5448,11 @@ mod test {

assert_eq!(response.blocks.len(), 1);

let hashes = response.hashes(0);
assert_eq!(hashes.len(), 1);
assert_eq!(integrity_hash(&[&response.data[..]]), hashes[0],);
let hash = response.hashes(0);
assert_eq!(
integrity_hash(&[&response.data[..]]),
hash.unwrap()
);

read_data.extend_from_slice(&response.data[..]);
}