From bea0468f1f62d7573e4c5167bc9b19e43f500ced Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Thu, 25 Jul 2024 12:56:37 -0400 Subject: [PATCH] fix(pageserver): allow incomplete history in btm-gc-compaction (#8500) This pull request (should) fix the failure of test_gc_feedback. See the explanation in the newly-added test case. Part of https://github.com/neondatabase/neon/issues/8002 Allow incomplete history for the compaction algorithm. Signed-off-by: Alex Chi Z --- pageserver/src/tenant.rs | 121 ++++++++++++++++++- pageserver/src/tenant/timeline/compaction.rs | 13 +- 2 files changed, 125 insertions(+), 9 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index f359326cc074..41d8a40941e9 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -7309,7 +7309,9 @@ mod tests { ( key, Lsn(0x80), - Value::WalRecord(NeonWalRecord::wal_append(";0x80")), + Value::Image(Bytes::copy_from_slice( + b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80", + )), ), ( key, @@ -7371,7 +7373,9 @@ mod tests { ), ( Lsn(0x80), - Value::WalRecord(NeonWalRecord::wal_append(";0x80")), + Value::Image(Bytes::copy_from_slice( + b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80", + )), ), ( Lsn(0x90), @@ -7380,7 +7384,118 @@ mod tests { ]), }; assert_eq!(res, expected_res); - // TODO: more tests with mixed image + delta, adding with k-merge test cases; e2e compaction test + + // We expect GC-compaction to run with the original GC. This would create a situation that + // the original GC algorithm removes some delta layers b/c there are full image coverage, + // therefore causing some keys to have an incomplete history below the lowest retain LSN. + // For example, we have + // ```plain + // init delta @ 0x10, image @ 0x20, delta @ 0x30 (gc_horizon), image @ 0x40. + // ``` + // Now the GC horizon moves up, and we have + // ```plain + // init delta @ 0x10, image @ 0x20, delta @ 0x30, image @ 0x40 (gc_horizon) + // ``` + // The original GC algorithm kicks in, and removes delta @ 0x10, image @ 0x20. + // We will end up with + // ```plain + // delta @ 0x30, image @ 0x40 (gc_horizon) + // ``` + // Now we run the GC-compaction, and this key does not have a full history. + // We should be able to handle this partial history and drop everything before the + // gc_horizon image. + + let history = vec![ + ( + key, + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append(";0x20")), + ), + ( + key, + Lsn(0x30), + Value::WalRecord(NeonWalRecord::wal_append(";0x30")), + ), + ( + key, + Lsn(0x40), + Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")), + ), + ( + key, + Lsn(0x50), + Value::WalRecord(NeonWalRecord::wal_append(";0x50")), + ), + ( + key, + Lsn(0x60), + Value::WalRecord(NeonWalRecord::wal_append(";0x60")), + ), + ( + key, + Lsn(0x70), + Value::WalRecord(NeonWalRecord::wal_append(";0x70")), + ), + ( + key, + Lsn(0x80), + Value::Image(Bytes::copy_from_slice( + b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80", + )), + ), + ( + key, + Lsn(0x90), + Value::WalRecord(NeonWalRecord::wal_append(";0x90")), + ), + ]; + let res = tline + .generate_key_retention(key, &history, Lsn(0x60), &[Lsn(0x40), Lsn(0x50)], 3) + .await + .unwrap(); + let expected_res = KeyHistoryRetention { + below_horizon: vec![ + ( + Lsn(0x40), + KeyLogAtLsn(vec![( + Lsn(0x40), + Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")), + )]), + ), + ( + Lsn(0x50), + KeyLogAtLsn(vec![( + Lsn(0x50), + Value::WalRecord(NeonWalRecord::wal_append(";0x50")), + )]), + ), + ( + Lsn(0x60), + KeyLogAtLsn(vec![( + Lsn(0x60), + Value::WalRecord(NeonWalRecord::wal_append(";0x60")), + )]), + ), + ], + above_horizon: KeyLogAtLsn(vec![ + ( + Lsn(0x70), + Value::WalRecord(NeonWalRecord::wal_append(";0x70")), + ), + ( + Lsn(0x80), + Value::Image(Bytes::copy_from_slice( + b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80", + )), + ), + ( + Lsn(0x90), + Value::WalRecord(NeonWalRecord::wal_append(";0x90")), + ), + ]), + }; + assert_eq!(res, expected_res); + Ok(()) } diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 487ff6cd8030..2c7ae911df62 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1122,9 +1122,10 @@ impl Timeline { ); } } - if let Value::WalRecord(rec) = &history[0].2 { - assert!(rec.will_init(), "no base image"); - } + // There was an assertion for no base image that checks if the first + // record in the history is `will_init` before, but it was removed. + // This is explained in the test cases for generate_key_retention. + // Search "incomplete history" for more information. for lsn in retain_lsn_below_horizon { assert!(lsn < &horizon, "retain lsn must be below horizon") } @@ -1200,9 +1201,6 @@ impl Timeline { false }; replay_history.extend(split_for_lsn.iter().map(|x| (*x).clone())); - if let Some((_, _, val)) = replay_history.first() { - assert!(val.will_init(), "invalid history, no base image"); - } // Only retain the items after the last image record for idx in (0..replay_history.len()).rev() { if replay_history[idx].2.will_init() { @@ -1210,6 +1208,9 @@ impl Timeline { break; } } + if let Some((_, _, val)) = replay_history.first() { + assert!(val.will_init(), "invalid history, no base image"); + } if generate_image && records_since_last_image > 0 { records_since_last_image = 0; let history = std::mem::take(&mut replay_history);