Skip to content

Commit

Permalink
finish compaction
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi <[email protected]>
  • Loading branch information
skyzh committed Jan 18, 2024
1 parent ce33f62 commit 53cb1fe
Show file tree
Hide file tree
Showing 12 changed files with 249 additions and 43 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
.vscode/
sync-tmp/
mini-lsm.db/
13 changes: 5 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@ We are working on a new version of the mini-lsm tutorial that is split into 3 we
| 1.4 | Merge Iterators ||||
| 1.5 | Storage Engine - Read Path ||||
| 1.6 | Storage Engine - Write Path ||||
| 2.1 | Compaction - Get Started || 🚧 | 🚧 |
| 2.2 | Compaction Strategy - Tiered || | |
| 2.3 | Compaction Strategy - Leveled || | |
| 2.4 | Manifest | | | |
| 2.5 | Write-Ahead Log | | | |
| 2.6 | Bloom Filter and Key Compression | | | |
| 1.7 | Bloom Filter and Key Compression | | | |
| 2.1 | Compaction Introduction || 🚧 | 🚧 |
| 2.2 | Compaction Strategy - Simple || 🚧 | 🚧 |
| 2.3 | Compaction Strategy - Tiered || | |
| 2.4 | Compaction Strategy - Leveled || | |
| 2.5 | Manifest | | | |
| 2.6 | Write-Ahead Log | | | |
| 3.1 | Timestamp Encoding + Prefix Bloom Filter | | | |
| 3.2 | Snapshot Read | | | |
| 3.3 | Watermark and Garbage Collection | | | |
Expand Down
1 change: 1 addition & 0 deletions mini-lsm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ ouroboros = "0.15"
moka = "0.9"
clap = { version = "4.4.17", features = ["derive"] }
rand = "0.8.5"
crossbeam-channel = "0.5.11"

[dev-dependencies]
tempfile = "3"
2 changes: 1 addition & 1 deletion mini-lsm/src/bin/compaction_simulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ fn main() {
iterations,
} => {
let controller = TieredCompactionController::new(TieredCompactionOptions {
level0_file_num_compaction_trigger,
num_tiers: level0_file_num_compaction_trigger,
max_size_amplification_percent,
size_ratio,
min_merge_width,
Expand Down
62 changes: 62 additions & 0 deletions mini-lsm/src/bin/minilsm_cli.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use std::time::Duration;

use anyhow::Result;
use mini_lsm::compact::{CompactionOptions, SimpleLeveledCompactionOptions};
use mini_lsm::lsm_storage::{LsmStorageOptions, MiniLsm};

/// Interactive shell for a mini-LSM store opened at `mini-lsm.db`.
///
/// Reads one command per line from standard input until end of input:
///
/// - `fill <begin> <end>` writes every key in `begin..=end` (keys are the
///   decimal numbers) with the value `value<key>@<epoch>`.
/// - `get <key>` prints the value stored under `key`, if any.
/// - `flush` forces the immutable memtables to be flushed.
///
/// `epoch` is bumped after every command that runs to completion (including
/// unknown commands), so repeated `fill`s of the same range produce
/// distinguishable values.
///
/// # Errors
///
/// Returns an error when the store cannot be opened, standard input cannot be
/// read, or a storage operation fails. Malformed commands are reported on
/// stdout and do not terminate the shell.
fn main() -> Result<()> {
    let lsm = MiniLsm::open(
        "mini-lsm.db",
        LsmStorageOptions {
            block_size: 4096,
            target_sst_size: 2 << 20,
            compaction_options: CompactionOptions::Simple(SimpleLeveledCompactionOptions {
                size_ratio_percent: 200,
                level0_file_num_compaction_trigger: 2,
                max_levels: 4,
            }),
        },
    )?;
    let mut epoch = 0;
    loop {
        let mut line = String::new();
        // `read_line` yields 0 bytes once stdin is exhausted; without this
        // check the loop would spin forever on an empty line after EOF.
        if std::io::stdin().read_line(&mut line)? == 0 {
            break;
        }
        let line = line.trim();
        if let Some(options) = line.strip_prefix("fill ") {
            let Some((begin, end)) = options.split_once(' ') else {
                println!("invalid command");
                continue;
            };
            // A typo in a number is a user error, not a reason to exit.
            let (Ok(begin), Ok(end)) = (begin.parse::<u64>(), end.parse::<u64>()) else {
                println!("invalid command");
                continue;
            };

            // Count what was actually written: `end - begin + 1` underflows
            // when `begin > end`, where the range is simply empty.
            let mut filled = 0u64;
            for i in begin..=end {
                lsm.put(
                    i.to_string().as_bytes(),
                    format!("value{}@{}", i, epoch).as_bytes(),
                )?;
                filled += 1;
            }

            println!("{} values filled with epoch {}", filled, epoch);
        } else if let Some(key) = line.strip_prefix("get ") {
            if let Some(value) = lsm.get(key.as_bytes())? {
                println!("{}={:?}", key, value);
            } else {
                println!("{} not exist", key);
            }
        } else if line == "flush" {
            lsm.force_flush_imm_memtables()?;
        } else {
            println!("invalid command: {}", line);
        }
        epoch += 1;
    }
    Ok(())
}
155 changes: 141 additions & 14 deletions mini-lsm/src/compact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod simple_leveled;
mod tiered;

use std::sync::Arc;
use std::time::Duration;

use anyhow::Result;
pub use leveled::{LeveledCompactionController, LeveledCompactionOptions, LeveledCompactionTask};
Expand All @@ -13,18 +14,25 @@ pub use tiered::{TieredCompactionController, TieredCompactionOptions, TieredComp

use crate::iterators::merge_iterator::MergeIterator;
use crate::iterators::StorageIterator;
use crate::lsm_storage::LsmStorageInner;
use crate::lsm_storage::{LsmStorageInner, LsmStorageState};
use crate::table::{SsTable, SsTableBuilder, SsTableIterator};

pub(crate) enum CompactionTask {
Leveled(LeveledCompactionTask),
Tiered(TieredCompactionTask),
Simple(SimpleLeveledCompactionTask),
ForceFullCompaction(Vec<usize>),
}

struct CompactOptions {
block_size: usize,
target_sst_size: usize,
impl CompactionTask {
fn compact_to_bottom_level(&self) -> bool {
match self {
CompactionTask::ForceFullCompaction(_) => true,
CompactionTask::Leveled(task) => task.is_lower_level_bottom_level,
CompactionTask::Simple(task) => task.is_lower_level_bottom_level,
CompactionTask::Tiered(task) => task.bottom_tier_included,
}
}
}

pub(crate) enum CompactionController {
Expand All @@ -34,6 +42,43 @@ pub(crate) enum CompactionController {
NoCompaction,
}

impl CompactionController {
    /// Asks the wrapped strategy controller for the next compaction task,
    /// given a snapshot of the storage state.
    ///
    /// Returns `None` when the strategy decides that nothing needs to be
    /// compacted; otherwise the strategy's task wrapped in the matching
    /// [`CompactionTask`] variant.
    ///
    /// # Panics
    ///
    /// Panics when called on [`CompactionController::NoCompaction`]: callers
    /// must not schedule compaction when it is disabled.
    fn generate_compaction_task(&self, snapshot: &LsmStorageState) -> Option<CompactionTask> {
        // `snapshot` is already a reference; pass it through as-is rather
        // than re-borrowing it.
        match self {
            CompactionController::Leveled(ctrl) => ctrl
                .generate_compaction_task(snapshot)
                .map(CompactionTask::Leveled),
            CompactionController::Simple(ctrl) => ctrl
                .generate_compaction_task(snapshot)
                .map(CompactionTask::Simple),
            CompactionController::Tiered(ctrl) => ctrl
                .generate_compaction_task(snapshot)
                .map(CompactionTask::Tiered),
            CompactionController::NoCompaction => {
                unreachable!("compaction task requested while compaction is disabled")
            }
        }
    }

    /// Applies the outcome of a finished compaction `task` to `snapshot`.
    ///
    /// `output` holds the ids of the SSTs produced by the compaction. Returns
    /// the updated state together with the ids of the SSTs that the strategy
    /// reports as no longer needed.
    ///
    /// # Panics
    ///
    /// Panics when `task` was not produced by the same kind of controller
    /// (including forced full compactions and the `NoCompaction` controller).
    fn apply_compaction_result(
        &self,
        snapshot: &LsmStorageState,
        task: &CompactionTask,
        output: &[usize],
    ) -> (LsmStorageState, Vec<usize>) {
        match (self, task) {
            (CompactionController::Leveled(ctrl), CompactionTask::Leveled(task)) => {
                ctrl.apply_compaction_result(snapshot, task, output)
            }
            (CompactionController::Simple(ctrl), CompactionTask::Simple(task)) => {
                ctrl.apply_compaction_result(snapshot, task, output)
            }
            (CompactionController::Tiered(ctrl), CompactionTask::Tiered(task)) => {
                ctrl.apply_compaction_result(snapshot, task, output)
            }
            _ => unreachable!("compaction task does not match the compaction controller"),
        }
    }
}

impl CompactionController {
pub fn flush_to_l0(&self) -> bool {
if let Self::Leveled(_) | Self::Simple(_) | Self::NoCompaction = self {
Expand All @@ -57,12 +102,37 @@ pub enum CompactionOptions {
}

impl LsmStorageInner {
#[allow(dead_code)]
fn compact(
&self,
tables: Vec<Arc<SsTable>>,
options: CompactOptions,
) -> Result<Vec<Arc<SsTable>>> {
fn compact(&self, task: &CompactionTask) -> Result<Vec<Arc<SsTable>>> {
let table_ids = match task {
CompactionTask::Leveled(task) => task
.lower_level_sst_ids
.iter()
.copied()
.chain(task.upper_level_sst_ids.iter().copied())
.collect::<Vec<_>>(),
CompactionTask::Simple(task) => task
.lower_level_sst_ids
.iter()
.copied()
.chain(task.upper_level_sst_ids.iter().copied())
.collect::<Vec<_>>(),
CompactionTask::Tiered(task) => task
.tiers
.iter()
.map(|(_, files)| files)
.flatten()
.copied()
.collect::<Vec<_>>(),
CompactionTask::ForceFullCompaction(l0_ssts) => l0_ssts.clone(),
};
let tables: Vec<Arc<SsTable>> = {
let state = self.state.read();
table_ids
.iter()
.map(|id| state.sstables.get(id).unwrap().clone())
.collect::<Vec<_>>()
};

let mut iters = Vec::new();
iters.reserve(tables.len());
for table in tables.iter() {
Expand All @@ -75,11 +145,11 @@ impl LsmStorageInner {
let mut builder = None;
let mut new_sst = vec![];

let compact_to_bottom_level = false;
let compact_to_bottom_level = task.compact_to_bottom_level();

while iter.is_valid() {
if builder.is_none() {
builder = Some(SsTableBuilder::new(options.block_size));
builder = Some(SsTableBuilder::new(self.options.block_size));
}
let builder_inner = builder.as_mut().unwrap();
if compact_to_bottom_level {
Expand All @@ -91,7 +161,7 @@ impl LsmStorageInner {
}
iter.next()?;

if builder_inner.estimated_size() >= options.target_sst_size {
if builder_inner.estimated_size() >= self.options.target_sst_size {
let sst_id = self.next_sst_id(); // lock dropped here
let builder = builder.take().unwrap();
let sst = Arc::new(builder.build(
Expand All @@ -114,10 +184,67 @@ impl LsmStorageInner {
Ok(new_sst)
}

/// Runs at most one compaction round.
///
/// Takes a snapshot of the current state, asks the compaction controller for
/// a task and returns early when there is none. Otherwise the task is
/// executed, the resulting SSTs are swapped into the state while holding
/// `state_lock`, and the files of the replaced SSTs are deleted afterwards.
fn trigger_compaction(&self) -> Result<()> {
    // Hold the read lock only long enough to clone the current state handle.
    let current = {
        let guard = self.state.read();
        guard.clone()
    };
    let task = match self.compaction_controller.generate_compaction_task(&current) {
        Some(task) => task,
        None => return Ok(()),
    };
    println!("running compaction task");
    let new_tables = self.compact(&task)?;
    let output: Vec<_> = new_tables.iter().map(|table| table.sst_id()).collect();
    let obsolete_tables = {
        // Serialize state modifications for the whole swap.
        let _state_lock = self.state_lock.lock();
        // The read guard is a temporary of this statement, so it is released
        // before the write lock is taken below.
        let (mut next_state, removed_ids) = self
            .compaction_controller
            .apply_compaction_result(&self.state.read(), &task, &output);
        let mut obsolete_tables = Vec::with_capacity(removed_ids.len());
        for id in removed_ids.iter() {
            let removed = next_state.sstables.remove(id);
            assert!(removed.is_some());
            obsolete_tables.push(removed.unwrap());
        }
        for table in new_tables {
            let previous = next_state.sstables.insert(table.sst_id(), table);
            assert!(previous.is_none());
        }
        let mut guard = self.state.write();
        *guard = Arc::new(next_state);
        obsolete_tables
    };
    // File deletion happens after the locks above have been released.
    obsolete_tables
        .into_iter()
        .try_for_each(|table| std::fs::remove_file(self.path_of_sst(table.sst_id())))?;
    Ok(())
}

/// Starts the background compaction thread when a compaction strategy is
/// configured.
///
/// For the `Leveled`, `Simple` and `Tiered` compaction options a thread is
/// spawned that calls `trigger_compaction` on every tick of a 50 ms ticker
/// and returns as soon as a receive on `rx` completes (the received result is
/// ignored). For any other option no thread is started.
///
/// Returns `Ok(Some(handle))` with the thread's join handle, or `Ok(None)`
/// when no thread was spawned.
pub(crate) fn spawn_compaction_thread(
self: &Arc<Self>,
rx: std::sync::mpsc::Receiver<()>,
rx: crossbeam_channel::Receiver<()>,
) -> Result<Option<std::thread::JoinHandle<()>>> {
if let CompactionOptions::Leveled(_)
| CompactionOptions::Simple(_)
| CompactionOptions::Tiered(_) = self.options.compaction_options
{
// The thread owns its own `Arc` handle to the storage.
let this = self.clone();
let handle = std::thread::spawn(move || {
let ticker = crossbeam_channel::tick(Duration::from_millis(50));
// Each tick runs one compaction round; a failed round is only
// reported on stderr and the loop keeps going.
loop {
crossbeam_channel::select! {
recv(ticker) -> _ => if let Err(e) = this.trigger_compaction() {
eprintln!("compaction failed: {}", e);
},
recv(rx) -> _ => return
}
}
});
return Ok(Some(handle));
}
Ok(None)
}
}
3 changes: 3 additions & 0 deletions mini-lsm/src/compact/leveled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub struct LeveledCompactionTask {
pub upper_level_sst_ids: Vec<usize>,
pub lower_level: usize,
pub lower_level_sst_ids: Vec<usize>,
pub is_lower_level_bottom_level: bool,
}

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -115,6 +116,7 @@ impl LeveledCompactionController {
&snapshot.l0_sstables,
base_level,
),
is_lower_level_bottom_level: base_level == self.options.max_levels,
});
}

Expand Down Expand Up @@ -143,6 +145,7 @@ impl LeveledCompactionController {
&[selected_sst],
level + 1,
),
is_lower_level_bottom_level: level + 1 == self.options.max_levels,
});
}
None
Expand Down
2 changes: 2 additions & 0 deletions mini-lsm/src/compact/simple_leveled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub struct SimpleLeveledCompactionTask {
pub upper_level_sst_ids: Vec<usize>,
pub lower_level: usize,
pub lower_level_sst_ids: Vec<usize>,
pub is_lower_level_bottom_level: bool,
}

pub struct SimpleLeveledCompactionController {
Expand Down Expand Up @@ -57,6 +58,7 @@ impl SimpleLeveledCompactionController {
},
lower_level,
lower_level_sst_ids: snapshot.levels[lower_level - 1].1.clone(),
is_lower_level_bottom_level: lower_level == self.options.max_levels,
});
}
}
Expand Down
Loading

0 comments on commit 53cb1fe

Please sign in to comment.