From dabc6367d73381d190c3a962255665eedc2e6610 Mon Sep 17 00:00:00 2001
From: Lucasliang
Date: Thu, 11 Aug 2022 17:42:45 +0800
Subject: [PATCH 01/22] Support secondary-dir configuration.

This commit builds a prototype that supports a secondary-dir
configuration.

Signed-off-by: Lucasliang
---
 src/config.rs                     |  10 ++
 src/file_pipe_log/pipe.rs         | 272 +++++++++++++++++++++++++++++-
 src/file_pipe_log/pipe_builder.rs | 189 ++++++++++++++++++++-
 3 files changed, 464 insertions(+), 7 deletions(-)

diff --git a/src/config.rs b/src/config.rs
index 94b791e4..823d4ac1 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -31,6 +31,15 @@ pub struct Config {
     /// Default: ""
     pub dir: String,
 
+    /// Auxiliary directory to store log files. Will be created on startup
+    /// if set and it does not exist.
+    ///
+    /// New log files are put into this dir when the main `dir` is full or
+    /// not accessible.
+    ///
+    /// Default: None
+    pub sub_dir: Option<String>,
+
     /// How to deal with file corruption during recovery.
     ///
     /// Default: "tolerate-tail-corruption".
@@ -97,6 +106,7 @@ impl Default for Config {
         #[allow(unused_mut)]
         let mut cfg = Config {
             dir: "".to_owned(),
+            sub_dir: None,
             recovery_mode: RecoveryMode::TolerateTailCorruption,
             recovery_read_block_size: ReadableSize::kb(16),
             recovery_threads: 4,
diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs
index f5f6e4b1..c11668c3 100644
--- a/src/file_pipe_log/pipe.rs
+++ b/src/file_pipe_log/pipe.rs
@@ -1,6 +1,6 @@
 // Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0.
 
-use std::collections::VecDeque;
+use std::collections::{HashSet, VecDeque};
 use std::fs::File;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -9,6 +9,7 @@ use crossbeam::utils::CachePadded;
 use fail::fail_point;
 use log::error;
 use parking_lot::{Mutex, MutexGuard, RwLock};
+use strum::{EnumIter, IntoEnumIterator};
 
 use crate::config::Config;
 use crate::env::FileSystem;
@@ -20,10 +21,174 @@ use crate::{perf_context, Error, Result};
 use super::format::{FileNameExt, LogFileFormat};
 use super::log_file::{build_file_reader, build_file_writer, LogFileWriter};
 
+#[repr(u8)]
+#[derive(Clone, Copy, PartialEq, Eq, Debug, EnumIter)]
+pub enum StorageDirType {
+    Main = 0,
+    Secondary = 1,
+}
+
+#[derive(Debug, Clone)]
+struct SingleStorageInfo {
+    dir: String,
+    stale_files: HashSet<FileSeq>,
+    in_use_files: HashSet<FileSeq>,
+}
+
+impl SingleStorageInfo {
+    fn new(dir: String) -> Self {
+        Self {
+            dir,
+            stale_files: HashSet::default(),
+            in_use_files: HashSet::default(),
+        }
+    }
+
+    fn has_stale_file(&self) -> bool {
+        !self.stale_files.is_empty()
+    }
+
+    fn find_stale_file(&self, file_seq: FileSeq) -> bool {
+        self.stale_files.contains(&file_seq)
+    }
+
+    fn find_in_use_file(&self, file_seq: FileSeq) -> bool {
+        self.in_use_files.contains(&file_seq)
+    }
+
+    fn pop_stale_file(&mut self, file_seq: FileSeq) -> bool {
+        self.stale_files.remove(&file_seq)
+    }
+
+    fn push_stale_file(&mut self, file_seq: FileSeq) -> bool {
+        self.stale_files.insert(file_seq)
+    }
+
+    fn pop_in_use_file(&mut self, file_seq: FileSeq) -> bool {
+        self.in_use_files.remove(&file_seq)
+    }
+
+    fn push_in_use_file(&mut self, file_seq: FileSeq) -> bool {
+        self.in_use_files.insert(file_seq)
+    }
+}
+
+#[derive(Debug, Clone)]
+struct StorageInfo {
+    storage: Vec<SingleStorageInfo>,
+}
+
+impl StorageInfo {
+    // fn get_disk_stat(&self, target: StorageDirType) -> Result {}
+
+    fn new(dir: String, sub_dir: Option<String>) -> Self {
+        let mut storage = vec![SingleStorageInfo::new(dir)];
+        if let Some(sub) = sub_dir {
+            storage.push(SingleStorageInfo::new(sub));
} + Self { storage } + } + + fn pop_file(&mut self, file_seq: FileSeq) -> Option { + for t in StorageDirType::iter() { + let idx = t as usize; + if idx >= self.storage.len() { + break; + } + if self.storage[idx].pop_stale_file(file_seq) + || self.storage[idx].pop_in_use_file(file_seq) + { + return Some(t); + } + } + None + } + + fn push_in_use_file(&mut self, file_seq: FileSeq, storage_type: StorageDirType) -> bool { + let idx = storage_type as usize; + if idx >= self.storage.len() { + return false; + } + self.storage[idx].push_in_use_file(file_seq) + } + + fn push_stale_file(&mut self, file_seq: FileSeq, storage_type: StorageDirType) -> bool { + let idx = storage_type as usize; + if idx >= self.storage.len() { + return false; + } + self.storage[idx].push_stale_file(file_seq) + } + + fn get_stale_files_dir(&self) -> Option<(String, StorageDirType)> { + for t in StorageDirType::iter() { + let idx = t as usize; + if idx >= self.storage.len() { + break; + } + if self.storage[idx].has_stale_file() { + return Some((self.storage[idx].dir.clone(), t)); + } + } + None + } + + fn get_free_dir(&self, target_size: usize) -> Option<(String, StorageDirType)> { + for t in StorageDirType::iter() { + let idx = t as usize; + if idx >= self.storage.len() { + break; + } + let disk_stats = match fs2::statvfs(&self.storage[idx].dir) { + Err(e) => { + error!( + "get disk stat for raft engine failed, dir_path: {}, err: {}", + &self.storage[idx].dir, e + ); + return None; + } + Ok(stats) => stats, + }; + if target_size <= disk_stats.available_space() as usize { + return Some((self.storage[idx].dir.clone(), t)); + } + } + None + } + + fn find_dir(&self, file_seq: FileSeq) -> Option<(String, StorageDirType)> { + for t in StorageDirType::iter() { + let idx = t as usize; + if idx >= self.storage.len() { + break; + } + if self.storage[idx].find_stale_file(file_seq) + || self.storage[idx].find_in_use_file(file_seq) + { + return Some((self.storage[idx].dir.clone(), t)); + } + } + None + } + + fn sync_all_dir(&mut self) -> Result<()> { + for t in StorageDirType::iter() { + let idx = t as usize; + if idx >= self.storage.len() { + break; + } + let path = PathBuf::from(&self.storage[idx].dir); + std::fs::File::open(path).and_then(|d| d.sync_all())?; + } + Ok(()) + } +} + #[derive(Debug)] pub struct FileWithFormat { pub handle: Arc, pub format: LogFileFormat, + pub storage_type: StorageDirType, } struct FileCollection { @@ -75,7 +240,7 @@ struct ActiveFile { /// A file-based log storage that arranges files as one single queue. pub(super) struct SinglePipe { queue: LogQueue, - dir: String, + /* dir: String, */ file_format: LogFileFormat, target_file_size: usize, bytes_per_sync: usize, @@ -89,6 +254,8 @@ pub(super) struct SinglePipe { /// `active_file` must be locked first to acquire both `files` and /// `active_file` active_file: CachePadded>>, + /// Info of storage dir. + storage: CachePadded>, } impl Drop for SinglePipe { @@ -100,12 +267,15 @@ impl Drop for SinglePipe { // Here, we also should release the unnecessary disk space // occupied by stale files. 
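+        // A stale file may live in either the main or the secondary dir, so
+        // its dir is resolved through `StorageInfo` before building the path.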
let files = self.files.write(); + let storage = self.storage.read(); for seq in files.first_seq..files.first_seq_in_use { + let dir = storage.find_dir(seq); + debug_assert!(dir.is_some()); let file_id = FileId { queue: self.queue, seq, }; - let path = file_id.build_file_path(&self.dir); + let path = file_id.build_file_path(&dir.unwrap().0); if let Err(e) = self.file_system.delete(&path) { error!( "error while deleting stale file: {}, err_msg: {}", @@ -143,17 +313,29 @@ impl SinglePipe { } } + let mut storage = StorageInfo::new(cfg.dir.clone(), cfg.sub_dir.clone()); let create_file = first_seq == 0; let active_seq = if create_file { first_seq = 1; + let (dir, dir_type) = match storage.get_free_dir(cfg.target_file_size.0 as usize) { + Some((d, t)) => (d, t), + None => { + // No space for writing. + return Err(Error::Other(box_err!( + "no free space for recording new logs." + ))); + } + }; let file_id = FileId { queue, seq: first_seq, }; - let fd = Arc::new(file_system.create(&file_id.build_file_path(&cfg.dir))?); + // let fd = Arc::new(file_system.create(&file_id.build_file_path(&cfg.dir))?); + let fd = Arc::new(file_system.create(&file_id.build_file_path(&dir))?); fds.push_back(FileWithFormat { handle: fd, format: LogFileFormat::new(cfg.format_version, alignment), + storage_type: dir_type, }); first_seq } else { @@ -161,6 +343,7 @@ impl SinglePipe { }; for seq in first_seq..=active_seq { + storage.push_in_use_file(seq, fds[(seq - first_seq) as usize].storage_type); for listener in &listeners { listener.post_new_log_file(FileId { queue, seq }); } @@ -182,7 +365,7 @@ impl SinglePipe { let total_files = fds.len(); let pipe = Self { queue, - dir: cfg.dir.clone(), + /* dir: cfg.dir.clone(), */ file_format: LogFileFormat::new(cfg.format_version, alignment), target_file_size: cfg.target_file_size.0 as usize, bytes_per_sync: cfg.bytes_per_sync.0 as usize, @@ -196,6 +379,7 @@ impl SinglePipe { capacity, })), active_file: CachePadded::new(Mutex::new(active_file)), + storage: CachePadded::new(RwLock::new(storage)), }; pipe.flush_metrics(total_files); Ok(pipe) @@ -204,9 +388,13 @@ impl SinglePipe { /// Synchronizes all metadatas associated with the working directory to the /// filesystem. fn sync_dir(&self) -> Result<()> { + /* let path = PathBuf::from(&self.dir); std::fs::File::open(path).and_then(|d| d.sync_all())?; Ok(()) + */ + let mut storage = self.storage.write(); + storage.sync_all_dir() } /// Returns a shared [`LogFd`] for the specified file sequence number. @@ -237,6 +425,7 @@ impl SinglePipe { queue: self.queue, seq, }; + /* let path = file_id.build_file_path(&self.dir); let fd = { let mut files = self.files.write(); @@ -248,6 +437,37 @@ impl SinglePipe { Arc::new(self.file_system.create(&path)?) } }; + */ + let (fd, storage_type) = { + // Generate the file path. + let mut storage = self.storage.write(); + let (dir, storage_type) = { + if let Some((d, t)) = storage.get_stale_files_dir() { + // Has stale files for reusing. + (d, t) + } else if let Some((d, t)) = storage.get_free_dir(self.target_file_size) { + // Has free space for newly writing + (d, t) + } else { + // No space for writing. + return Err(Error::Other(box_err!( + "no free space for recording new logs." + ))); + } + }; + let path = file_id.build_file_path(&dir); + // Create the fd by recycling stale files or creating a new file. 
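+            // `recycle_one_file` renames the oldest stale file to `path`; on
+            // success that recycled seq is also popped from `storage` below.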
+ let mut files = self.files.write(); + let first_file_seq = files.first_seq; + if files.recycle_one_file(&self.file_system, &dir, file_id) { + storage.pop_file(first_file_seq); + // Open the recycled file(file is already renamed) + (Arc::new(self.file_system.open(&path)?), storage_type) + } else { + // A new file is introduced. + (Arc::new(self.file_system.create(&path)?), storage_type) + } + }; let mut new_file = ActiveFile { seq, // The file might generated from a recycled stale-file, always reset the file @@ -274,7 +494,10 @@ impl SinglePipe { files.fds.push_back(FileWithFormat { handle: fd, format: LogFileFormat::new(version, alignment), + storage_type, }); + let mut storage = self.storage.write(); + storage.push_in_use_file(seq, storage_type); for listener in &self.listeners { listener.post_new_log_file(FileId { queue: self.queue, @@ -342,6 +565,19 @@ impl SinglePipe { ); } return Err(e); + // TODO: if main dir is full, we should use the sec-dir and + // create a new file for appending bytes timely. + /* + // Dead lock + if let Err(re) = self.rotate_imp(&mut active_file) { + // ... + panic!("error when rotate {} after error: {}, get: {}", seq, e, re); + } else { + println!("Try to re-append bytes to new rotated file."); + self.active_file.force_unlock(); + return self.append(bytes); + } + */ } let handle = FileBlockHandle { id: FileId { @@ -431,12 +667,17 @@ impl SinglePipe { (old_first_seq, purged, files.fds.len()) }; self.flush_metrics(remained); + // TODO: @lucasliang, following strategy need to be polished + // (1) how to build the purged path. + // (2) how to put the stale (obsolete) files into storage for later processing. + let mut storage = self.storage.write(); for seq in first_purge_seq..first_purge_seq + purged as u64 { let file_id = FileId { queue: self.queue, seq, }; - let path = file_id.build_file_path(&self.dir); + let (dir, _) = storage.find_dir(seq).unwrap(); + let path = file_id.build_file_path(&dir); #[cfg(feature = "failpoints")] { let remove_skipped = || { @@ -448,6 +689,24 @@ impl SinglePipe { } } self.file_system.delete(&path)?; + debug_assert!(storage.pop_file(seq).is_some()); + } + // Move stale files from StorageInfo.in_use_files to StorageInfo.stale_files + let (start_seq, end_seq) = { + let files = self.files.read(); + (files.first_seq, files.first_seq_in_use) + }; + for seq in (start_seq..end_seq).rev() { + match storage.pop_file(seq) { + Some(t) => { + storage.push_stale_file(seq, t); + } + None => + // break, + { + continue + } + } } Ok(purged) } @@ -706,6 +965,7 @@ mod tests { .unwrap(), ), format: LogFileFormat::default(), + storage_type: StorageDirType::Main, } } let dir = Builder::new() diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index 8272dbe5..66083b3c 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -9,6 +9,7 @@ use std::path::Path; use std::sync::Arc; use fs2::FileExt; +use hashbrown::HashMap; use log::{error, info, warn}; use rayon::prelude::*; @@ -22,7 +23,7 @@ use crate::{Error, Result}; use super::format::{lock_file_path, FileNameExt, LogFileFormat}; use super::log_file::build_file_reader; -use super::pipe::{DualPipes, FileWithFormat, SinglePipe}; +use super::pipe::{DualPipes, FileWithFormat, SinglePipe, StorageDirType}; use super::reader::LogItemBatchFileReader; use crate::env::Handle; @@ -67,6 +68,7 @@ struct FileToRecover { seq: FileSeq, handle: Arc, format: Option, + storage_type: StorageDirType, } /// [`DualPipes`] factory that can also recover other 
customized memory states. @@ -99,6 +101,189 @@ impl DualPipesBuilder { /// Scans for all log files under the working directory. The directory will /// be created if not exists. pub fn scan(&mut self) -> Result<()> { + fn validate_dir(dir: &str) -> Result<()> { + let path = Path::new(dir); + if !path.exists() { + info!("Create raft log directory: {}", dir); + fs::create_dir(dir)?; + return Ok(()); + } + if !path.is_dir() { + return Err(box_err!("Not directory: {}", dir)); + } + Ok(()) + } + + fn parse_file_range( + dir: &str, + dir_type: StorageDirType, + append_range: &mut FileSeqRange, + rewrite_range: &mut FileSeqRange, + append_dict: &mut FileSeqDict, + rewrite_dict: &mut FileSeqDict, + ) -> Result<()> { + let path = Path::new(dir); + fs::read_dir(path)?.for_each(|e| { + if let Ok(e) = e { + let p = e.path(); + if p.is_file() { + match FileId::parse_file_name(p.file_name().unwrap().to_str().unwrap()) { + Some(FileId { + queue: LogQueue::Append, + seq, + }) => { + append_dict.insert(seq, dir_type); + append_range.0 = std::cmp::min(append_range.0, seq); + append_range.1 = std::cmp::max(append_range.1, seq); + } + Some(FileId { + queue: LogQueue::Rewrite, + seq, + }) => { + rewrite_dict.insert(seq, dir_type); + rewrite_range.0 = std::cmp::min(rewrite_range.0, seq); + rewrite_range.1 = std::cmp::max(rewrite_range.1, seq); + } + _ => {} + } + } + } + }); + Ok(()) + } + + // Validate main dir. + let dir = &self.cfg.dir; + validate_dir(dir)?; + self.dir_lock = Some(lock_dir(dir)?); + // Validate sub dir (secondary dir). + if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { + validate_dir(sub_dir)?; + } + + type FileSeqRange = (u64, u64); // (minimal_id, maximal_id) + type FileSeqDict = HashMap; // + let mut append_id_range = (u64::MAX, 0_u64); + let mut rewrite_id_range = (u64::MAX, 0_u64); + let mut append_file_dict: HashMap = HashMap::default(); + let mut rewrite_file_dict: HashMap = HashMap::default(); + + parse_file_range( + dir, + StorageDirType::Main, + &mut append_id_range, + &mut rewrite_id_range, + &mut append_file_dict, + &mut rewrite_file_dict, + )?; + if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { + parse_file_range( + sub_dir, + StorageDirType::Secondary, + &mut append_id_range, + &mut rewrite_id_range, + &mut append_file_dict, + &mut rewrite_file_dict, + )?; + } + + for (queue, min_id, max_id, files, file_dict) in [ + ( + LogQueue::Append, + append_id_range.0, + append_id_range.1, + &mut self.append_files, + &append_file_dict, + ), + ( + LogQueue::Rewrite, + rewrite_id_range.0, + rewrite_id_range.1, + &mut self.rewrite_files, + &rewrite_file_dict, + ), + ] { + if max_id > 0 { + // Try to cleanup stale metadata left by the previous version. + let max_sample = 100; + // Find the first obsolete metadata. + let mut delete_start = None; + for i in 0..max_sample { + let seq = i * min_id / max_sample; + let file_id = FileId { queue, seq }; + // Main dir + let path = file_id.build_file_path(&self.cfg.dir); + if self.file_system.exists_metadata(&path) { + delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); + break; + } + // Secondary dir + if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { + let path = file_id.build_file_path(sub_dir); + if self.file_system.exists_metadata(&path) { + delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); + break; + } + } + } + // Delete metadata starting from the oldest. Abort on error. 
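+                // Metadata may exist in either dir, so both the main dir and
+                // the secondary dir (if set) are probed for every sequence.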
+ if let Some(start) = delete_start { + let mut success = 0; + for seq in start..min_id { + let file_id = FileId { queue, seq }; + // Main dir + let path = file_id.build_file_path(&self.cfg.dir); + if let Err(e) = self.file_system.delete_metadata(&path) { + error!("failed to delete metadata of {}: {}.", path.display(), e); + break; + } + // Secondary dir + if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { + let path = file_id.build_file_path(sub_dir); + if let Err(e) = self.file_system.delete_metadata(&path) { + error!("failed to delete metadata of {}: {}.", path.display(), e); + break; + } + } + success += 1; + } + warn!( + "deleted {} stale files of {:?} in range [{}, {}).", + success, queue, start, min_id, + ); + } + for seq in min_id..=max_id { + let file_id = FileId { queue, seq }; + let (dir, storage_type) = match file_dict.get(&seq) { + Some(StorageDirType::Main) => (&self.cfg.dir, StorageDirType::Main), + Some(StorageDirType::Secondary) => { + debug_assert!(self.cfg.sub_dir.is_some()); + ( + self.cfg.sub_dir.as_ref().unwrap(), + StorageDirType::Secondary, + ) + } + None => { + warn!( + "Detected a hole when scanning directory, discarding files before {:?}.", + file_id, + ); + files.clear(); + continue; + } + }; + let path = file_id.build_file_path(dir); + let handle = Arc::new(self.file_system.open(&path)?); + files.push(FileToRecover { + seq, + handle, + format: None, + storage_type, + }); + } + } + } + /* let dir = &self.cfg.dir; let path = Path::new(dir); if !path.exists() { @@ -204,6 +389,7 @@ impl DualPipesBuilder { } } } + */ Ok(()) } @@ -408,6 +594,7 @@ impl DualPipesBuilder { .map(|f| FileWithFormat { handle: f.handle.clone(), format: f.format.unwrap(), + storage_type: f.storage_type, }) .collect(); SinglePipe::open( From 3c927b2a2e7ccfbfe8a9eca1f03277312a935989 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Mon, 15 Aug 2022 16:17:22 +0800 Subject: [PATCH 02/22] [Enhancement] Supply the `StorageFull` judgement if the main dir was full but the secondary dir was free to flush data. Signed-off-by: Lucasliang --- src/engine.rs | 15 ++++++++-- src/env/default.rs | 14 ++++++++- src/file_pipe_log/pipe.rs | 48 ++++++++++++++++++++++--------- src/log_batch.rs | 13 +++++++++ tests/failpoints/test_io_error.rs | 3 +- 5 files changed, 76 insertions(+), 17 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 0ff84b77..279b32bf 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -7,7 +7,7 @@ use std::sync::{mpsc, Arc, Mutex}; use std::thread::{Builder as ThreadBuilder, JoinHandle}; use std::time::{Duration, Instant}; -use log::{error, info}; +use log::{error, info, warn}; use protobuf::{parse_from_bytes, Message}; use crate::config::{Config, RecoveryMode}; @@ -191,7 +191,18 @@ where debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); perf_context += &writer.perf_context_diff; set_perf_context(perf_context); - writer.finish()? + // Retry if `writer.finish()` returns a special err, remarking there still + // exists free space for this `LogBatch`. + let ret = writer.finish(); + if let Err(Error::Other(e)) = ret { + warn!( + "Append failed, err: {}, try to re-append this log_batch into other log", + e + ); + log_batch.reset_to_encoded_state(); + return self.write(log_batch, sync); + } + ret? 
}; let mut now = Instant::now(); diff --git a/src/env/default.rs b/src/env/default.rs index 4c5de77d..1edb5774 100644 --- a/src/env/default.rs +++ b/src/env/default.rs @@ -122,7 +122,16 @@ impl LogFd { let bytes = match pwrite(self.0, &content[written..], offset as i64) { Ok(bytes) => bytes, Err(e) if e == Errno::EINTR => continue, - Err(e) => return Err(from_nix_error(e, "pwrite")), + Err(e) => { + return { + if e == Errno::ENOSPC { + // no space left + Err(from_nix_error(e, "nospace")) + } else { + Err(from_nix_error(e, "pwrite")) + } + }; + } }; if bytes == 0 { break; @@ -130,6 +139,9 @@ impl LogFd { written += bytes; offset += bytes; } + fail_point!("log_fd::write::no_space_err", |_| { + Err(from_nix_error(nix::Error::ENOSPC, "nospace")) + }); fail_point!("log_fd::write::err", |_| { Err(from_nix_error(nix::Error::EINVAL, "fp")) }); diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index c11668c3..f7690554 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -121,6 +121,10 @@ impl StorageInfo { } fn get_stale_files_dir(&self) -> Option<(String, StorageDirType)> { + #[cfg(feature = "failpoints")] + { + fail::fail_point!("file_pipe_log::force_no_free_space", |_| None); + } for t in StorageDirType::iter() { let idx = t as usize; if idx >= self.storage.len() { @@ -134,6 +138,10 @@ impl StorageInfo { } fn get_free_dir(&self, target_size: usize) -> Option<(String, StorageDirType)> { + #[cfg(feature = "failpoints")] + { + fail::fail_point!("file_pipe_log::force_no_free_space", |_| { None }); + } for t in StorageDirType::iter() { let idx = t as usize; if idx >= self.storage.len() { @@ -564,20 +572,34 @@ impl SinglePipe { seq, e, te ); } - return Err(e); - // TODO: if main dir is full, we should use the sec-dir and - // create a new file for appending bytes timely. - /* - // Dead lock - if let Err(re) = self.rotate_imp(&mut active_file) { - // ... - panic!("error when rotate {} after error: {}, get: {}", seq, e, re); - } else { - println!("Try to re-append bytes to new rotated file."); - self.active_file.force_unlock(); - return self.append(bytes); + // TODO: Refine the following judgement if the error type + // `ErrorKind::StorageFull` is stable. + let no_space_err = { + if_chain::if_chain! { + if let Error::Io(ref e) = e; + let err_msg = format!("{}", e.get_ref().unwrap()); + if err_msg.contains("nospace"); + then { + true + } else { + false + } + } + }; + // If there still exists free space for this record, a special Err will + // be returned to the caller. + let storage = self.storage.read(); + if no_space_err + && (storage.get_stale_files_dir().is_some() + || storage.get_free_dir(self.target_file_size).is_some()) + { + return Err(Error::Other(box_err!( + "failed to write {} file, get {} try to flush it to other dir", + seq, + e + ))); } - */ + return Err(e); } let handle = FileBlockHandle { id: FileId { diff --git a/src/log_batch.rs b/src/log_batch.rs index 5d3f0676..5000664a 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -727,6 +727,9 @@ impl LogBatch { /// compression type to each entry index. 
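+    /// Re-entrant: if the batch is already in `Encoded` state (e.g. it is
+    /// being re-appended after a failed write), the prior encoding is reused.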
pub(crate) fn finish_populate(&mut self, compression_threshold: usize) -> Result { let _t = StopWatch::new(perf_context!(log_populating_duration)); + if let BufState::Encoded(header_offset, _) = self.buf_state { + return Ok(self.buf.len() - header_offset); + } debug_assert!(self.buf_state == BufState::Open); if self.is_empty() { self.buf_state = BufState::Encoded(self.buf.len(), 0); @@ -830,6 +833,16 @@ impl LogBatch { self.item_batch.drain() } + /// Resets the `LogBatch` state to `Encoded(_, _)`. + pub(crate) fn reset_to_encoded_state(&mut self) { + match self.buf_state { + BufState::Sealed(header_offset, entries_len) => { + self.buf_state = BufState::Encoded(header_offset, entries_len); + } + _ => unreachable!(), + } + } + /// Returns approximate encoded size of this log batch. Might be larger /// than the actual size. pub fn approximate_size(&self) -> usize { diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index 219cc630..0b6cd973 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -220,7 +220,8 @@ fn test_concurrent_write_error() { let mut ctx = ConcurrentWriteContext::new(engine.clone()); // The second of three writes will fail. - fail::cfg("log_fd::write::err", "1*off->1*return->off").unwrap(); + let _f1 = FailGuard::new("log_fd::write::err", "1*off->1*return->off"); + let _f2 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); let entry_clone = entry.clone(); ctx.write_ext(move |e| { e.write(&mut generate_batch(1, 1, 11, Some(&entry_clone)), false) From 221cc8f5cd906ed8ba4e16f0088d1f3d3277376f Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Mon, 15 Aug 2022 17:58:23 +0800 Subject: [PATCH 03/22] Supply necessary corner cases when disk is full. Signed-off-by: Lucasliang --- src/engine.rs | 2 +- src/file_pipe_log/pipe.rs | 34 +++------- src/file_pipe_log/pipe_builder.rs | 107 ------------------------------ tests/failpoints/test_io_error.rs | 99 ++++++++++++++++++++++++++- 4 files changed, 106 insertions(+), 136 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 279b32bf..18fe6ced 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -196,7 +196,7 @@ where let ret = writer.finish(); if let Err(Error::Other(e)) = ret { warn!( - "Append failed, err: {}, try to re-append this log_batch into other log", + "Cannot append, err: {}, try to re-append this log_batch into other log", e ); log_batch.reset_to_encoded_state(); diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index f7690554..0c80bdb1 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -248,7 +248,6 @@ struct ActiveFile { /// A file-based log storage that arranges files as one single queue. pub(super) struct SinglePipe { queue: LogQueue, - /* dir: String, */ file_format: LogFileFormat, target_file_size: usize, bytes_per_sync: usize, @@ -373,7 +372,6 @@ impl SinglePipe { let total_files = fds.len(); let pipe = Self { queue, - /* dir: cfg.dir.clone(), */ file_format: LogFileFormat::new(cfg.format_version, alignment), target_file_size: cfg.target_file_size.0 as usize, bytes_per_sync: cfg.bytes_per_sync.0 as usize, @@ -396,11 +394,6 @@ impl SinglePipe { /// Synchronizes all metadatas associated with the working directory to the /// filesystem. 
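+    /// With a secondary dir configured, every storage dir is synced here.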
fn sync_dir(&self) -> Result<()> { - /* - let path = PathBuf::from(&self.dir); - std::fs::File::open(path).and_then(|d| d.sync_all())?; - Ok(()) - */ let mut storage = self.storage.write(); storage.sync_all_dir() } @@ -433,21 +426,9 @@ impl SinglePipe { queue: self.queue, seq, }; - /* - let path = file_id.build_file_path(&self.dir); - let fd = { - let mut files = self.files.write(); - if files.recycle_one_file(&self.file_system, &self.dir, file_id) { - // Open the recycled file(file is already renamed) - Arc::new(self.file_system.open(&path)?) - } else { - // A new file is introduced. - Arc::new(self.file_system.create(&path)?) - } - }; - */ + // Generate a new fd from a newly chosen file, might be reused from a stale + // file or generated from a newly created file. let (fd, storage_type) = { - // Generate the file path. let mut storage = self.storage.write(); let (dir, storage_type) = { if let Some((d, t)) = storage.get_stale_files_dir() { @@ -586,13 +567,14 @@ impl SinglePipe { } } }; + let has_free_space = { + let storage = self.storage.read(); + storage.get_stale_files_dir().is_some() + || storage.get_free_dir(self.target_file_size).is_some() + }; // If there still exists free space for this record, a special Err will // be returned to the caller. - let storage = self.storage.read(); - if no_space_err - && (storage.get_stale_files_dir().is_some() - || storage.get_free_dir(self.target_file_size).is_some()) - { + if no_space_err && has_free_space { return Err(Error::Other(box_err!( "failed to write {} file, get {} try to flush it to other dir", seq, diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index 66083b3c..a58d8bb5 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -283,113 +283,6 @@ impl DualPipesBuilder { } } } - /* - let dir = &self.cfg.dir; - let path = Path::new(dir); - if !path.exists() { - info!("Create raft log directory: {}", dir); - fs::create_dir(dir)?; - self.dir_lock = Some(lock_dir(dir)?); - return Ok(()); - } - if !path.is_dir() { - return Err(box_err!("Not directory: {}", dir)); - } - self.dir_lock = Some(lock_dir(dir)?); - - let (mut min_append_id, mut max_append_id) = (u64::MAX, 0); - let (mut min_rewrite_id, mut max_rewrite_id) = (u64::MAX, 0); - fs::read_dir(path)?.for_each(|e| { - if let Ok(e) = e { - let p = e.path(); - if p.is_file() { - match FileId::parse_file_name(p.file_name().unwrap().to_str().unwrap()) { - Some(FileId { - queue: LogQueue::Append, - seq, - }) => { - min_append_id = std::cmp::min(min_append_id, seq); - max_append_id = std::cmp::max(max_append_id, seq); - } - Some(FileId { - queue: LogQueue::Rewrite, - seq, - }) => { - min_rewrite_id = std::cmp::min(min_rewrite_id, seq); - max_rewrite_id = std::cmp::max(max_rewrite_id, seq); - } - _ => {} - } - } - } - }); - - for (queue, min_id, max_id, files) in [ - ( - LogQueue::Append, - min_append_id, - max_append_id, - &mut self.append_files, - ), - ( - LogQueue::Rewrite, - min_rewrite_id, - max_rewrite_id, - &mut self.rewrite_files, - ), - ] { - if max_id > 0 { - // Try to cleanup stale metadata left by the previous version. - let max_sample = 100; - // Find the first obsolete metadata. 
- let mut delete_start = None; - for i in 0..max_sample { - let seq = i * min_id / max_sample; - let file_id = FileId { queue, seq }; - let path = file_id.build_file_path(dir); - if self.file_system.exists_metadata(&path) { - delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); - break; - } - } - // Delete metadata starting from the oldest. Abort on error. - if let Some(start) = delete_start { - let mut success = 0; - for seq in start..min_id { - let file_id = FileId { queue, seq }; - let path = file_id.build_file_path(dir); - if let Err(e) = self.file_system.delete_metadata(&path) { - error!("failed to delete metadata of {}: {}.", path.display(), e); - break; - } - success += 1; - } - warn!( - "deleted {} stale files of {:?} in range [{}, {}).", - success, queue, start, min_id, - ); - } - for seq in min_id..=max_id { - let file_id = FileId { queue, seq }; - let path = file_id.build_file_path(dir); - if !path.exists() { - warn!( - "Detected a hole when scanning directory, discarding files before {:?}.", - file_id, - ); - files.clear(); - } else { - let handle = Arc::new(self.file_system.open(&path)?); - files.push(FileToRecover { - seq, - handle, - format: None, - }); - } - } - } - } - */ Ok(()) } diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index 0b6cd973..bf21c2d8 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -220,8 +220,7 @@ fn test_concurrent_write_error() { let mut ctx = ConcurrentWriteContext::new(engine.clone()); // The second of three writes will fail. - let _f1 = FailGuard::new("log_fd::write::err", "1*off->1*return->off"); - let _f2 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); + fail::cfg("log_fd::write::err", "1*off->1*return->off").unwrap(); let entry_clone = entry.clone(); ctx.write_ext(move |e| { e.write(&mut generate_batch(1, 1, 11, Some(&entry_clone)), false) @@ -288,6 +287,102 @@ fn test_concurrent_write_error() { ); } +#[test] +fn test_no_space_write_error() { + let dir = tempfile::Builder::new() + .prefix("test_no_space_write_error") + .tempdir() + .unwrap(); + let cfg = Config { + dir: dir.path().to_str().unwrap().to_owned(), + target_file_size: ReadableSize(1), + ..Default::default() + }; + let entry = vec![b'x'; 1024]; + { + // If disk is full, a new Engine cannot be opened. + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + assert!(Engine::open(cfg.clone()).is_err()); + } + { + // If disk is full after writing, the old engine should be available + // for `read`. + let engine = Engine::open(cfg.clone()).unwrap(); + engine + .write(&mut generate_batch(1, 11, 21, Some(&entry)), true) + .unwrap(); + drop(engine); + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!( + 10, + engine + .fetch_entries_to::(1, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // `Write` is abnormal for no space left, Engine should panic at `rotate`. + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "off"); + let _f2 = FailGuard::new("log_fd::write::no_space_err", "return"); + let engine = Engine::open(cfg.clone()).unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .unwrap_err(); + }) + .is_err()); + } + { + // Disk goes from `full` -> `spare`. 
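+        // The initial ENOSPC is absorbed by the engine's retry path: the
+        // sealed batch is reset to its encoded state and appended again.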
+ let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*return->off"); + let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); + let engine = Engine::open(cfg.clone()).unwrap(); + engine + .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .unwrap(); + engine + .write(&mut generate_batch(3, 11, 21, Some(&entry)), true) + .unwrap(); + assert_eq!( + 10, + engine + .fetch_entries_to::(2, 11, 21, None, &mut vec![]) + .unwrap() + ); + assert_eq!( + 10, + engine + .fetch_entries_to::(3, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // Disk is `full`, the `write` operation should `panic` at `rotate_imp`. + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + let _f2 = FailGuard::new("log_fd::write::no_space_err", "return"); + let engine = Engine::open(cfg).unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) + .unwrap_err(); + }) + .is_err()); + assert_eq!( + 10, + engine + .fetch_entries_to::(2, 11, 21, None, &mut vec![]) + .unwrap() + ); + assert_eq!( + 10, + engine + .fetch_entries_to::(3, 11, 21, None, &mut vec![]) + .unwrap() + ); + } +} + #[test] fn test_non_atomic_write_error() { let dir = tempfile::Builder::new() From a97ca69a8bef00d28faeab16f1f44e5d7435f37d Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Tue, 16 Aug 2022 16:07:08 +0800 Subject: [PATCH 04/22] Polish codes and supply corner cases when `sub-dir` is set for use. Signed-off-by: Lucasliang --- src/config.rs | 18 ++++ src/env/default.rs | 25 +++++ src/env/mod.rs | 2 + src/file_pipe_log/pipe.rs | 24 ++--- src/file_pipe_log/pipe_builder.rs | 95 ++++++++--------- tests/failpoints/test_engine.rs | 170 ++++++++++++++++++++++++++++++ tests/failpoints/test_io_error.rs | 13 ++- 7 files changed, 282 insertions(+), 65 deletions(-) diff --git a/src/config.rs b/src/config.rs index 823d4ac1..20d63644 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,6 +3,7 @@ use log::warn; use serde::{Deserialize, Serialize}; +use crate::env::from_same_dev; use crate::pipe_log::Version; use crate::{util::ReadableSize, Result}; @@ -131,6 +132,23 @@ impl Default for Config { impl Config { pub fn sanitize(&mut self) -> Result<()> { + if let Some(ref sub_dir) = self.sub_dir { + match from_same_dev(&self.dir, sub_dir) { + Ok(false) => { + // dir and sub-dir are on different device. 
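+                    // Nothing to do: keep the configured sub-dir as-is.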
+                }
+                Ok(true) => {
+                    warn!(
+                        "sub-dir ({}) is on the same device as dir ({}), ignoring it",
+                        sub_dir, self.dir
+                    );
+                    self.sub_dir = None;
+                }
+                Err(e) => {
+                    return Err(box_err!("invalid sub-dir or main dir, err: {}", e));
+                }
+            }
+        }
         if self.purge_threshold.0 < self.target_file_size.0 {
             return Err(box_err!("purge-threshold < target-file-size"));
         }
diff --git a/src/env/default.rs b/src/env/default.rs
index 1edb5774..0930ae50 100644
--- a/src/env/default.rs
+++ b/src/env/default.rs
@@ -21,6 +21,31 @@ fn from_nix_error(e: nix::Error, custom: &'static str) -> std::io::Error {
     std::io::Error::new(kind, custom)
 }
 
+pub fn from_same_dev<P: AsRef<Path>>(dir1: P, dir2: P) -> IoResult<bool> {
+    fail_point!("env::force_on_different_dev", |_| { Ok(false) });
+    if dir1.as_ref().starts_with(dir2.as_ref()) || dir2.as_ref().starts_with(dir1.as_ref()) {
+        return Ok(true);
+    }
+    #[cfg(all(unix, not(target_os = "linux")))]
+    {
+        use std::os::unix::fs::MetadataExt;
+        let meta1 = std::fs::metadata(dir1.as_ref())?;
+        let meta2 = std::fs::metadata(dir2.as_ref())?;
+        Ok(meta1.dev() == meta2.dev())
+    }
+    #[cfg(target_os = "linux")]
+    {
+        use std::os::linux::fs::MetadataExt;
+        let meta1 = std::fs::metadata(dir1.as_ref())?;
+        let meta2 = std::fs::metadata(dir2.as_ref())?;
+        Ok(meta1.st_dev() == meta2.st_dev())
+    }
+    #[cfg(not(unix))]
+    {
+        unimplemented!()
+    }
+}
+
 /// A RAII-style low-level file. Errors occurred during automatic resource
 /// release are logged and ignored.
 ///
diff --git a/src/env/mod.rs b/src/env/mod.rs
index 50cce189..92e5c02c 100644
--- a/src/env/mod.rs
+++ b/src/env/mod.rs
@@ -10,6 +10,8 @@ mod obfuscated;
 pub use default::DefaultFileSystem;
 pub use obfuscated::ObfuscatedFileSystem;
 
+pub(crate) use default::from_same_dev;
+
 /// FileSystem
 pub trait FileSystem: Send + Sync {
     type Handle: Send + Sync + Handle;
diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs
index 0c80bdb1..af5ac1bb 100644
--- a/src/file_pipe_log/pipe.rs
+++ b/src/file_pipe_log/pipe.rs
@@ -28,7 +28,7 @@ pub enum StorageDirType {
     Secondary = 1,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Clone)]
 struct SingleStorageInfo {
     dir: String,
     stale_files: HashSet<FileSeq>,
@@ -73,14 +73,12 @@ impl SingleStorageInfo {
     }
 }
 
-#[derive(Debug, Clone)]
+#[derive(Clone)]
 struct StorageInfo {
     storage: Vec<SingleStorageInfo>,
 }
 
 impl StorageInfo {
-    // fn get_disk_stat(&self, target: StorageDirType) -> Result {}
-
     fn new(dir: String, sub_dir: Option<String>) -> Self {
         let mut storage = vec![SingleStorageInfo::new(dir)];
         if let Some(sub) = sub_dir {
@@ -121,10 +119,6 @@ impl StorageInfo {
     }
 
     fn get_stale_files_dir(&self) -> Option<(String, StorageDirType)> {
-        #[cfg(feature = "failpoints")]
-        {
-            fail::fail_point!("file_pipe_log::force_no_free_space", |_| None);
-        }
         for t in StorageDirType::iter() {
             let idx = t as usize;
             if idx >= self.storage.len() {
@@ -140,6 +134,12 @@ impl StorageInfo {
     fn get_free_dir(&self, target_size: usize) -> Option<(String, StorageDirType)> {
         #[cfg(feature = "failpoints")]
         {
+            fail::fail_point!("file_pipe_log::force_use_secondary_dir", |_| {
+                Some((
+                    self.storage[StorageDirType::Secondary as usize].dir.clone(),
+                    StorageDirType::Secondary,
+                ))
+            });
             fail::fail_point!("file_pipe_log::force_no_free_space", |_| { None });
         }
         for t in StorageDirType::iter() {
@@ -282,7 +282,7 @@ impl<F: FileSystem> Drop for SinglePipe<F> {
             queue: self.queue,
             seq,
         };
-        let path = file_id.build_file_path(&dir.unwrap().0);
+        let path = file_id.build_file_path(&dir.as_ref().unwrap().0);
         if let Err(e) = self.file_system.delete(&path) {
error!( "error while deleting stale file: {}, err_msg: {}", @@ -705,11 +705,7 @@ impl SinglePipe { Some(t) => { storage.push_stale_file(seq, t); } - None => - // break, - { - continue - } + None => break, } } Ok(purged) diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index a58d8bb5..cdbcac34 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -152,6 +152,49 @@ impl DualPipesBuilder { Ok(()) } + fn clean_stale_metadata( + file_system: &F, + dir: &str, + min_id: u64, + queue: LogQueue, + ) { + // Try to cleanup stale metadata left by the previous version. + let max_sample = 100; + // Find the first obsolete metadata. + let mut delete_start = None; + for i in 0..max_sample { + let seq = i * min_id / max_sample; + let file_id = FileId { queue, seq }; + // Main dir + let path = file_id.build_file_path(&dir); + if file_system.exists_metadata(&path) { + delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); + break; + } + } + // Delete metadata starting from the oldest. Abort on error. + if let Some(start) = delete_start { + let mut success = 0; + for seq in start..min_id { + let file_id = FileId { queue, seq }; + let path = file_id.build_file_path(&dir); + if file_system.exists_metadata(&path) { + if let Err(e) = file_system.delete_metadata(&path) { + error!("failed to delete metadata of {}: {}.", path.display(), e); + break; + } + } else { + continue; + } + success += 1; + } + warn!( + "deleted {} stale files of {:?} in range [{}, {}).", + success, queue, start, min_id, + ); + } + } + // Validate main dir. let dir = &self.cfg.dir; validate_dir(dir)?; @@ -204,53 +247,11 @@ impl DualPipesBuilder { ), ] { if max_id > 0 { - // Try to cleanup stale metadata left by the previous version. - let max_sample = 100; - // Find the first obsolete metadata. - let mut delete_start = None; - for i in 0..max_sample { - let seq = i * min_id / max_sample; - let file_id = FileId { queue, seq }; - // Main dir - let path = file_id.build_file_path(&self.cfg.dir); - if self.file_system.exists_metadata(&path) { - delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); - break; - } - // Secondary dir - if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { - let path = file_id.build_file_path(sub_dir); - if self.file_system.exists_metadata(&path) { - delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); - break; - } - } - } - // Delete metadata starting from the oldest. Abort on error. - if let Some(start) = delete_start { - let mut success = 0; - for seq in start..min_id { - let file_id = FileId { queue, seq }; - // Main dir - let path = file_id.build_file_path(&self.cfg.dir); - if let Err(e) = self.file_system.delete_metadata(&path) { - error!("failed to delete metadata of {}: {}.", path.display(), e); - break; - } - // Secondary dir - if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { - let path = file_id.build_file_path(sub_dir); - if let Err(e) = self.file_system.delete_metadata(&path) { - error!("failed to delete metadata of {}: {}.", path.display(), e); - break; - } - } - success += 1; - } - warn!( - "deleted {} stale files of {:?} in range [{}, {}).", - success, queue, start, min_id, - ); + // Clean stale metadata in main dir. + clean_stale_metadata(self.file_system.as_ref(), &self.cfg.dir, min_id, queue); + // Clean stale metadata in secondary dir if it was specified. 
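+                // Both dirs share a single sequence space, so the same
+                // `min_id` bound applies to each of them.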
+ if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { + clean_stale_metadata(self.file_system.as_ref(), sub_dir, min_id, queue); } for seq in min_id..=max_id { let file_id = FileId { queue, seq }; diff --git a/tests/failpoints/test_engine.rs b/tests/failpoints/test_engine.rs index 93faf640..b9bda1a4 100644 --- a/tests/failpoints/test_engine.rs +++ b/tests/failpoints/test_engine.rs @@ -723,3 +723,173 @@ fn test_build_engine_with_datalayout_abnormal() { Engine::open(cfg).unwrap(); } } + +#[test] +fn test_build_engine_with_multi_dir() { + let dir = tempfile::Builder::new() + .prefix("test_build_engine_with_multi_dir_1") + .tempdir() + .unwrap(); + let sub_dir = tempfile::Builder::new() + .prefix("test_build_engine_with_multi_dir_2") + .tempdir() + .unwrap(); + let data = vec![b'x'; 1024]; + let rid = 1; + let cfg = Config { + dir: dir.path().to_str().unwrap().to_owned(), + sub_dir: Some(sub_dir.path().to_str().unwrap().to_owned()), + target_file_size: ReadableSize::kb(2), + purge_threshold: ReadableSize::kb(4), + ..Default::default() + }; + let mut start_seq: u64; + { + // Set config with abnormal settings + let abnormal_dir = "./abnormal_testing"; + let path = std::path::Path::new(abnormal_dir); + if !path.exists() { + std::fs::create_dir(path).unwrap(); + } + let cfg_err = Config { + sub_dir: Some(abnormal_dir.to_owned()), + ..cfg.clone() + }; + let engine = Engine::open(cfg_err).unwrap(); + append(&engine, rid, 1, 2, Some(&data)); // file_seq: 1 + append(&engine, rid, 2, 3, Some(&data)); + append(&engine, rid, 3, 4, Some(&data)); // file_seq: 2 + append(&engine, rid, 4, 5, Some(&data)); + append(&engine, rid, 5, 6, Some(&data)); // file_seq: 3 + start_seq = engine.file_span(LogQueue::Append).0; + assert_eq!( + 5, + engine + .fetch_entries_to::( + rid, /* region */ + 1, /* begin */ + 6, /* end */ + None, /* max_size */ + &mut vec![], + ) + .unwrap() + ); + engine.compact_to(rid, 3); + engine.purge_expired_files().unwrap(); + assert!(engine.file_span(LogQueue::Append).0 > start_seq); + start_seq = engine.file_span(LogQueue::Append).0; + } + // Open engine with multi directories, main dir and secondary dir. 
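+    // Phases below: (1) write to the main dir, (2) force writes into the
+    // secondary dir, (3) fall back to the main dir, (4) exercise log
+    // recycling across both dirs.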
+ { + // (1) Write to main dir + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + append(&engine, rid, 6, 7, Some(&data)); + append(&engine, rid, 7, 8, Some(&data)); // file_seq: 4 + append(&engine, rid, 8, 9, Some(&data)); + append(&engine, rid, 9, 10, Some(&data)); // file_seq: 5 + assert_eq!( + 6, + engine + .fetch_entries_to::(rid, 4, 10, None, &mut vec![],) + .unwrap() + ); + engine.compact_to(rid, 8); + engine.purge_expired_files().unwrap(); + assert!(engine.file_span(LogQueue::Append).0 > start_seq); + start_seq = engine.file_span(LogQueue::Append).0; + } + { + // (2) Write to secondary dir + let _f1 = FailGuard::new("env::force_on_different_dev", "return"); + let _f2 = FailGuard::new("file_pipe_log::force_use_secondary_dir", "return"); + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + append(&engine, rid, 10, 11, Some(&data)); + append(&engine, rid, 11, 12, Some(&data)); + append(&engine, rid, 12, 13, Some(&data)); + assert_eq!( + 4, + engine + .fetch_entries_to::(rid, 9, 13, None, &mut vec![],) + .unwrap() + ); + engine.compact_to(rid, 11); + engine.purge_expired_files().unwrap(); + assert!(engine.file_span(LogQueue::Append).0 > start_seq); + start_seq = engine.file_span(LogQueue::Append).0; + drop(engine); + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + append(&engine, rid, 13, 14, Some(&data)); + append(&engine, rid, 14, 15, Some(&data)); + assert_eq!( + 3, + engine + .fetch_entries_to::(rid, 12, 15, None, &mut vec![],) + .unwrap() + ); + } + { + // (3) Back to main dir + let _f1 = FailGuard::new("env::force_on_different_dev", "return"); + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + append(&engine, rid, 15, 16, Some(&data)); + append(&engine, rid, 16, 17, Some(&data)); + append(&engine, rid, 17, 18, Some(&data)); + append(&engine, rid, 18, 19, Some(&data)); + assert_eq!( + 5, + engine + .fetch_entries_to::(rid, 12, 17, None, &mut vec![],) + .unwrap() + ); + let before = engine.file_span(LogQueue::Append); + drop(engine); + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append), before); + } + { + // (4) Open recycling logs feature. 
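+        // Recycled (renamed) stale files must stay usable across reopen,
+        // even when they live in the secondary dir.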
+ let _f1 = FailGuard::new("env::force_on_different_dev", "return"); + let cfg_rec = Config { + format_version: Version::V2, + enable_log_recycle: true, + target_file_size: ReadableSize(1), + purge_threshold: ReadableSize(4), + ..cfg + }; + let engine = Engine::open(cfg_rec.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + engine.compact_to(rid, 18); + engine.purge_expired_files().unwrap(); + assert!(engine.file_span(LogQueue::Append).0 > start_seq); + append(&engine, rid, 19, 20, Some(&data)); + append(&engine, rid, 20, 21, Some(&data)); + append(&engine, rid, 21, 22, Some(&data)); + let before = engine.file_span(LogQueue::Append); + drop(engine); + // recycling stale files by compaction + let engine = Engine::open(cfg_rec.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append), before); + engine.compact_to(rid, 20); + engine.purge_expired_files().unwrap(); + append(&engine, rid, 22, 23, Some(&data)); // reuse + append(&engine, rid, 23, 24, Some(&data)); // reuse + start_seq = engine.file_span(LogQueue::Append).0; + drop(engine); + let engine = Engine::open(cfg_rec.clone()).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + // append records to secondary dir -> recycle -> reopen() + let _f = FailGuard::new("file_pipe_log::force_use_secondary_dir", "return"); + append(&engine, rid, 24, 25, Some(&data)); + append(&engine, rid, 25, 26, Some(&data)); + engine.compact_to(rid, 21); + engine.purge_expired_files().unwrap(); + start_seq = engine.file_span(LogQueue::Append).0; + drop(engine); + let engine = Engine::open(cfg_rec).unwrap(); + assert_eq!(engine.file_span(LogQueue::Append).0, start_seq); + } +} diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index bf21c2d8..d9e0c99a 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -301,7 +301,7 @@ fn test_no_space_write_error() { let entry = vec![b'x'; 1024]; { // If disk is full, a new Engine cannot be opened. - let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + let _f = FailGuard::new("file_pipe_log::force_no_free_space", "return"); assert!(Engine::open(cfg.clone()).is_err()); } { @@ -312,7 +312,7 @@ fn test_no_space_write_error() { .write(&mut generate_batch(1, 11, 21, Some(&entry)), true) .unwrap(); drop(engine); - let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + let _f = FailGuard::new("file_pipe_log::force_no_free_space", "return"); let engine = Engine::open(cfg.clone()).unwrap(); assert_eq!( 10, @@ -323,8 +323,7 @@ fn test_no_space_write_error() { } { // `Write` is abnormal for no space left, Engine should panic at `rotate`. 
- let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "off"); - let _f2 = FailGuard::new("log_fd::write::no_space_err", "return"); + let _f = FailGuard::new("log_fd::write::no_space_err", "return"); let engine = Engine::open(cfg.clone()).unwrap(); assert!(catch_unwind_silent(|| { engine @@ -338,6 +337,12 @@ fn test_no_space_write_error() { let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*return->off"); let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); let engine = Engine::open(cfg.clone()).unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .unwrap(); + }) + .is_err()); engine .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) .unwrap(); From 3da8d130431b035e73407d294dd21c5e7456bd4b Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Tue, 16 Aug 2022 19:25:26 +0800 Subject: [PATCH 05/22] Supply basic uts for StorageInfo to improve the codepath coverage ratio. Signed-off-by: Lucasliang --- src/config.rs | 18 -------- src/file_pipe_log/pipe.rs | 74 ++++++++++++++++++++++++++++++- src/file_pipe_log/pipe_builder.rs | 45 ++++++++++++------- tests/failpoints/test_engine.rs | 43 ++++++++++++++---- tests/failpoints/test_io_error.rs | 44 +++++++++--------- 5 files changed, 161 insertions(+), 63 deletions(-) diff --git a/src/config.rs b/src/config.rs index 20d63644..823d4ac1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,7 +3,6 @@ use log::warn; use serde::{Deserialize, Serialize}; -use crate::env::from_same_dev; use crate::pipe_log::Version; use crate::{util::ReadableSize, Result}; @@ -132,23 +131,6 @@ impl Default for Config { impl Config { pub fn sanitize(&mut self) -> Result<()> { - if let Some(ref sub_dir) = self.sub_dir { - match from_same_dev(&self.dir, sub_dir) { - Ok(false) => { - // dir and sub-dir are on different device. - } - Ok(true) => { - warn!( - "sub-dir ({}) and dir ({}) are on same device, ignore it", - sub_dir, self.dir - ); - self.sub_dir = None; - } - Err(e) => { - return Err(box_err!("invalid sub-dir or main dir, err: {}", e)); - } - } - } if self.purge_threshold.0 < self.target_file_size.0 { return Err(box_err!("purge-threshold < target-file-size")); } diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index af5ac1bb..ed078202 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -73,7 +73,8 @@ impl SingleStorageInfo { } } -#[derive(Clone)] +/// Represents the info of storage dirs, including `main dir` and +/// `secondary dir`. struct StorageInfo { storage: Vec, } @@ -1149,4 +1150,75 @@ mod tests { assert_eq!(file_context.version, Version::V2); assert_eq!(file_context.id.seq, 3); } + + #[test] + fn test_storage_info() { + let dir = Builder::new() + .prefix("test_storage_info") + .tempdir() + .unwrap(); + let secondary_dir = Builder::new() + .prefix("test_storage_info_sec") + .tempdir() + .unwrap(); + let path = dir.path().to_str().unwrap().to_owned(); + let sec_path = secondary_dir.path().to_str().unwrap().to_owned(); + { + // Tests SingleStorageInfo. 
+ let mut main_storage = SingleStorageInfo::new(path.clone()); + assert!(!main_storage.has_stale_file()); + assert!(!main_storage.find_stale_file(10)); + assert!(!main_storage.find_in_use_file(1)); + main_storage.push_in_use_file(10); + main_storage.push_stale_file(1); + let mut cpy_storage = main_storage.clone(); + assert!(cpy_storage.has_stale_file()); + assert!(cpy_storage.find_stale_file(1)); + assert!(cpy_storage.find_in_use_file(10)); + assert!(!cpy_storage.pop_in_use_file(1)); + assert!(cpy_storage.pop_stale_file(1)); + assert!(cpy_storage.pop_in_use_file(10)); + } + { + // Tests StorageInfo with main dir only. + let mut storage = StorageInfo::new(path.clone(), None); + assert!(storage.push_in_use_file(1, StorageDirType::Main)); + assert!(!storage.push_in_use_file(2, StorageDirType::Secondary)); + assert!(!storage.push_stale_file(2, StorageDirType::Secondary)); + assert_eq!(storage.pop_file(1).unwrap(), StorageDirType::Main); + assert!(storage.pop_file(1).is_none()); + assert!(storage.find_dir(1).is_none()); + assert!(storage.push_stale_file(1, StorageDirType::Main)); + assert_eq!( + storage.find_dir(1).unwrap(), + (path.clone(), StorageDirType::Main) + ); + assert!(storage.get_free_dir(usize::MAX).is_none()); + assert_eq!( + storage.get_free_dir(16).unwrap(), + (path.clone(), StorageDirType::Main) + ); + } + { + // Tests StorageInfo with multi dirs. + let mut storage = StorageInfo::new(path.clone(), Some(sec_path.clone())); + assert!(storage.push_in_use_file(1, StorageDirType::Main)); + assert!(storage.push_stale_file(4, StorageDirType::Main)); + assert!(storage.push_in_use_file(2, StorageDirType::Secondary)); + assert!(storage.push_stale_file(3, StorageDirType::Secondary)); + assert_eq!(storage.pop_file(1).unwrap(), StorageDirType::Main); + assert_eq!(storage.pop_file(3).unwrap(), StorageDirType::Secondary); + assert!(storage.pop_file(1).is_none()); + assert!(storage.find_dir(1).is_none()); + assert_eq!( + storage.find_dir(2).unwrap(), + (sec_path, StorageDirType::Secondary) + ); + assert!(storage.get_free_dir(usize::MAX).is_none()); + assert_eq!( + storage.get_free_dir(16).unwrap(), + (path, StorageDirType::Main) + ); + } + } } diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index cdbcac34..f0dd872b 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -14,7 +14,7 @@ use log::{error, info, warn}; use rayon::prelude::*; use crate::config::{Config, RecoveryMode}; -use crate::env::FileSystem; +use crate::env::{from_same_dev, FileSystem}; use crate::event_listener::EventListener; use crate::log_batch::LogItemBatch; use crate::pipe_log::{FileId, FileSeq, LogQueue}; @@ -121,8 +121,9 @@ impl DualPipesBuilder { rewrite_range: &mut FileSeqRange, append_dict: &mut FileSeqDict, rewrite_dict: &mut FileSeqDict, - ) -> Result<()> { + ) -> Result { let path = Path::new(dir); + let mut valid_file_count = 0_u64; fs::read_dir(path)?.for_each(|e| { if let Ok(e) = e { let p = e.path(); @@ -135,6 +136,7 @@ impl DualPipesBuilder { append_dict.insert(seq, dir_type); append_range.0 = std::cmp::min(append_range.0, seq); append_range.1 = std::cmp::max(append_range.1, seq); + valid_file_count += 1; } Some(FileId { queue: LogQueue::Rewrite, @@ -143,13 +145,14 @@ impl DualPipesBuilder { rewrite_dict.insert(seq, dir_type); rewrite_range.0 = std::cmp::min(rewrite_range.0, seq); rewrite_range.1 = std::cmp::max(rewrite_range.1, seq); + valid_file_count += 1; } _ => {} } } } }); - Ok(()) + Ok(valid_file_count) } fn clean_stale_metadata( @@ 
-204,13 +207,13 @@ impl DualPipesBuilder { validate_dir(sub_dir)?; } - type FileSeqRange = (u64, u64); // (minimal_id, maximal_id) - type FileSeqDict = HashMap; // + type FileSeqRange = (u64, u64); /* (minimal_id, maximal_id) */ + type FileSeqDict = HashMap; /* HashMap */ let mut append_id_range = (u64::MAX, 0_u64); let mut rewrite_id_range = (u64::MAX, 0_u64); let mut append_file_dict: HashMap = HashMap::default(); let mut rewrite_file_dict: HashMap = HashMap::default(); - + // Parse files in `dir`. parse_file_range( dir, StorageDirType::Main, @@ -219,15 +222,27 @@ impl DualPipesBuilder { &mut append_file_dict, &mut rewrite_file_dict, )?; - if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { - parse_file_range( - sub_dir, - StorageDirType::Secondary, - &mut append_id_range, - &mut rewrite_id_range, - &mut append_file_dict, - &mut rewrite_file_dict, - )?; + // Parse files in `sub-dir`. + if_chain::if_chain! { + if let Some(sub_dir) = self.cfg.sub_dir.as_ref(); + if let Ok(0) = parse_file_range( + sub_dir, + StorageDirType::Secondary, + &mut append_id_range, + &mut rewrite_id_range, + &mut append_file_dict, + &mut rewrite_file_dict, + ); + if let Ok(true) = from_same_dev(&self.cfg.dir, sub_dir); + then { + // If the count of valid file in secondary dir was 0, we directly + // reset the `cfg.sub_dir` to None. + warn!( + "sub-dir ({}) and dir ({}) are on same device, ignore it", + sub_dir, self.cfg.dir + ); + self.cfg.sub_dir = None; // reset to None + } } for (queue, min_id, max_id, files, file_dict) in [ diff --git a/tests/failpoints/test_engine.rs b/tests/failpoints/test_engine.rs index b9bda1a4..e2c9936a 100644 --- a/tests/failpoints/test_engine.rs +++ b/tests/failpoints/test_engine.rs @@ -745,15 +745,13 @@ fn test_build_engine_with_multi_dir() { }; let mut start_seq: u64; { - // Set config with abnormal settings - let abnormal_dir = "./abnormal_testing"; - let path = std::path::Path::new(abnormal_dir); - if !path.exists() { - std::fs::create_dir(path).unwrap(); - } + // Set config with abnormal settings => same prefix + let abnormal_dir = format!("{}/abnormal", dir.path().to_str().unwrap().to_owned()); + let abnormal_sub_dir = format!("{}/testing", abnormal_dir); let cfg_err = Config { - sub_dir: Some(abnormal_dir.to_owned()), - ..cfg.clone() + dir: abnormal_dir, + sub_dir: Some(abnormal_sub_dir), + ..cfg }; let engine = Engine::open(cfg_err).unwrap(); append(&engine, rid, 1, 2, Some(&data)); // file_seq: 1 @@ -777,6 +775,35 @@ fn test_build_engine_with_multi_dir() { engine.compact_to(rid, 3); engine.purge_expired_files().unwrap(); assert!(engine.file_span(LogQueue::Append).0 > start_seq); + assert_eq!( + 2, + engine + .fetch_entries_to::(rid, 4, 6, None, &mut vec![],) + .unwrap() + ); + } + { + // Set config with abnormal settings => same device + let cfg_err = Config { + sub_dir: Some("./abnormal_testing".to_owned()), + ..cfg.clone() + }; + let engine = Engine::open(cfg_err).unwrap(); + append(&engine, rid, 1, 2, Some(&data)); // file_seq: 1 + append(&engine, rid, 2, 3, Some(&data)); + append(&engine, rid, 3, 4, Some(&data)); // file_seq: 2 + append(&engine, rid, 4, 5, Some(&data)); + append(&engine, rid, 5, 6, Some(&data)); // file_seq: 3 + start_seq = engine.file_span(LogQueue::Append).0; + assert_eq!( + 5, + engine + .fetch_entries_to::(rid, 1, 6, None, &mut vec![],) + .unwrap() + ); + engine.compact_to(rid, 3); + engine.purge_expired_files().unwrap(); + assert!(engine.file_span(LogQueue::Append).0 > start_seq); start_seq = engine.file_span(LogQueue::Append).0; } // Open engine with 
multiple directories (main dir and secondary dir). diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index d9e0c99a..ec804bbc 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -295,7 +295,7 @@ fn test_no_space_write_error() { .unwrap(); let cfg = Config { dir: dir.path().to_str().unwrap().to_owned(), - target_file_size: ReadableSize(1), + target_file_size: ReadableSize(2048), ..Default::default() }; let entry = vec![b'x'; 1024]; @@ -324,7 +324,11 @@ fn test_no_space_write_error() { { // `Write` is abnormal for no space left, Engine should panic at `rotate`. let _f = FailGuard::new("log_fd::write::no_space_err", "return"); - let engine = Engine::open(cfg.clone()).unwrap(); + let cfg_err = Config { + target_file_size: ReadableSize(1), + ..cfg.clone() + }; + let engine = Engine::open(cfg_err).unwrap(); assert!(catch_unwind_silent(|| { engine .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) @@ -333,16 +337,10 @@ fn test_no_space_write_error() { .is_err()); } { - // Disk goes from `full` -> `spare`. - let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*return->off"); + // Disk goes from `spare` -> `full` -> `spare`. + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); let engine = Engine::open(cfg.clone()).unwrap(); - assert!(catch_unwind_silent(|| { - engine - .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) - .unwrap(); - }) - .is_err()); engine .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) .unwrap(); @@ -363,16 +361,20 @@ fn test_no_space_write_error() { ); } { - // Disk is `full`, the `write` operation should `panic` at `rotate_imp`. - let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "return"); - let _f2 = FailGuard::new("log_fd::write::no_space_err", "return"); - let engine = Engine::open(cfg).unwrap(); - assert!(catch_unwind_silent(|| { - engine - .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) - .unwrap_err(); - }) - .is_err()); + // Disk is `full` -> `spare`, the first `write` operation should fail. + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*return->off"); + let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); + let cfg_err = Config { + target_file_size: ReadableSize(1), + ..cfg + }; + let engine = Engine::open(cfg_err).unwrap(); + engine + .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) + .unwrap_err(); + engine + .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) + .unwrap(); assert_eq!( 10, engine @@ -382,7 +384,7 @@ fn test_no_space_write_error() { assert_eq!( 10, engine - .fetch_entries_to::(3, 11, 21, None, &mut vec![]) + .fetch_entries_to::(4, 11, 21, None, &mut vec![]) .unwrap() ); } From 5231a07d8389a60b2323de5962410e1c6f6585d1 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Wed, 17 Aug 2022 15:26:28 +0800 Subject: [PATCH 06/22] Supply examples for setting secondary-dir with unit tests in config.rs. 
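For reference, the configuration shape exercised by this patch can be reproduced outside the crate. A minimal sketch, assuming the `serde` and `toml` crates; `MiniConfig` is a hypothetical stand-in for the crate's `Config`, and the paths are illustrative (the option is renamed to `secondary-dir` later in this series):

use serde::Deserialize;

#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
struct MiniConfig {
    dir: String,
    // Deserialized from the `sub-dir` key in TOML.
    sub_dir: Option<String>,
}

fn main() {
    let toml_str = r#"
        dir = "./test/"
        sub-dir = "./test_secondary"
    "#;
    let cfg: MiniConfig = toml::from_str(toml_str).unwrap();
    assert_eq!(cfg.sub_dir.as_deref(), Some("./test_secondary"));
}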
Signed-off-by: Lucasliang --- src/config.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/config.rs b/src/config.rs index 823d4ac1..3829526f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -285,4 +285,29 @@ mod tests { .unwrap() .contains("tolerate-corrupted-tail-records")); } + + #[test] + fn test_secondary_dir() { + { + // Set the sub-dir with `Some("...")` + let dir_list = r#" + dir = "./test/" + sub-dir = 'Some("./test_secondary")' + "#; + let mut load: Config = toml::from_str(dir_list).unwrap(); + load.sanitize().unwrap(); + assert!(load.sub_dir.is_some()); + assert!(toml::to_string(&load).unwrap().contains("test_secondary")); + } + { + // Set the sub-dir with `"..."` + let wrong_dir_list = r#" + dir = "./test/" + sub-dir = "./test_secondary" + "#; + let mut load: Config = toml::from_str(wrong_dir_list).unwrap(); + load.sanitize().unwrap(); + assert!(load.sub_dir.is_some()); + } + } } From 2278f0ec8f597d6eb559f4b5c7d17b96329c69ff Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Wed, 17 Aug 2022 18:29:32 +0800 Subject: [PATCH 07/22] Polish the design of StorageInfo to make the whole structure on secondary-dir more readable and concise. Signed-off-by: Lucasliang --- src/file_pipe_log/pipe.rs | 353 +++++++++++--------------------------- 1 file changed, 97 insertions(+), 256 deletions(-) diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index ed078202..dfe087ea 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -1,6 +1,6 @@ // Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0. -use std::collections::{HashSet, VecDeque}; +use std::collections::VecDeque; use std::fs::File; use std::path::PathBuf; use std::sync::Arc; @@ -28,116 +28,27 @@ pub enum StorageDirType { Secondary = 1, } -#[derive(Clone)] -struct SingleStorageInfo { - dir: String, - stale_files: HashSet, - in_use_files: HashSet, -} - -impl SingleStorageInfo { - fn new(dir: String) -> Self { - Self { - dir, - stale_files: HashSet::default(), - in_use_files: HashSet::default(), - } - } - - fn has_stale_file(&self) -> bool { - !self.stale_files.is_empty() - } - - fn find_stale_file(&self, file_seq: FileSeq) -> bool { - matches!(self.stale_files.get(&file_seq), Some(_)) - } - - fn find_in_use_file(&self, file_seq: FileSeq) -> bool { - matches!(self.in_use_files.get(&file_seq), Some(_)) - } - - fn pop_stale_file(&mut self, file_seq: FileSeq) -> bool { - self.stale_files.remove(&file_seq) - } - - fn push_stale_file(&mut self, file_seq: FileSeq) -> bool { - self.stale_files.insert(file_seq) - } - - fn pop_in_use_file(&mut self, file_seq: FileSeq) -> bool { - self.in_use_files.remove(&file_seq) - } - - fn push_in_use_file(&mut self, file_seq: FileSeq) -> bool { - self.in_use_files.insert(file_seq) - } -} - /// Represents the info of storage dirs, including `main dir` and /// `secondary dir`. 
struct StorageInfo { - storage: Vec, + storage: Vec, } impl StorageInfo { fn new(dir: String, sub_dir: Option) -> Self { - let mut storage = vec![SingleStorageInfo::new(dir); 1]; + let mut storage = vec![dir; 1]; if let Some(sub) = sub_dir { - storage.push(SingleStorageInfo::new(sub)); + storage.push(sub); } Self { storage } } - fn pop_file(&mut self, file_seq: FileSeq) -> Option { - for t in StorageDirType::iter() { - let idx = t as usize; - if idx >= self.storage.len() { - break; - } - if self.storage[idx].pop_stale_file(file_seq) - || self.storage[idx].pop_in_use_file(file_seq) - { - return Some(t); - } - } - None - } - - fn push_in_use_file(&mut self, file_seq: FileSeq, storage_type: StorageDirType) -> bool { - let idx = storage_type as usize; - if idx >= self.storage.len() { - return false; - } - self.storage[idx].push_in_use_file(file_seq) - } - - fn push_stale_file(&mut self, file_seq: FileSeq, storage_type: StorageDirType) -> bool { - let idx = storage_type as usize; - if idx >= self.storage.len() { - return false; - } - self.storage[idx].push_stale_file(file_seq) - } - - fn get_stale_files_dir(&self) -> Option<(String, StorageDirType)> { - for t in StorageDirType::iter() { - let idx = t as usize; - if idx >= self.storage.len() { - break; - } - if self.storage[idx].has_stale_file() { - return Some((self.storage[idx].dir.clone(), t)); - } - } - None - } - fn get_free_dir(&self, target_size: usize) -> Option<(String, StorageDirType)> { #[cfg(feature = "failpoints")] { fail::fail_point!("file_pipe_log::force_use_secondary_dir", |_| { Some(( - self.storage[StorageDirType::Secondary as usize].dir.clone(), + self.storage[StorageDirType::Secondary as usize].clone(), StorageDirType::Secondary, )) }); @@ -148,36 +59,30 @@ impl StorageInfo { if idx >= self.storage.len() { break; } - let disk_stats = match fs2::statvfs(&self.storage[idx].dir) { + let disk_stats = match fs2::statvfs(&self.storage[idx]) { Err(e) => { error!( "get disk stat for raft engine failed, dir_path: {}, err: {}", - &self.storage[idx].dir, e + &self.storage[idx], e ); return None; } Ok(stats) => stats, }; if target_size <= disk_stats.available_space() as usize { - return Some((self.storage[idx].dir.clone(), t)); + return Some((self.storage[idx].clone(), t)); } } None } - fn find_dir(&self, file_seq: FileSeq) -> Option<(String, StorageDirType)> { - for t in StorageDirType::iter() { - let idx = t as usize; - if idx >= self.storage.len() { - break; - } - if self.storage[idx].find_stale_file(file_seq) - || self.storage[idx].find_in_use_file(file_seq) - { - return Some((self.storage[idx].dir.clone(), t)); - } + fn get_dir(&self, storage_type: StorageDirType) -> Option<&str> { + let idx = storage_type as usize; + if idx >= self.storage.len() { + None + } else { + Some(&self.storage[idx]) } - None } fn sync_all_dir(&mut self) -> Result<()> { @@ -186,7 +91,7 @@ impl StorageInfo { if idx >= self.storage.len() { break; } - let path = PathBuf::from(&self.storage[idx].dir); + let path = PathBuf::from(&self.storage[idx]); std::fs::File::open(path).and_then(|d| d.sync_all())?; } Ok(()) @@ -209,6 +114,8 @@ struct FileCollection { /// `0` => no capbility for recycling stale files /// `_` => finite volume for recycling stale files capacity: usize, + /// Info of storage dir. + storage: StorageInfo, } impl FileCollection { @@ -216,7 +123,11 @@ impl FileCollection { /// /// Attention please, the recycled file would be automatically `renamed` in /// this func. 
- fn recycle_one_file(&mut self, file_system: &F, dir_path: &str, dst_fd: FileId) -> bool { + fn recycle_one_file( + &mut self, + file_system: &F, + dst_fd: FileId, + ) -> (bool, Option) { debug_assert!(self.first_seq <= self.first_seq_in_use); debug_assert!(!self.fds.is_empty()); if self.first_seq < self.first_seq_in_use { @@ -224,8 +135,10 @@ impl FileCollection { queue: dst_fd.queue, seq: self.first_seq, }; - let src_path = first_file_id.build_file_path(dir_path); // src filepath - let dst_path = dst_fd.build_file_path(dir_path); // dst filepath + let storage_type = self.fds[0].storage_type; + let dir = self.storage.get_dir(storage_type).unwrap(); + let src_path = first_file_id.build_file_path(dir); // src filepath + let dst_path = dst_fd.build_file_path(dir); // dst filepath if let Err(e) = file_system.reuse(&src_path, &dst_path) { error!("error while trying to recycle one expired file: {}", e); } else { @@ -233,10 +146,10 @@ impl FileCollection { // success. self.fds.pop_front().unwrap(); self.first_seq += 1; - return true; + return (true, Some(storage_type)); } } - false + (false, None) } } @@ -262,8 +175,6 @@ pub(super) struct SinglePipe { /// `active_file` must be locked first to acquire both `files` and /// `active_file` active_file: CachePadded>>, - /// Info of storage dir. - storage: CachePadded>, } impl Drop for SinglePipe { @@ -275,15 +186,16 @@ impl Drop for SinglePipe { // Here, we also should release the unnecessary disk space // occupied by stale files. let files = self.files.write(); - let storage = self.storage.read(); for seq in files.first_seq..files.first_seq_in_use { - let dir = storage.find_dir(seq); - debug_assert!(dir.is_some()); let file_id = FileId { queue: self.queue, seq, }; - let path = file_id.build_file_path(&dir.as_ref().unwrap().0); + let dir = files + .storage + .get_dir(files.fds[(seq - files.first_seq) as usize].storage_type); + debug_assert!(dir.is_some()); + let path = file_id.build_file_path(dir.unwrap()); if let Err(e) = self.file_system.delete(&path) { error!( "error while deleting stale file: {}, err_msg: {}", @@ -321,7 +233,7 @@ impl SinglePipe { } } - let mut storage = StorageInfo::new(cfg.dir.clone(), cfg.sub_dir.clone()); + let storage = StorageInfo::new(cfg.dir.clone(), cfg.sub_dir.clone()); let create_file = first_seq == 0; let active_seq = if create_file { first_seq = 1; @@ -338,7 +250,6 @@ impl SinglePipe { queue, seq: first_seq, }; - // let fd = Arc::new(file_system.create(&file_id.build_file_path(&cfg.dir))?); let fd = Arc::new(file_system.create(&file_id.build_file_path(&dir))?); fds.push_back(FileWithFormat { handle: fd, @@ -351,7 +262,6 @@ impl SinglePipe { }; for seq in first_seq..=active_seq { - storage.push_in_use_file(seq, fds[(seq - first_seq) as usize].storage_type); for listener in &listeners { listener.post_new_log_file(FileId { queue, seq }); } @@ -384,9 +294,9 @@ impl SinglePipe { first_seq_in_use: first_seq, fds, capacity, + storage, })), active_file: CachePadded::new(Mutex::new(active_file)), - storage: CachePadded::new(RwLock::new(storage)), }; pipe.flush_metrics(total_files); Ok(pipe) @@ -395,8 +305,8 @@ impl SinglePipe { /// Synchronizes all metadatas associated with the working directory to the /// filesystem. fn sync_dir(&self) -> Result<()> { - let mut storage = self.storage.write(); - storage.sync_all_dir() + let mut files = self.files.write(); + files.storage.sync_all_dir() } /// Returns a shared [`LogFd`] for the specified file sequence number. 
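To make the `recycle_one_file` refactor above concrete: recycling reuses the oldest stale file by renaming it to the path of the new sequence number, so the already-allocated disk blocks are handed over instead of allocating a fresh file. A minimal, self-contained sketch of that idea under an assumed file-name scheme (the `{:016}.raftlog` names are illustrative; the real path comes from `FileId::build_file_path`, and `std::fs::rename` stands in for `FileSystem::reuse`):

use std::collections::VecDeque;
use std::fs;
use std::io;

// `stale` holds the seqs of purged-but-retained files, oldest first.
fn recycle_oldest(dir: &str, stale: &mut VecDeque<u64>, new_seq: u64) -> io::Result<bool> {
    if let Some(old_seq) = stale.pop_front() {
        let src = format!("{}/{:016}.raftlog", dir, old_seq);
        let dst = format!("{}/{:016}.raftlog", dir, new_seq);
        // Renaming transfers the stale file's allocated space to the
        // new sequence number, sparing a fresh allocation.
        fs::rename(src, dst)?;
        return Ok(true);
    }
    Ok(false) // nothing to recycle; the caller creates a brand-new file
}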
@@ -430,32 +340,24 @@ impl SinglePipe { // Generate a new fd from a newly chosen file, might be reused from a stale // file or generated from a newly created file. let (fd, storage_type) = { - let mut storage = self.storage.write(); - let (dir, storage_type) = { - if let Some((d, t)) = storage.get_stale_files_dir() { - // Has stale files for reusing. - (d, t) - } else if let Some((d, t)) = storage.get_free_dir(self.target_file_size) { - // Has free space for newly writing - (d, t) - } else { - // No space for writing. - return Err(Error::Other(box_err!( - "no free space for recording new logs." - ))); - } - }; - let path = file_id.build_file_path(&dir); // Create the fd by recycling stale files or creating a new file. let mut files = self.files.write(); - let first_file_seq = files.first_seq; - if files.recycle_one_file(&self.file_system, &dir, file_id) { - storage.pop_file(first_file_seq); + let (recycle, storage_type) = files.recycle_one_file(&self.file_system, file_id); + if recycle { // Open the recycled file(file is already renamed) + debug_assert!(storage_type.is_some()); + let storage_type = storage_type.unwrap(); + let path = file_id.build_file_path(files.storage.get_dir(storage_type).unwrap()); (Arc::new(self.file_system.open(&path)?), storage_type) + } else if let Some((d, t)) = files.storage.get_free_dir(self.target_file_size) { + // Has free space for newly writing, a new file is introduced. + let path = file_id.build_file_path(&d); + (Arc::new(self.file_system.create(&path)?), t) } else { - // A new file is introduced. - (Arc::new(self.file_system.create(&path)?), storage_type) + // Neither has stale files nor has space for writing. + return Err(Error::Other(box_err!( + "no free space for recording new logs." + ))); } }; let mut new_file = ActiveFile { @@ -486,8 +388,6 @@ impl SinglePipe { format: LogFileFormat::new(version, alignment), storage_type, }); - let mut storage = self.storage.write(); - storage.push_in_use_file(seq, storage_type); for listener in &self.listeners { listener.post_new_log_file(FileId { queue: self.queue, @@ -569,9 +469,9 @@ impl SinglePipe { } }; let has_free_space = { - let storage = self.storage.read(); - storage.get_stale_files_dir().is_some() - || storage.get_free_dir(self.target_file_size).is_some() + let files = self.files.read(); + files.first_seq < files.first_seq_in_use /* has stale files */ + || files.storage.get_free_dir(self.target_file_size).is_some() }; // If there still exists free space for this record, a special Err will // be returned to the caller. @@ -638,9 +538,8 @@ impl SinglePipe { /// Return the actual removed count of purged files. fn purge_to(&self, file_seq: FileSeq) -> Result { let ( - first_purge_seq, /* first seq for purging */ - purged, /* count of purged files */ - remained, /* count of remained files */ + purged_files, /* list of purged files */ + remained, /* count of remained files */ ) = { let mut files = self.files.write(); if file_seq >= files.first_seq + files.fds.len() as u64 { @@ -664,25 +563,34 @@ impl SinglePipe { break; } } - // Update metadata of files + // Assemble the info of purged files. 
let old_first_seq = files.first_seq; + let mut purged_files = Vec::<(FileSeq, String)>::default(); + purged_files.reserve(purged); + for i in 0..purged { + purged_files.push(( + i as u64 + old_first_seq, + files + .storage + .get_dir(files.fds[i].storage_type) + .unwrap() + .to_owned(), + )); + } + // Update metadata of files files.first_seq += purged as u64; files.first_seq_in_use = file_seq; files.fds.drain(..purged); - (old_first_seq, purged, files.fds.len()) + (purged_files, files.fds.len()) }; self.flush_metrics(remained); - // TODO: @lucasliang, following strategy need to be polished - // (1) how to build the purged path. - // (2) how to put the stale (obsolete) files into storage for later processing. - let mut storage = self.storage.write(); - for seq in first_purge_seq..first_purge_seq + purged as u64 { + // for seq in first_purge_seq..first_purge_seq + purged as u64 { + for (seq, dir) in purged_files.iter() { let file_id = FileId { queue: self.queue, - seq, + seq: *seq, }; - let (dir, _) = storage.find_dir(seq).unwrap(); - let path = file_id.build_file_path(&dir); + let path = file_id.build_file_path(dir); #[cfg(feature = "failpoints")] { let remove_skipped = || { @@ -694,22 +602,8 @@ impl SinglePipe { } } self.file_system.delete(&path)?; - debug_assert!(storage.pop_file(seq).is_some()); } - // Move stale files from StorageInfo.in_use_files to StorageInfo.stale_files - let (start_seq, end_seq) = { - let files = self.files.read(); - (files.first_seq, files.first_seq_in_use) - }; - for seq in (start_seq..end_seq).rev() { - match storage.pop_file(seq) { - Some(t) => { - storage.push_stale_file(seq, t); - } - None => break, - } - } - Ok(purged) + Ok(purged_files.len()) } fn fetch_active_file(&self) -> LogFileContext { @@ -997,9 +891,14 @@ mod tests { first_seq_in_use: old_file_id.seq, capacity: 3, fds: vec![new_file_handler(path, old_file_id)].into(), + storage: StorageInfo::new(path.to_owned(), None), }; // recycle an old file - assert!(!recycle_collections.recycle_one_file(&file_system, path, new_file_id)); + assert!( + !recycle_collections + .recycle_one_file(&file_system, new_file_id) + .0 + ); // update the reycle collection { recycle_collections @@ -1008,7 +907,11 @@ mod tests { recycle_collections.first_seq_in_use = cur_file_id.seq; } // recycle an old file - assert!(recycle_collections.recycle_one_file(&file_system, path, new_file_id)); + assert!( + recycle_collections + .recycle_one_file(&file_system, new_file_id) + .0 + ); // validate the content of recycled file assert!( validate_content_of_file(file_system.as_ref(), path, new_file_id, &data).unwrap() @@ -1046,6 +949,7 @@ mod tests { first_seq_in_use: fake_file_id.seq + 1, capacity: 2, fds: vec![new_file_handler(path, fake_file_id)].into(), + storage: StorageInfo::new(path.to_owned(), None), }; // mock the failure on `rename` file_system @@ -1057,11 +961,19 @@ mod tests { }; // `rename` is failed, and no stale files in recycle_collections could be // recycled. 
- assert!(!recycle_collections.recycle_one_file(&file_system, path, new_file_id)); + assert!( + !recycle_collections + .recycle_one_file(&file_system, new_file_id) + .0 + ); assert_eq!(recycle_collections.fds.len(), 1); // rebuild the file for recycle prepare_file(file_system.as_ref(), path, fake_file_id, &data).unwrap(); - assert!(recycle_collections.recycle_one_file(&file_system, path, new_file_id)); + assert!( + recycle_collections + .recycle_one_file(&file_system, new_file_id) + .0 + ); assert!(recycle_collections.fds.is_empty()); } } @@ -1150,75 +1062,4 @@ mod tests { assert_eq!(file_context.version, Version::V2); assert_eq!(file_context.id.seq, 3); } - - #[test] - fn test_storage_info() { - let dir = Builder::new() - .prefix("test_storage_info") - .tempdir() - .unwrap(); - let secondary_dir = Builder::new() - .prefix("test_storage_info_sec") - .tempdir() - .unwrap(); - let path = dir.path().to_str().unwrap().to_owned(); - let sec_path = secondary_dir.path().to_str().unwrap().to_owned(); - { - // Tests SingleStorageInfo. - let mut main_storage = SingleStorageInfo::new(path.clone()); - assert!(!main_storage.has_stale_file()); - assert!(!main_storage.find_stale_file(10)); - assert!(!main_storage.find_in_use_file(1)); - main_storage.push_in_use_file(10); - main_storage.push_stale_file(1); - let mut cpy_storage = main_storage.clone(); - assert!(cpy_storage.has_stale_file()); - assert!(cpy_storage.find_stale_file(1)); - assert!(cpy_storage.find_in_use_file(10)); - assert!(!cpy_storage.pop_in_use_file(1)); - assert!(cpy_storage.pop_stale_file(1)); - assert!(cpy_storage.pop_in_use_file(10)); - } - { - // Tests StorageInfo with main dir only. - let mut storage = StorageInfo::new(path.clone(), None); - assert!(storage.push_in_use_file(1, StorageDirType::Main)); - assert!(!storage.push_in_use_file(2, StorageDirType::Secondary)); - assert!(!storage.push_stale_file(2, StorageDirType::Secondary)); - assert_eq!(storage.pop_file(1).unwrap(), StorageDirType::Main); - assert!(storage.pop_file(1).is_none()); - assert!(storage.find_dir(1).is_none()); - assert!(storage.push_stale_file(1, StorageDirType::Main)); - assert_eq!( - storage.find_dir(1).unwrap(), - (path.clone(), StorageDirType::Main) - ); - assert!(storage.get_free_dir(usize::MAX).is_none()); - assert_eq!( - storage.get_free_dir(16).unwrap(), - (path.clone(), StorageDirType::Main) - ); - } - { - // Tests StorageInfo with multi dirs. - let mut storage = StorageInfo::new(path.clone(), Some(sec_path.clone())); - assert!(storage.push_in_use_file(1, StorageDirType::Main)); - assert!(storage.push_stale_file(4, StorageDirType::Main)); - assert!(storage.push_in_use_file(2, StorageDirType::Secondary)); - assert!(storage.push_stale_file(3, StorageDirType::Secondary)); - assert_eq!(storage.pop_file(1).unwrap(), StorageDirType::Main); - assert_eq!(storage.pop_file(3).unwrap(), StorageDirType::Secondary); - assert!(storage.pop_file(1).is_none()); - assert!(storage.find_dir(1).is_none()); - assert_eq!( - storage.find_dir(2).unwrap(), - (sec_path, StorageDirType::Secondary) - ); - assert!(storage.get_free_dir(usize::MAX).is_none()); - assert_eq!( - storage.get_free_dir(16).unwrap(), - (path, StorageDirType::Main) - ); - } - } } From d424d76f2665c70490b7f45303342b8664d1d8af Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Wed, 17 Aug 2022 18:45:04 +0800 Subject: [PATCH 08/22] Refine the codes. 
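The `get_free_dir` probe refined in the patch below picks the first directory whose filesystem still has room for the next log file. The same check can be sketched standalone with the `fs2` and `log` crates that the pipe itself uses (`dir_has_room` is a hypothetical helper, not the crate's API):

fn dir_has_room(dir: &str, target_size: usize) -> bool {
    match fs2::statvfs(dir) {
        // A dir qualifies when the next file fits into its available space.
        Ok(stats) => target_size <= stats.available_space() as usize,
        Err(e) => {
            log::error!("get disk stat failed, dir: {}, err: {}", dir, e);
            false
        }
    }
}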
Signed-off-by: Lucasliang --- src/file_pipe_log/pipe.rs | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index dfe087ea..ca2942ed 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -43,12 +43,12 @@ impl StorageInfo { Self { storage } } - fn get_free_dir(&self, target_size: usize) -> Option<(String, StorageDirType)> { + fn get_free_dir(&self, target_size: usize) -> Option<(&str, StorageDirType)> { #[cfg(feature = "failpoints")] { fail::fail_point!("file_pipe_log::force_use_secondary_dir", |_| { Some(( - self.storage[StorageDirType::Secondary as usize].clone(), + self.storage[StorageDirType::Secondary as usize].as_str(), StorageDirType::Secondary, )) }); @@ -70,7 +70,7 @@ impl StorageInfo { Ok(stats) => stats, }; if target_size <= disk_stats.available_space() as usize { - return Some((self.storage[idx].clone(), t)); + return Some((&self.storage[idx], t)); } } None @@ -1062,4 +1062,33 @@ mod tests { assert_eq!(file_context.version, Version::V2); assert_eq!(file_context.id.seq, 3); } + + #[test] + fn test_storage_info() { + let dir = Builder::new() + .prefix("test_storage_info_main_dir") + .tempdir() + .unwrap(); + let secondary_dir = Builder::new() + .prefix("test_storage_info_sec_dir") + .tempdir() + .unwrap(); + let path = dir.path().to_str().unwrap(); + let sec_path = secondary_dir.path().to_str().unwrap(); + { + // Test StorageInfo with main dir only. + let storage = StorageInfo::new(path.to_owned(), None); + assert_eq!(storage.get_dir(StorageDirType::Main).unwrap(), path); + assert!(storage.get_dir(StorageDirType::Secondary).is_none()); + } + { + // Test StorageInfo both with main dir and secondary dir. + let storage = StorageInfo::new(path.to_owned(), Some(sec_path.to_owned())); + assert_eq!(storage.get_dir(StorageDirType::Main).unwrap(), path); + assert_eq!( + storage.get_dir(StorageDirType::Secondary).unwrap(), + sec_path + ); + } + } } From 5704eb57b6c7666ff08e382297bcb3eae8e233ca Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Wed, 17 Aug 2022 23:24:01 +0800 Subject: [PATCH 09/22] Refine the retry strategy in Engine::write(...). Signed-off-by: Lucasliang --- src/engine.rs | 41 +++++++++++++++++++++++++++---- tests/failpoints/test_io_error.rs | 13 ++++++---- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 54897d52..39ddb8d7 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -148,9 +148,21 @@ where let now = Instant::now(); let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); let file_context = self.pipe_log.fetch_active_file(LogQueue::Append); + // If we found that there is no spare space for the next LogBatch in the current + // active file, we will mark the `force_rotate` with `true` to notify the leader + // do `rotate` immediately. + let mut force_rotate = false; for writer in group.iter_mut() { writer.entered_time = Some(now); sync |= writer.sync; + // To avoid redundant `write` check when `force_rotate` == `true`, we will + // directly assign a special `Error::Other(...)` to the following followers. 
+ if force_rotate { + writer.set_output(Err(Error::Other(box_err!( + "Failed to append logbatch, try to dump it to other dir" + )))); + continue; + } let log_batch = writer.mut_payload(); let res = if !log_batch.is_empty() { log_batch.prepare_write(&file_context)?; @@ -164,12 +176,32 @@ where len: 0, }) }; + if let Err(Error::Other(e)) = res { + warn!( + "Cannot append, err: {}, try to re-append this log_batch into next log", + e + ); + force_rotate = true; // force to flush the current active file + writer.set_output(Err(Error::Other(box_err!( + "Failed to append logbatch, try to dump it to other dir" + )))); + continue; + } writer.set_output(res); } debug_assert!( file_context.id == self.pipe_log.fetch_active_file(LogQueue::Append).id ); perf_context!(log_write_duration).observe_since(now); + if force_rotate { + if let Err(e) = self.pipe_log.rotate(LogQueue::Append) { + panic!( + "Cannot rotate {:?} queue due to IO error: {}", + LogQueue::Append, + e + ); + } + } if let Err(e) = self.pipe_log.maybe_sync(LogQueue::Append, sync) { panic!( "Cannot sync {:?} queue due to IO error: {}", @@ -194,12 +226,11 @@ where // Retry if `writer.finish()` returns a special err, remarking there still // exists free space for this `LogBatch`. let ret = writer.finish(); - if let Err(Error::Other(e)) = ret { - warn!( - "Cannot append, err: {}, try to re-append this log_batch into other log", - e - ); + if let Err(Error::Other(_)) = ret { log_batch.reset_to_encoded_state(); + // Here, we will retry this LogBatch `append` by appending this writer + // to the next write group, and the current write leader will not hang + // on this write and will return timely. return self.write(log_batch, sync); } ret? diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index ec804bbc..4066ba3c 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -341,14 +341,17 @@ fn test_no_space_write_error() { let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); let engine = Engine::open(cfg.clone()).unwrap(); - engine - .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) - .unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .unwrap_err(); + }) + .is_err()); engine .write(&mut generate_batch(3, 11, 21, Some(&entry)), true) .unwrap(); assert_eq!( - 10, + 0, engine .fetch_entries_to::(2, 11, 21, None, &mut vec![]) .unwrap() @@ -376,7 +379,7 @@ fn test_no_space_write_error() { .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) .unwrap(); assert_eq!( - 10, + 0, engine .fetch_entries_to::(2, 11, 21, None, &mut vec![]) .unwrap() From 35fc5b5d0393988f6310c072b4a37002b6723a43 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Fri, 19 Aug 2022 15:28:19 +0800 Subject: [PATCH 10/22] Refine annotations. 
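The write path introduced in the previous patch treats a special `Error::Other` as a retry signal rather than a hard failure: the leader rotates to a file in a directory with free space, and the failed batch is appended again. A minimal sketch of that pattern with simplified stand-in types (`Retriable`/`Fatal` are illustrative, not the crate's `Error` variants); note the real code at this point retries by recursing into `write`, and a later patch in this series guards that recursion against looping endlessly:

#[derive(Debug)]
enum WriteErr {
    Retriable(String), // plays the role of `Error::Other(..)` above
    Fatal(String),
}

fn write_with_retry<F>(mut attempt: F, max_retries: usize) -> Result<usize, WriteErr>
where
    F: FnMut() -> Result<usize, WriteErr>,
{
    for _ in 0..=max_retries {
        match attempt() {
            Ok(written) => return Ok(written),
            // The active file was rotated (possibly into another dir);
            // re-enter the write path and try again.
            Err(WriteErr::Retriable(_)) => continue,
            Err(e) => return Err(e),
        }
    }
    Err(WriteErr::Fatal("no space left after retries".to_owned()))
}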
Signed-off-by: Lucasliang --- src/engine.rs | 9 +++++---- src/file_pipe_log/pipe.rs | 1 + src/log_batch.rs | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 39ddb8d7..3031ce38 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -148,9 +148,7 @@ where let now = Instant::now(); let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); let file_context = self.pipe_log.fetch_active_file(LogQueue::Append); - // If we found that there is no spare space for the next LogBatch in the current - // active file, we will mark the `force_rotate` with `true` to notify the leader - // do `rotate` immediately. + // Flag on whether force to flush the current active file or not. let mut force_rotate = false; for writer in group.iter_mut() { writer.entered_time = Some(now); @@ -176,12 +174,15 @@ where len: 0, }) }; + // If we found that there is no spare space for the next LogBatch in the current + // active file, we will mark the `force_rotate` with `true` to notify the leader + // do `rotate` immediately. if let Err(Error::Other(e)) = res { warn!( "Cannot append, err: {}, try to re-append this log_batch into next log", e ); - force_rotate = true; // force to flush the current active file + force_rotate = true; writer.set_output(Err(Error::Other(box_err!( "Failed to append logbatch, try to dump it to other dir" )))); diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index ca2942ed..20e59cc4 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -76,6 +76,7 @@ impl StorageInfo { None } + #[inline] fn get_dir(&self, storage_type: StorageDirType) -> Option<&str> { let idx = storage_type as usize; if idx >= self.storage.len() { diff --git a/src/log_batch.rs b/src/log_batch.rs index 5000664a..92760cd4 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -834,6 +834,7 @@ impl LogBatch { } /// Resets the `LogBatch` state to `Encoded(_, _)`. + #[inline] pub(crate) fn reset_to_encoded_state(&mut self) { match self.buf_state { BufState::Sealed(header_offset, entries_len) => { From 30e0802bd1301012e908090c20becc32ea0ebce4 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Mon, 22 Aug 2022 19:13:04 +0800 Subject: [PATCH 11/22] Rename `sub-dir` with `secondary-dir` and polish annotations in `scan`. Signed-off-by: Lucasliang --- src/config.rs | 14 +++++------ src/file_pipe_log/pipe.rs | 8 +++--- src/file_pipe_log/pipe_builder.rs | 42 ++++++++++++++++--------------- tests/failpoints/test_engine.rs | 10 ++++---- 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/src/config.rs b/src/config.rs index 3829526f..d10faa18 100644 --- a/src/config.rs +++ b/src/config.rs @@ -31,14 +31,14 @@ pub struct Config { /// Default: "" pub dir: String, - /// Assistant directory to store log files. Will create on startup if + /// Secondary directory to store log files. Will create on startup if /// set and not exists. /// /// Newly logs will be put into this dir when the main `dir` is full /// or not accessible. /// /// Default: "" - pub sub_dir: Option, + pub secondary_dir: Option, /// How to deal with file corruption during recovery. 
/// @@ -106,7 +106,7 @@ impl Default for Config { #[allow(unused_mut)] let mut cfg = Config { dir: "".to_owned(), - sub_dir: None, + secondary_dir: None, recovery_mode: RecoveryMode::TolerateTailCorruption, recovery_read_block_size: ReadableSize::kb(16), recovery_threads: 4, @@ -292,22 +292,22 @@ mod tests { // Set the sub-dir with `Some("...")` let dir_list = r#" dir = "./test/" - sub-dir = 'Some("./test_secondary")' + secondary-dir = 'Some("./test_secondary")' "#; let mut load: Config = toml::from_str(dir_list).unwrap(); load.sanitize().unwrap(); - assert!(load.sub_dir.is_some()); + assert!(load.secondary_dir.is_some()); assert!(toml::to_string(&load).unwrap().contains("test_secondary")); } { // Set the sub-dir with `"..."` let wrong_dir_list = r#" dir = "./test/" - sub-dir = "./test_secondary" + secondary-dir = "./test_secondary" "#; let mut load: Config = toml::from_str(wrong_dir_list).unwrap(); load.sanitize().unwrap(); - assert!(load.sub_dir.is_some()); + assert!(load.secondary_dir.is_some()); } } } diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index 20e59cc4..f1e0c3f1 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -35,10 +35,10 @@ struct StorageInfo { } impl StorageInfo { - fn new(dir: String, sub_dir: Option) -> Self { + fn new(dir: String, secondary_dir: Option) -> Self { let mut storage = vec![dir; 1]; - if let Some(sub) = sub_dir { - storage.push(sub); + if let Some(sec_dir) = secondary_dir { + storage.push(sec_dir); } Self { storage } } @@ -234,7 +234,7 @@ impl SinglePipe { } } - let storage = StorageInfo::new(cfg.dir.clone(), cfg.sub_dir.clone()); + let storage = StorageInfo::new(cfg.dir.clone(), cfg.secondary_dir.clone()); let create_file = first_seq == 0; let active_seq = if create_file { first_seq = 1; diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index f0dd872b..6a218fc5 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -101,6 +101,8 @@ impl DualPipesBuilder { /// Scans for all log files under the working directory. The directory will /// be created if not exists. pub fn scan(&mut self) -> Result<()> { + /// Checks the given `dir` exists or not. If `dir` not exists, + /// calls `fs::create_dir` to create it. fn validate_dir(dir: &str) -> Result<()> { let path = Path::new(dir); if !path.exists() { @@ -113,7 +115,7 @@ impl DualPipesBuilder { } Ok(()) } - + /// Parses the range of file sequences in the given `dir`. fn parse_file_range( dir: &str, dir_type: StorageDirType, @@ -154,14 +156,13 @@ impl DualPipesBuilder { }); Ok(valid_file_count) } - + /// Cleans up stale metadata left by the previous version. fn clean_stale_metadata( file_system: &F, dir: &str, min_id: u64, queue: LogQueue, ) { - // Try to cleanup stale metadata left by the previous version. let max_sample = 100; // Find the first obsolete metadata. let mut delete_start = None; @@ -197,14 +198,15 @@ impl DualPipesBuilder { ); } } - - // Validate main dir. + // Steps in the procedure of `scan`: `validate_dir` -> `parse_file_range` -> + // `clean_stale_metadata` + // Validate the main dir. let dir = &self.cfg.dir; validate_dir(dir)?; self.dir_lock = Some(lock_dir(dir)?); - // Validate sub dir (secondary dir). - if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { - validate_dir(sub_dir)?; + // Validate the secondary dir. 
+ if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { + validate_dir(secondary_dir)?; } type FileSeqRange = (u64, u64); /* (minimal_id, maximal_id) */ @@ -213,7 +215,7 @@ impl DualPipesBuilder { let mut rewrite_id_range = (u64::MAX, 0_u64); let mut append_file_dict: HashMap = HashMap::default(); let mut rewrite_file_dict: HashMap = HashMap::default(); - // Parse files in `dir`. + // Parse files in `cfg.dir`. parse_file_range( dir, StorageDirType::Main, @@ -222,26 +224,26 @@ impl DualPipesBuilder { &mut append_file_dict, &mut rewrite_file_dict, )?; - // Parse files in `sub-dir`. + // Parse files in `cfg.secondary_dir`. if_chain::if_chain! { - if let Some(sub_dir) = self.cfg.sub_dir.as_ref(); + if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref(); if let Ok(0) = parse_file_range( - sub_dir, + secondary_dir, StorageDirType::Secondary, &mut append_id_range, &mut rewrite_id_range, &mut append_file_dict, &mut rewrite_file_dict, ); - if let Ok(true) = from_same_dev(&self.cfg.dir, sub_dir); + if let Ok(true) = from_same_dev(&self.cfg.dir, secondary_dir); then { // If the count of valid file in secondary dir was 0, we directly - // reset the `cfg.sub_dir` to None. + // reset the `cfg.secondary_dir` to None. warn!( "sub-dir ({}) and dir ({}) are on same device, ignore it", - sub_dir, self.cfg.dir + secondary_dir, self.cfg.dir ); - self.cfg.sub_dir = None; // reset to None + self.cfg.secondary_dir = None; // reset to None } } @@ -265,17 +267,17 @@ impl DualPipesBuilder { // Clean stale metadata in main dir. clean_stale_metadata(self.file_system.as_ref(), &self.cfg.dir, min_id, queue); // Clean stale metadata in secondary dir if it was specified. - if let Some(sub_dir) = self.cfg.sub_dir.as_ref() { - clean_stale_metadata(self.file_system.as_ref(), sub_dir, min_id, queue); + if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { + clean_stale_metadata(self.file_system.as_ref(), secondary_dir, min_id, queue); } for seq in min_id..=max_id { let file_id = FileId { queue, seq }; let (dir, storage_type) = match file_dict.get(&seq) { Some(StorageDirType::Main) => (&self.cfg.dir, StorageDirType::Main), Some(StorageDirType::Secondary) => { - debug_assert!(self.cfg.sub_dir.is_some()); + debug_assert!(self.cfg.secondary_dir.is_some()); ( - self.cfg.sub_dir.as_ref().unwrap(), + self.cfg.secondary_dir.as_ref().unwrap(), StorageDirType::Secondary, ) } diff --git a/tests/failpoints/test_engine.rs b/tests/failpoints/test_engine.rs index e2c9936a..8b8299c9 100644 --- a/tests/failpoints/test_engine.rs +++ b/tests/failpoints/test_engine.rs @@ -730,7 +730,7 @@ fn test_build_engine_with_multi_dir() { .prefix("test_build_engine_with_multi_dir_1") .tempdir() .unwrap(); - let sub_dir = tempfile::Builder::new() + let secondary_dir = tempfile::Builder::new() .prefix("test_build_engine_with_multi_dir_2") .tempdir() .unwrap(); @@ -738,7 +738,7 @@ fn test_build_engine_with_multi_dir() { let rid = 1; let cfg = Config { dir: dir.path().to_str().unwrap().to_owned(), - sub_dir: Some(sub_dir.path().to_str().unwrap().to_owned()), + secondary_dir: Some(secondary_dir.path().to_str().unwrap().to_owned()), target_file_size: ReadableSize::kb(2), purge_threshold: ReadableSize::kb(4), ..Default::default() @@ -747,10 +747,10 @@ fn test_build_engine_with_multi_dir() { { // Set config with abnormal settings => same prefix let abnormal_dir = format!("{}/abnormal", dir.path().to_str().unwrap().to_owned()); - let abnormal_sub_dir = format!("{}/testing", abnormal_dir); + let abnormal_sec_dir = format!("{}/testing", 
abnormal_dir); let cfg_err = Config { dir: abnormal_dir, - sub_dir: Some(abnormal_sub_dir), + secondary_dir: Some(abnormal_sec_dir), ..cfg }; let engine = Engine::open(cfg_err).unwrap(); @@ -785,7 +785,7 @@ fn test_build_engine_with_multi_dir() { { // Set config with abnormal settings => same device let cfg_err = Config { - sub_dir: Some("./abnormal_testing".to_owned()), + secondary_dir: Some("./abnormal_testing".to_owned()), ..cfg.clone() }; let engine = Engine::open(cfg_err).unwrap(); From a92873e1a7b8515cb30390c1dc1395d79f10d070 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Mon, 29 Aug 2022 11:48:50 +0800 Subject: [PATCH 12/22] [Refinement]Refine the `purge` progress. Signed-off-by: Lucasliang --- src/file_pipe_log/pipe.rs | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs index af688b35..717f6c76 100644 --- a/src/file_pipe_log/pipe.rs +++ b/src/file_pipe_log/pipe.rs @@ -88,7 +88,7 @@ impl StorageInfo { } #[inline] - fn sync_all_dir(&mut self) -> Result<()> { + fn sync_all_dir(&self) -> Result<()> { for t in StorageDirType::iter() { let idx = t as usize; if idx >= self.storage.len() { @@ -118,8 +118,6 @@ struct FileCollection { /// `fds.len()` should be no larger than `capacity` unless it is full of /// active files. capacity: usize, - /// Info of storage dir. - storage: StorageInfo, } #[derive(PartialEq, Eq, Debug)] @@ -161,8 +159,8 @@ impl FileCollection { fn logical_purge( &mut self, file_seq: FileSeq, - ) -> (FileState, FileState, Vec<(FileSeq, String)>) { - let mut purged_files = Vec::<(FileSeq, String)>::default(); + ) -> (FileState, FileState, Vec<(FileSeq, StorageDirType)>) { + let mut purged_files = Vec::<(FileSeq, StorageDirType)>::default(); let prev = FileState { first_seq: self.first_seq, first_seq_in_use: self.first_seq_in_use, @@ -185,13 +183,7 @@ impl FileCollection { } purged_files.reserve(purged); for i in 0..purged { - purged_files.push(( - i as u64 + self.first_seq, - self.storage - .get_dir(self.fds[i].storage_type) - .unwrap() - .to_owned(), - )); + purged_files.push((i as u64 + self.first_seq, self.fds[i].storage_type)); } self.first_seq += purged as u64; self.first_seq_in_use = file_seq; @@ -227,6 +219,8 @@ pub(super) struct SinglePipe { /// `active_file` must be locked first to acquire both `files` and /// `active_file` active_file: CachePadded>>, + /// Info of storage dir. + storage: StorageInfo, } impl Drop for SinglePipe { @@ -243,7 +237,7 @@ impl Drop for SinglePipe { queue: self.queue, seq, }; - let dir = files + let dir = self .storage .get_dir(files.fds[(seq - files.first_seq) as usize].storage_type); debug_assert!(dir.is_some()); @@ -344,9 +338,9 @@ impl SinglePipe { first_seq_in_use: first_seq, fds, capacity, - storage, })), active_file: CachePadded::new(Mutex::new(active_file)), + storage, }; pipe.flush_metrics(total_files); Ok(pipe) @@ -355,8 +349,7 @@ impl SinglePipe { /// Synchronizes all metadatas associated with the working directory to the /// filesystem. fn sync_dir(&self) -> Result<()> { - let mut files = self.files.write(); - files.storage.sync_all_dir() + self.storage.sync_all_dir() } /// Returns a shared [`LogFd`] for the specified file sequence number. @@ -395,7 +388,7 @@ impl SinglePipe { let mut files = self.files.write(); if let Some((seq, storage_type)) = files.recycle_one_file() { // Has stale files for recycling, the old file will be reused. 
- let dir = files.storage.get_dir(storage_type).unwrap(); + let dir = self.storage.get_dir(storage_type).unwrap(); let src_file_id = FileId { queue: self.queue, seq, @@ -411,7 +404,7 @@ impl SinglePipe { } else { (Arc::new(self.file_system.open(&dst_path)?), storage_type) } - } else if let Some((d, t)) = files.storage.get_free_dir(self.target_file_size) { + } else if let Some((d, t)) = self.storage.get_free_dir(self.target_file_size) { // Has free space for newly writing, a new file is introduced. let path = file_id.build_file_path(&d); (Arc::new(self.file_system.create(&path)?), t) @@ -528,7 +521,7 @@ impl SinglePipe { let has_free_space = { let files = self.files.read(); files.first_seq < files.first_seq_in_use /* has stale files */ - || files.storage.get_free_dir(self.target_file_size).is_some() + || self.storage.get_free_dir(self.target_file_size).is_some() }; // If there still exists free space for this record, a special Err will // be returned to the caller. @@ -602,12 +595,18 @@ impl SinglePipe { debug_assert!(purged_files.is_empty()); return Ok(0); } - for (seq, dir) in purged_files.iter() { + debug_assert_eq!( + purged_files.len() as u64, + current.first_seq - prev.first_seq + ); + for (seq, dir_type) in purged_files.iter() { let file_id = FileId { queue: self.queue, seq: *seq, }; - let path = file_id.build_file_path(dir); + let dir = self.storage.get_dir(*dir_type); + debug_assert!(dir.is_some()); + let path = file_id.build_file_path(dir.unwrap()); #[cfg(feature = "failpoints")] { let remove_skipped = || { @@ -874,7 +873,6 @@ mod tests { Version::V2, )] .into(), - storage: StorageInfo::new(path.to_owned(), None), }; assert_eq!(files.recycle_one_file(), None); // | 12 13 14 From 703892f653ae19c8b5503823787474d1a204a15d Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Mon, 29 Aug 2022 23:01:08 +0800 Subject: [PATCH 13/22] [Bugfix]Fix recursive loop when trying to rewrite `LogBatch` by `Engine::write` Signed-off-by: Lucasliang --- src/engine.rs | 24 ++++++++++++---- src/log_batch.rs | 37 ++++++++++++++++++------ tests/failpoints/test_io_error.rs | 48 +++++++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 16 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 85de0fe5..afd639a6 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -148,7 +148,7 @@ where let now = Instant::now(); let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); let file_context = self.pipe_log.fetch_active_file(LogQueue::Append); - // Flag on whether force to flush the current active file or not. + // Flag on whether force to rotate the current active file or not. let mut force_rotate = false; for writer in group.iter_mut() { writer.entered_time = Some(now); @@ -195,6 +195,7 @@ where ); perf_context!(log_write_duration).observe_since(now); if force_rotate { + // TODO: encapsulating ops for `force_rotate == true`. if let Err(e) = self.pipe_log.rotate(LogQueue::Append) { panic!( "Cannot rotate {:?} queue due to IO error: {}", @@ -228,11 +229,7 @@ where // exists free space for this `LogBatch`. let ret = writer.finish(); if let Err(Error::Other(_)) = ret { - log_batch.reset_to_encoded_state(); - // Here, we will retry this LogBatch `append` by appending this writer - // to the next write group, and the current write leader will not hang - // on this write and will return timely. - return self.write(log_batch, sync); + return self.rewrite(log_batch, sync); } ret? }; @@ -255,6 +252,21 @@ where Ok(len) } + /// Rewrites the given log_batch. 
+ fn rewrite(&self, log_batch: &mut LogBatch, sync: bool) -> Result { + if let Err(e) = log_batch.prepare_rewrite() { + panic!( + "Cannot rewrite {:?} queue due to IO error: {}", + LogQueue::Append, + e + ); + } + // Here, we retry the `append` of this LogBatch by pushing its writer + // into the next write group, so the current write leader does not hang + // on this write and can return in time. + self.write(log_batch, sync) + } + /// Synchronizes the Raft engine. pub fn sync(&self) -> Result<()> { self.write(&mut LogBatch::default(), true)?; diff --git a/src/log_batch.rs b/src/log_batch.rs index 92760cd4..ff8a6255 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -555,6 +555,14 @@ enum BufState { /// # Invariants /// LOG_BATCH_HEADER_LEN <= buf.len() Sealed(usize, usize), + /// Buffer is being rewritten. This state exists only briefly, between + /// writing and rewriting; user operations will panic while it is in this + /// state. + /// # Content + /// (header_offset, entries_len) + /// # Invariants + /// LOG_BATCH_HEADER_LEN <= buf.len() + ReSealing(usize, usize), /// Buffer is undergoing writes. User operation will panic under this state. Incomplete, } @@ -727,7 +735,7 @@ impl LogBatch { /// compression type to each entry index. pub(crate) fn finish_populate(&mut self, compression_threshold: usize) -> Result { let _t = StopWatch::new(perf_context!(log_populating_duration)); - if let BufState::Encoded(header_offset, _) = self.buf_state { + if let BufState::ReSealing(header_offset, _) = self.buf_state { return Ok(self.buf.len() - header_offset); } debug_assert!(self.buf_state == BufState::Open); @@ -782,7 +790,7 @@ impl LogBatch { Ok(self.buf.len() - header_offset) } - /// Make preparations for the write of `LogBatch`. + /// Makes preparations for the write of `LogBatch`. #[inline] pub(crate) fn prepare_write(&mut self, file_context: &LogFileContext) -> Result<()> { match self.buf_state { @@ -790,6 +798,9 @@ LogItemBatch::prepare_write(&mut self.buf, file_context)?; self.buf_state = BufState::Sealed(header_offset, entries_len); } + BufState::ReSealing(_, _) => { + LogItemBatch::prepare_write(&mut self.buf, file_context)?; + } _ => unreachable!(), } Ok(()) @@ -799,7 +810,9 @@ /// Assumes called after a successful call of [`prepare_write`]. pub(crate) fn encoded_bytes(&self) -> &[u8] { match self.buf_state { - BufState::Sealed(header_offset, _) => &self.buf[header_offset..], + BufState::Sealed(header_offset, _) | BufState::ReSealing(header_offset, _) => { + &self.buf[header_offset..] + } _ => unreachable!(), } } @@ -808,12 +821,15 @@ /// /// Internally sets the file locations of each log entry indexes. pub(crate) fn finish_write(&mut self, mut handle: FileBlockHandle) { - debug_assert!(matches!(self.buf_state, BufState::Sealed(_, _))); + debug_assert!(matches!( + self.buf_state, + BufState::Sealed(_, _) | BufState::ReSealing(_, _) + )); if !self.is_empty() { // adjust log batch handle to log entries handle. handle.offset += LOG_BATCH_HEADER_LEN as u64; match self.buf_state { - BufState::Sealed(_, entries_len) => { + BufState::Sealed(_, entries_len) | BufState::ReSealing(_, entries_len) => { debug_assert!(LOG_BATCH_HEADER_LEN + entries_len < handle.len as usize); handle.len = entries_len; } @@ -833,13 +849,18 @@ self.item_batch.drain() } - /// Resets the `LogBatch` state to `Encoded(_, _)`. + /// Makes preparations for rewriting the `LogBatch`. 
#[inline] - pub(crate) fn reset_to_encoded_state(&mut self) { + pub(crate) fn prepare_rewrite(&mut self) -> Result<()> { + debug_assert!(matches!(self.buf_state, BufState::Sealed(_, _))); match self.buf_state { BufState::Sealed(header_offset, entries_len) => { - self.buf_state = BufState::Encoded(header_offset, entries_len); + self.buf_state = BufState::ReSealing(header_offset, entries_len); + Ok(()) } + BufState::ReSealing(_, _) => Err(Error::Corruption( + "LogBatch cannot be rewritten twice.".to_owned(), + )), _ => unreachable!(), } } diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index 4066ba3c..9211d16d 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -337,7 +337,7 @@ fn test_no_space_write_error() { .is_err()); } { - // Disk goes from `spare` -> `full` -> `spare`. + // Disk goes from `spare(nospace err)` -> `full` -> `spare`. let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); let engine = Engine::open(cfg.clone()).unwrap(); @@ -369,7 +369,7 @@ fn test_no_space_write_error() { let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); let cfg_err = Config { target_file_size: ReadableSize(1), - ..cfg + ..cfg.clone() }; let engine = Engine::open(cfg_err).unwrap(); engine @@ -391,6 +391,50 @@ fn test_no_space_write_error() { .unwrap() ); } + { + // Disk goes from `spare(nospace err)` -> `spare(another dir has enough space)`. + let _f = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); + let engine = Engine::open(cfg.clone()).unwrap(); + engine + .write(&mut generate_batch(5, 11, 21, Some(&entry)), true) + .unwrap(); + engine + .write(&mut generate_batch(6, 11, 21, Some(&entry)), true) + .unwrap(); + assert_eq!( + 10, + engine + .fetch_entries_to::(5, 11, 21, None, &mut vec![]) + .unwrap() + ); + assert_eq!( + 10, + engine + .fetch_entries_to::(6, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // Disk stays endlessly in `spare(nospace err)`, the engine panics after + // multiple retries. + let _f = FailGuard::new( + "log_fd::write::no_space_err", + "1*return->1*off->1*return->1*off", + ); + let engine = Engine::open(cfg).unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(7, 11, 21, Some(&entry)), true) + .unwrap_err(); + }) + .is_err()); + assert_eq!( + 0, + engine + .fetch_entries_to::(7, 11, 21, None, &mut vec![]) + .unwrap() + ); + } } #[test] From 34307bb89c2ae27cf25024a9e1e1606ef8e8d7dd Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Tue, 30 Aug 2022 14:47:23 +0800 Subject: [PATCH 14/22] Refine the processing when `force_rotate == true` Signed-off-by: Lucasliang --- src/engine.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index afd639a6..05f6ad39 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -195,7 +195,8 @@ where ); perf_context!(log_write_duration).observe_since(now); if force_rotate { - // TODO: encapsulating ops for `force_rotate == true`. + // If the leader fails to `rotate` to a new log file for the un-synced + // LogBatches, it has hit an unexpected error and should panic. 
if let Err(e) = self.pipe_log.rotate(LogQueue::Append) { panic!( "Cannot rotate {:?} queue due to IO error: {}", LogQueue::Append, e ); } - } - if let Err(e) = self.pipe_log.maybe_sync(LogQueue::Append, sync) { + } else if let Err(e) = self.pipe_log.maybe_sync(LogQueue::Append, sync) { panic!( "Cannot sync {:?} queue due to IO error: {}", LogQueue::Append, @@ -225,8 +225,8 @@ where debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); perf_context += &writer.perf_context_diff; set_perf_context(perf_context); - // Retry if `writer.finish()` returns a special err, remarking there still - // exists free space for this `LogBatch`. + // Retry if `writer.finish()` returns a special 'Error::Other', indicating that + // there still exists free space for this `LogBatch`. let ret = writer.finish(); if let Err(Error::Other(_)) = ret { return self.rewrite(log_batch, sync); } ret? }; @@ -252,7 +252,7 @@ where Ok(len) } - /// Rewrites the given log_batch. + /// Rewrites the content of `log_batch`. fn rewrite(&self, log_batch: &mut LogBatch, sync: bool) -> Result { if let Err(e) = log_batch.prepare_rewrite() { panic!( "Cannot rewrite {:?} queue due to IO error: {}", LogQueue::Append, e ); } From a0c977b54e08e198e1c084571ee8834c19fb86c6 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Thu, 1 Sep 2022 15:53:41 +0800 Subject: [PATCH 15/22] Bugfix for rewriting a LogBatch with V2 and tidy the implementation in `scan`. Signed-off-by: Lucasliang --- src/config.rs | 74 ++++++-- src/file_pipe_log/pipe_builder.rs | 285 ++++++++++++++---------------- src/log_batch.rs | 47 +++-- 3 files changed, 231 insertions(+), 175 deletions(-) diff --git a/src/config.rs b/src/config.rs index 51a19dd0..23ae3c32 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,14 +1,32 @@ // Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0. -use log::warn; +use log::{info, warn}; use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::Path; +use crate::env::from_same_dev; use crate::pipe_log::Version; use crate::{util::ReadableSize, Result}; const MIN_RECOVERY_READ_BLOCK_SIZE: usize = 512; const MIN_RECOVERY_THREADS: usize = 1; +/// Checks whether the given `dir` is valid. If `dir` does not exist, +/// it will be created automatically. +fn validate_dir(dir: &str) -> Result<()> { + let path = Path::new(dir); + if !path.exists() { + info!("Create raft log directory: {}", dir); + fs::create_dir(dir)?; + return Ok(()); + } + if !path.is_dir() { + return Err(box_err!("Not directory: {}", dir)); + } + Ok(()) +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub enum RecoveryMode { @@ -170,6 +188,26 @@ impl Config { if self.memory_limit.is_some() { warn!("memory-limit will be ignored because swap feature is disabled"); } + // Validate `dir`. + if let Err(e) = validate_dir(&self.dir) { + return Err(box_err!( + "dir ({}) is invalid, err: {}, please check it again", + self.dir, + e + )); + } + // Validate `secondary-dir`. + if_chain::if_chain! 
{ + if let Some(secondary_dir) = self.secondary_dir.as_ref(); + if validate_dir(secondary_dir).is_ok(); + if let Ok(true) = from_same_dev(&self.dir, secondary_dir); + then { + warn!( + "secondary-dir ({}) and dir ({}) are on same device, recommend setting it to another device", + secondary_dir, self.dir + ); + } + } Ok(()) } @@ -197,6 +235,11 @@ impl Config { mod tests { use super::*; + fn remove_dir(dir: &str) -> Result<()> { + fs::remove_dir_all(dir)?; + Ok(()) + } + #[test] fn test_serde() { let value = Config::default(); @@ -227,6 +270,7 @@ mod tests { #[test] fn test_invalid() { let hard_error = r#" + dir = "./" target-file-size = "5MB" purge-threshold = "3MB" "#; @@ -234,6 +278,7 @@ mod tests { assert!(hard_load.sanitize().is_err()); let soft_error = r#" + dir = "./" recovery-read-block-size = "1KB" recovery-threads = 0 bytes-per-sync = "0KB" @@ -255,6 +300,7 @@ mod tests { assert!(soft_sanitized.enable_log_recycle); let recycle_error = r#" + dir = "./" enable-log-recycle = true format-version = 1 "#; @@ -266,6 +312,7 @@ mod tests { fn test_backward_compactibility() { // Upgrade from older version. let old = r#" + dir = "./" recovery-mode = "tolerate-corrupted-tail-records" "#; let mut load: Config = toml::from_str(old).unwrap(); @@ -277,27 +324,34 @@ mod tests { } #[test] - fn test_secondary_dir() { + fn test_validate_dir_setting() { + { + let dir_list = r#""#; + let mut load: Config = toml::from_str(dir_list).unwrap(); + assert!(load.sanitize().is_err()); + } { - // Set the sub-dir with `Some("...")` + // Set the sub-dir same with main dir let dir_list = r#" - dir = "./test/" - secondary-dir = 'Some("./test_secondary")' + dir = "./test_validate_dir_setting/" + secondary-dir = "./test_validate_dir_setting/" "#; let mut load: Config = toml::from_str(dir_list).unwrap(); load.sanitize().unwrap(); assert!(load.secondary_dir.is_some()); - assert!(toml::to_string(&load).unwrap().contains("test_secondary")); + assert!(remove_dir(&load.dir).is_ok()); } { // Set the sub-dir with `"..."` - let wrong_dir_list = r#" - dir = "./test/" - secondary-dir = "./test_secondary" + let dir_list = r#" + dir = "./test_validate_dir_setting" + secondary-dir = "./test_validate_dir_setting_secondary" "#; - let mut load: Config = toml::from_str(wrong_dir_list).unwrap(); + let mut load: Config = toml::from_str(dir_list).unwrap(); load.sanitize().unwrap(); assert!(load.secondary_dir.is_some()); + assert!(remove_dir(&load.dir).is_ok()); + assert!(remove_dir(&load.secondary_dir.unwrap()).is_ok()); } } } diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index 6a218fc5..deeabca4 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -10,11 +10,11 @@ use std::sync::Arc; use fs2::FileExt; use hashbrown::HashMap; -use log::{error, info, warn}; +use log::{error, warn}; use rayon::prelude::*; use crate::config::{Config, RecoveryMode}; -use crate::env::{from_same_dev, FileSystem}; +use crate::env::FileSystem; use crate::event_listener::EventListener; use crate::log_batch::LogItemBatch; use crate::pipe_log::{FileId, FileSeq, LogQueue}; @@ -85,6 +85,13 @@ pub struct DualPipesBuilder { rewrite_files: Vec>, } +type FileSeqRange = (u64, u64); /* (minimal_id, maximal_id) */ +type FileSeqDict = HashMap; /* HashMap */ +struct FileScanner { + file_seq_range: FileSeqRange, + file_dict: FileSeqDict, +} + impl DualPipesBuilder { /// Creates a new builder. 
pub fn new(cfg: Config, file_system: Arc, listeners: Vec>) -> Self { @@ -101,174 +108,63 @@ impl DualPipesBuilder { /// Scans for all log files under the working directory. The directory will /// be created if not exists. pub fn scan(&mut self) -> Result<()> { - /// Checks the given `dir` exists or not. If `dir` not exists, - /// calls `fs::create_dir` to create it. - fn validate_dir(dir: &str) -> Result<()> { - let path = Path::new(dir); - if !path.exists() { - info!("Create raft log directory: {}", dir); - fs::create_dir(dir)?; - return Ok(()); - } - if !path.is_dir() { - return Err(box_err!("Not directory: {}", dir)); - } - Ok(()) - } - /// Parses the range of file sequences in the given `dir`. - fn parse_file_range( - dir: &str, - dir_type: StorageDirType, - append_range: &mut FileSeqRange, - rewrite_range: &mut FileSeqRange, - append_dict: &mut FileSeqDict, - rewrite_dict: &mut FileSeqDict, - ) -> Result { - let path = Path::new(dir); - let mut valid_file_count = 0_u64; - fs::read_dir(path)?.for_each(|e| { - if let Ok(e) = e { - let p = e.path(); - if p.is_file() { - match FileId::parse_file_name(p.file_name().unwrap().to_str().unwrap()) { - Some(FileId { - queue: LogQueue::Append, - seq, - }) => { - append_dict.insert(seq, dir_type); - append_range.0 = std::cmp::min(append_range.0, seq); - append_range.1 = std::cmp::max(append_range.1, seq); - valid_file_count += 1; - } - Some(FileId { - queue: LogQueue::Rewrite, - seq, - }) => { - rewrite_dict.insert(seq, dir_type); - rewrite_range.0 = std::cmp::min(rewrite_range.0, seq); - rewrite_range.1 = std::cmp::max(rewrite_range.1, seq); - valid_file_count += 1; - } - _ => {} - } - } - } - }); - Ok(valid_file_count) - } - /// Cleans up stale metadata left by the previous version. - fn clean_stale_metadata( - file_system: &F, - dir: &str, - min_id: u64, - queue: LogQueue, - ) { - let max_sample = 100; - // Find the first obsolete metadata. - let mut delete_start = None; - for i in 0..max_sample { - let seq = i * min_id / max_sample; - let file_id = FileId { queue, seq }; - // Main dir - let path = file_id.build_file_path(&dir); - if file_system.exists_metadata(&path) { - delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); - break; - } - } - // Delete metadata starting from the oldest. Abort on error. - if let Some(start) = delete_start { - let mut success = 0; - for seq in start..min_id { - let file_id = FileId { queue, seq }; - let path = file_id.build_file_path(&dir); - if file_system.exists_metadata(&path) { - if let Err(e) = file_system.delete_metadata(&path) { - error!("failed to delete metadata of {}: {}.", path.display(), e); - break; - } - } else { - continue; - } - success += 1; - } - warn!( - "deleted {} stale files of {:?} in range [{}, {}).", - success, queue, start, min_id, - ); - } - } - // Steps in the procedure of `scan`: `validate_dir` -> `parse_file_range` -> - // `clean_stale_metadata` - // Validate the main dir. - let dir = &self.cfg.dir; - validate_dir(dir)?; - self.dir_lock = Some(lock_dir(dir)?); - // Validate the secondary dir. - if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { - validate_dir(secondary_dir)?; - } - - type FileSeqRange = (u64, u64); /* (minimal_id, maximal_id) */ - type FileSeqDict = HashMap; /* HashMap */ - let mut append_id_range = (u64::MAX, 0_u64); - let mut rewrite_id_range = (u64::MAX, 0_u64); - let mut append_file_dict: HashMap = HashMap::default(); - let mut rewrite_file_dict: HashMap = HashMap::default(); - // Parse files in `cfg.dir`. 
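The `FileScanner` introduced above reduces to tracking a `(min, max)` sequence range plus a map from sequence number to the directory that owns the file; the scan that follows feeds it for both queues. A simplified standalone model of that bookkeeping (names shortened; the real code derives entries from `FileId::parse_file_name`):

    use std::collections::HashMap;

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum DirType {
        Main,
        Secondary,
    }

    struct Scanner {
        seq_range: (u64, u64),       // (min, max), starts as (u64::MAX, 0)
        dict: HashMap<u64, DirType>, // seq -> directory that owns the file
    }

    impl Scanner {
        fn new() -> Self {
            Scanner { seq_range: (u64::MAX, 0), dict: HashMap::new() }
        }

        /// Records one discovered file sequence found under `dir_type`.
        fn record(&mut self, seq: u64, dir_type: DirType) {
            self.dict.insert(seq, dir_type);
            self.seq_range.0 = self.seq_range.0.min(seq);
            self.seq_range.1 = self.seq_range.1.max(seq);
        }
    }

    fn main() {
        let mut s = Scanner::new();
        for seq in [3u64, 1, 2] {
            s.record(seq, DirType::Main);
        }
        s.record(4, DirType::Secondary);
        assert_eq!(s.seq_range, (1, 4));
        assert_eq!(s.dict[&4], DirType::Secondary);
    }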
- parse_file_range( - dir, + let mut append_scanner = FileScanner { + file_seq_range: (u64::MAX, 0_u64), + file_dict: FileSeqDict::default(), + }; + let mut rewrite_scanner = FileScanner { + file_seq_range: (u64::MAX, 0_u64), + file_dict: FileSeqDict::default(), + }; + // Scan main `dir` and `secondary-dir`, if it was valid. + let (_, dir_lock) = DualPipesBuilder::::scan_dir( + &self.cfg.dir, StorageDirType::Main, - &mut append_id_range, - &mut rewrite_id_range, - &mut append_file_dict, - &mut rewrite_file_dict, + &mut append_scanner, + &mut rewrite_scanner, )?; - // Parse files in `cfg.secondary_dir`. - if_chain::if_chain! { - if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref(); - if let Ok(0) = parse_file_range( - secondary_dir, - StorageDirType::Secondary, - &mut append_id_range, - &mut rewrite_id_range, - &mut append_file_dict, - &mut rewrite_file_dict, - ); - if let Ok(true) = from_same_dev(&self.cfg.dir, secondary_dir); - then { - // If the count of valid file in secondary dir was 0, we directly - // reset the `cfg.secondary_dir` to None. - warn!( - "sub-dir ({}) and dir ({}) are on same device, ignore it", - secondary_dir, self.cfg.dir - ); - self.cfg.secondary_dir = None; // reset to None - } + self.dir_lock = dir_lock; + if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { + DualPipesBuilder::::scan_dir( + secondary_dir, + StorageDirType::Secondary, + &mut append_scanner, + &mut rewrite_scanner, + )?; } for (queue, min_id, max_id, files, file_dict) in [ ( LogQueue::Append, - append_id_range.0, - append_id_range.1, + append_scanner.file_seq_range.0, + append_scanner.file_seq_range.1, &mut self.append_files, - &append_file_dict, + &append_scanner.file_dict, ), ( LogQueue::Rewrite, - rewrite_id_range.0, - rewrite_id_range.1, + rewrite_scanner.file_seq_range.0, + rewrite_scanner.file_seq_range.1, &mut self.rewrite_files, - &rewrite_file_dict, + &rewrite_scanner.file_dict, ), ] { if max_id > 0 { // Clean stale metadata in main dir. - clean_stale_metadata(self.file_system.as_ref(), &self.cfg.dir, min_id, queue); + DualPipesBuilder::clean_stale_metadata( + self.file_system.as_ref(), + &self.cfg.dir, + min_id, + queue, + ); // Clean stale metadata in secondary dir if it was specified. if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { - clean_stale_metadata(self.file_system.as_ref(), secondary_dir, min_id, queue); + DualPipesBuilder::clean_stale_metadata( + self.file_system.as_ref(), + secondary_dir, + min_id, + queue, + ); } for seq in min_id..=max_id { let file_id = FileId { queue, seq }; @@ -304,6 +200,91 @@ impl DualPipesBuilder { Ok(()) } + /// Scans for all log files under the given directory. + /// + /// Returns the valid file count and the relative dir_lock. + fn scan_dir( + dir: &str, + dir_type: StorageDirType, + append_scanner: &mut FileScanner, + rewrite_scanner: &mut FileScanner, + ) -> Result<(usize, Option)> { + let dir_lock = Some(lock_dir(dir)?); + // Parse all valid files in `dir`. 
+        let mut valid_file_count: usize = 0;
+        fs::read_dir(Path::new(dir))?.for_each(|e| {
+            if let Ok(e) = e {
+                let p = e.path();
+                if p.is_file() {
+                    match FileId::parse_file_name(p.file_name().unwrap().to_str().unwrap()) {
+                        Some(FileId {
+                            queue: LogQueue::Append,
+                            seq,
+                        }) => {
+                            append_scanner.file_dict.insert(seq, dir_type);
+                            append_scanner.file_seq_range.0 =
+                                std::cmp::min(append_scanner.file_seq_range.0, seq);
+                            append_scanner.file_seq_range.1 =
+                                std::cmp::max(append_scanner.file_seq_range.1, seq);
+                            valid_file_count += 1;
+                        }
+                        Some(FileId {
+                            queue: LogQueue::Rewrite,
+                            seq,
+                        }) => {
+                            rewrite_scanner.file_dict.insert(seq, dir_type);
+                            rewrite_scanner.file_seq_range.0 =
+                                std::cmp::min(rewrite_scanner.file_seq_range.0, seq);
+                            rewrite_scanner.file_seq_range.1 =
+                                std::cmp::max(rewrite_scanner.file_seq_range.1, seq);
+                            valid_file_count += 1;
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        });
+        Ok((valid_file_count, dir_lock))
+    }
+
+    /// Cleans up stale metadata left by the previous version.
+    fn clean_stale_metadata(file_system: &F, dir: &str, min_id: u64, queue: LogQueue) {
+        let max_sample = 100;
+        // Find the first obsolete metadata.
+        let mut delete_start = None;
+        for i in 0..max_sample {
+            let seq = i * min_id / max_sample;
+            let file_id = FileId { queue, seq };
+            // Main dir
+            let path = file_id.build_file_path(&dir);
+            if file_system.exists_metadata(&path) {
+                delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1);
+                break;
+            }
+        }
+        // Delete metadata starting from the oldest. Abort on error.
+        if let Some(start) = delete_start {
+            let mut success = 0;
+            for seq in start..min_id {
+                let file_id = FileId { queue, seq };
+                let path = file_id.build_file_path(&dir);
+                if file_system.exists_metadata(&path) {
+                    if let Err(e) = file_system.delete_metadata(&path) {
+                        error!("failed to delete metadata of {}: {}.", path.display(), e);
+                        break;
+                    }
+                } else {
+                    continue;
+                }
+                success += 1;
+            }
+            warn!(
+                "deleted {} stale files of {:?} in range [{}, {}).",
+                success, queue, start, min_id,
+            );
+        }
+    }
+
     /// Reads through log items in all available log files, and replays them to
     /// specific [`ReplayMachine`]s that can be constructed via
     /// `machine_factory`.
diff --git a/src/log_batch.rs b/src/log_batch.rs
index ff8a6255..191c0af6 100644
--- a/src/log_batch.rs
+++ b/src/log_batch.rs
@@ -415,7 +415,10 @@ impl LogItemBatch {
     /// The `signature` is both generated by the given `LogFileContext`.
     /// That is, the final checksum of each `LogBatch` consists of this
     /// `signature` and the original `checksum` of the contents.
-    pub(crate) fn prepare_write(buf: &mut Vec<u8>, file_context: &LogFileContext) -> Result<()> {
+    pub(crate) fn prepare_write(
+        buf: &mut Vec<u8>,
+        file_context: &LogFileContext,
+    ) -> Result<Option<u32>> {
         if !buf.is_empty() {
             if let Some(signature) = file_context.get_signature() {
                 // Insert the signature into the encoded bytes. Rewrite checksum of
@@ -426,8 +429,22 @@ impl LogItemBatch {
                 // `original checksum of buf`.
                 (&mut buf[footer_checksum_offset..])
                     .write_u32::<LittleEndian>(original_checksum ^ signature)?;
+                return Ok(Some(signature));
             }
         }
+        Ok(None)
+    }
+
+    /// Prepares the `rewrite` by resetting the `signature` in the `LogBatch`.
+ pub(crate) fn prepare_rewrite(buf: &mut Vec, signature: Option) -> Result<()> { + debug_assert!(!buf.is_empty()); + if let Some(signature) = signature { + let footer_checksum_offset = buf.len() - LOG_BATCH_CHECKSUM_LEN; + let original_checksum = codec::decode_u32_le(&mut &buf[footer_checksum_offset..])?; + // Reset the final checksum by `signature` ***XOR*** `original checksum of buf`. + (&mut buf[footer_checksum_offset..]) + .write_u32::(original_checksum ^ signature)?; + } Ok(()) } @@ -554,10 +571,10 @@ enum BufState { /// (header_offset, entries_len) /// # Invariants /// LOG_BATCH_HEADER_LEN <= buf.len() - Sealed(usize, usize), - /// Buffer is undergoing to be re-written. This state only briefly exists - /// between writing and re-writing, user operation will panic under this - /// state. + Sealed(usize, usize, Option), + /// Buffer is undergoing to be re-written. This state only exists + /// triansiently between writing and re-writing, user operation will + /// panic under this state. /// # Content /// (header_offset, entries_len) /// # Invariants @@ -795,8 +812,11 @@ impl LogBatch { pub(crate) fn prepare_write(&mut self, file_context: &LogFileContext) -> Result<()> { match self.buf_state { BufState::Encoded(header_offset, entries_len) => { - LogItemBatch::prepare_write(&mut self.buf, file_context)?; - self.buf_state = BufState::Sealed(header_offset, entries_len); + self.buf_state = BufState::Sealed( + header_offset, + entries_len, + LogItemBatch::prepare_write(&mut self.buf, file_context)?, + ); } BufState::ReSealing(_, _) => { LogItemBatch::prepare_write(&mut self.buf, file_context)?; @@ -810,7 +830,7 @@ impl LogBatch { /// Assumes called after a successful call of [`prepare_write`]. pub(crate) fn encoded_bytes(&self) -> &[u8] { match self.buf_state { - BufState::Sealed(header_offset, _) | BufState::ReSealing(header_offset, _) => { + BufState::Sealed(header_offset, _, _) | BufState::ReSealing(header_offset, _) => { &self.buf[header_offset..] } _ => unreachable!(), @@ -823,13 +843,13 @@ impl LogBatch { pub(crate) fn finish_write(&mut self, mut handle: FileBlockHandle) { debug_assert!(matches!( self.buf_state, - BufState::Sealed(_, _) | BufState::ReSealing(_, _) + BufState::Sealed(_, _, _) | BufState::ReSealing(_, _) )); if !self.is_empty() { // adjust log batch handle to log entries handle. handle.offset += LOG_BATCH_HEADER_LEN as u64; match self.buf_state { - BufState::Sealed(_, entries_len) | BufState::ReSealing(_, entries_len) => { + BufState::Sealed(_, entries_len, _) | BufState::ReSealing(_, entries_len) => { debug_assert!(LOG_BATCH_HEADER_LEN + entries_len < handle.len as usize); handle.len = entries_len; } @@ -852,9 +872,10 @@ impl LogBatch { /// Makes preparations for rewriting the `LogBatch`. 
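The signature handling in `prepare_write`/`prepare_rewrite` leans on XOR being its own inverse: sealing XORs the file's signature into the tail checksum, and rewriting XORs the same signature back out so the batch can be re-sealed against another file. A tiny demonstration of the property (all constants made up):

    fn seal(checksum: u32, signature: u32) -> u32 {
        checksum ^ signature
    }

    /// Undoes a previous seal: XOR-ing the same signature twice is a no-op.
    fn unseal(sealed: u32, signature: u32) -> u32 {
        sealed ^ signature
    }

    fn main() {
        let original_checksum: u32 = 0xDEAD_BEEF;
        let old_file_signature: u32 = 0x1234_5678; // hypothetical V2 signature
        let new_file_signature: u32 = 0x9ABC_DEF0;

        let sealed = seal(original_checksum, old_file_signature);
        // prepare_rewrite: strip the old signature before re-sealing.
        let restored = unseal(sealed, old_file_signature);
        assert_eq!(restored, original_checksum);

        // prepare_write against the new file applies the new signature.
        let resealed = seal(restored, new_file_signature);
        assert_eq!(unseal(resealed, new_file_signature), original_checksum);
    }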
#[inline] pub(crate) fn prepare_rewrite(&mut self) -> Result<()> { - debug_assert!(matches!(self.buf_state, BufState::Sealed(_, _))); + debug_assert!(matches!(self.buf_state, BufState::Sealed(_, _, _))); match self.buf_state { - BufState::Sealed(header_offset, entries_len) => { + BufState::Sealed(header_offset, entries_len, signature) => { + LogItemBatch::prepare_rewrite(&mut self.buf, signature)?; self.buf_state = BufState::ReSealing(header_offset, entries_len); Ok(()) } @@ -876,7 +897,7 @@ impl LogBatch { self.buf.len() + LOG_BATCH_CHECKSUM_LEN + self.item_batch.approximate_size() } BufState::Encoded(header_offset, _) => self.buf.len() - header_offset, - BufState::Sealed(header_offset, _) => self.buf.len() - header_offset, + BufState::Sealed(header_offset, _, _) => self.buf.len() - header_offset, s => { error!("querying incomplete log batch with state {:?}", s); 0 From 193d34f2a1dcf850eb7f472269ce732cf53bbd4d Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Thu, 1 Sep 2022 18:47:55 +0800 Subject: [PATCH 16/22] Refactor the `rewrite` progress when `write` meets `NOSPC` error. Signed-off-by: Lucasliang --- src/engine.rs | 202 ++++++++++----------- src/log_batch.rs | 31 +--- tests/failpoints/test_io_error.rs | 281 ++++++++++++++++-------------- 3 files changed, 255 insertions(+), 259 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 05f6ad39..fd83989e 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -139,99 +139,118 @@ where let start = Instant::now(); let len = log_batch.finish_populate(self.cfg.batch_compression_threshold.0 as usize)?; let block_handle = { - let mut writer = Writer::new(log_batch, sync); - // Snapshot and clear the current perf context temporarily, so the write group - // leader will collect the perf context diff later. - let mut perf_context = take_perf_context(); - let before_enter = Instant::now(); - if let Some(mut group) = self.write_barrier.enter(&mut writer) { - let now = Instant::now(); - let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); - let file_context = self.pipe_log.fetch_active_file(LogQueue::Append); - // Flag on whether force to rotate the current active file or not. - let mut force_rotate = false; - for writer in group.iter_mut() { - writer.entered_time = Some(now); - sync |= writer.sync; - // To avoid redundant `write` check when `force_rotate` == `true`, we will - // directly assign a special `Error::Other(...)` to the following followers. - if force_rotate { - writer.set_output(Err(Error::Other(box_err!( - "Failed to append logbatch, try to dump it to other dir" - )))); - continue; - } - let log_batch = writer.mut_payload(); - let res = if !log_batch.is_empty() { - log_batch.prepare_write(&file_context)?; - self.pipe_log - .append(LogQueue::Append, log_batch.encoded_bytes()) - } else { - // TODO(tabokie): use Option instead. - Ok(FileBlockHandle { - id: FileId::new(LogQueue::Append, 0), - offset: 0, - len: 0, - }) - }; - // If we found that there is no spare space for the next LogBatch in the current - // active file, we will mark the `force_rotate` with `true` to notify the leader - // do `rotate` immediately. - if let Err(Error::Other(e)) = res { - warn!( - "Cannot append, err: {}, try to re-append this log_batch into next log", - e - ); - force_rotate = true; - writer.set_output(Err(Error::Other(box_err!( - "Failed to append logbatch, try to dump it to other dir" - )))); - continue; + // Max retry count is limited to `2`. 
If the first `append` retry because of + // `NOSPC` error, the next `append` should success, unless there exists + // several abnormal cases in the IO device. In that case, + // `Engine::write` must return `Err`. + let mut retry_count = 2; + let mut handle = Ok(FileBlockHandle { + id: FileId::new(LogQueue::Append, 0), + offset: 0, + len: 0, + }); + while retry_count > 0 { + let mut writer = Writer::new(log_batch, sync); + // Snapshot and clear the current perf context temporarily, so the write group + // leader will collect the perf context diff later. + let mut perf_context = take_perf_context(); + let before_enter = Instant::now(); + if let Some(mut group) = self.write_barrier.enter(&mut writer) { + let now = Instant::now(); + let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); + let file_context = self.pipe_log.fetch_active_file(LogQueue::Append); + // Flag on whether force to rotate the current active file or not. + let mut force_rotate = false; + for writer in group.iter_mut() { + writer.entered_time = Some(now); + sync |= writer.sync; + // To avoid redundant `write` check when `force_rotate` == `true`, we will + // directly assign a special `Error::Other(...)` to the following followers. + if force_rotate { + writer.set_output(Err(Error::Other(box_err!( + "Failed to append logbatch, try to dump it to other dir" + )))); + continue; + } + let log_batch = writer.mut_payload(); + let res = if !log_batch.is_empty() { + log_batch.prepare_write(&file_context)?; + self.pipe_log + .append(LogQueue::Append, log_batch.encoded_bytes()) + } else { + // TODO(tabokie): use Option instead. + Ok(FileBlockHandle { + id: FileId::new(LogQueue::Append, 0), + offset: 0, + len: 0, + }) + }; + // If we found that there is no spare space for the next LogBatch in the + // current active file, we will mark the `force_rotate` with `true` to + // notify the leader do `rotate` immediately. + if let Err(Error::Other(e)) = res { + warn!( + "Cannot append, err: {}, try to re-append this log_batch into next log", + e + ); + force_rotate = true; + writer.set_output(Err(Error::Other(box_err!( + "Failed to append logbatch, try to dump it to other dir" + )))); + continue; + } + writer.set_output(res); } - writer.set_output(res); - } - debug_assert!( - file_context.id == self.pipe_log.fetch_active_file(LogQueue::Append).id - ); - perf_context!(log_write_duration).observe_since(now); - if force_rotate { - // If the leader failed to `rotate` a new log for the un-synced LogBatches, - // it means that it encounters unexpected errors, and should panic. - if let Err(e) = self.pipe_log.rotate(LogQueue::Append) { + debug_assert!( + file_context.id == self.pipe_log.fetch_active_file(LogQueue::Append).id + ); + perf_context!(log_write_duration).observe_since(now); + if force_rotate { + // If the leader failed to `rotate` a new log for the un-synced LogBatches, + // it means that it encounters unexpected errors, and should panic. + if let Err(e) = self.pipe_log.rotate(LogQueue::Append) { + panic!( + "Cannot rotate {:?} queue due to IO error: {}", + LogQueue::Append, + e + ); + } + } else if let Err(e) = self.pipe_log.maybe_sync(LogQueue::Append, sync) { panic!( - "Cannot rotate {:?} queue due to IO error: {}", + "Cannot sync {:?} queue due to IO error: {}", LogQueue::Append, e ); } - } else if let Err(e) = self.pipe_log.maybe_sync(LogQueue::Append, sync) { - panic!( - "Cannot sync {:?} queue due to IO error: {}", - LogQueue::Append, - e - ); + // Pass the perf context diff to all the writers. 
+ let diff = get_perf_context(); + for writer in group.iter_mut() { + writer.perf_context_diff = diff.clone(); + } } - // Pass the perf context diff to all the writers. - let diff = get_perf_context(); - for writer in group.iter_mut() { - writer.perf_context_diff = diff.clone(); + let entered_time = writer.entered_time.unwrap(); + ENGINE_WRITE_PREPROCESS_DURATION_HISTOGRAM + .observe(entered_time.saturating_duration_since(start).as_secs_f64()); + perf_context.write_wait_duration += + entered_time.saturating_duration_since(before_enter); + debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); + perf_context += &writer.perf_context_diff; + set_perf_context(perf_context); + // Retry if `writer.finish()` returns a special 'Error::Other', remarking that + // there still exists free space for this `LogBatch`. + handle = writer.finish(); + if let Err(Error::Other(_)) = handle { + retry_count -= 1; + log_batch.prepare_rewrite()?; + // Here, we will retry this LogBatch `append` by appending this writer + // to the next write group, and the current write leader will not hang + // on this write and will return timely. + continue; + } else { + break; } } - let entered_time = writer.entered_time.unwrap(); - ENGINE_WRITE_PREPROCESS_DURATION_HISTOGRAM - .observe(entered_time.saturating_duration_since(start).as_secs_f64()); - perf_context.write_wait_duration += - entered_time.saturating_duration_since(before_enter); - debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); - perf_context += &writer.perf_context_diff; - set_perf_context(perf_context); - // Retry if `writer.finish()` returns a special 'Error::Other', remarking that - // there still exists free space for this `LogBatch`. - let ret = writer.finish(); - if let Err(Error::Other(_)) = ret { - return self.rewrite(log_batch, sync); - } - ret? + handle? }; let mut now = Instant::now(); @@ -252,21 +271,6 @@ where Ok(len) } - /// Rewrites the content of `log_batch`. - fn rewrite(&self, log_batch: &mut LogBatch, sync: bool) -> Result { - if let Err(e) = log_batch.prepare_rewrite() { - panic!( - "Cannot rewrite {:?} queue due to IO error: {}", - LogQueue::Append, - e - ); - } - // Here, we will retry this LogBatch `append` by appending this writer - // to the next write group, and the current write leader will not hang - // on this write and will return timely. - self.write(log_batch, sync) - } - /// Synchronizes the Raft engine. pub fn sync(&self) -> Result<()> { self.write(&mut LogBatch::default(), true)?; diff --git a/src/log_batch.rs b/src/log_batch.rs index 191c0af6..aa016c2e 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -568,18 +568,10 @@ enum BufState { /// state only briefly exists between encoding and writing, user operation /// will panic under this state. /// # Content - /// (header_offset, entries_len) + /// (header_offset, entries_len, signature) /// # Invariants /// LOG_BATCH_HEADER_LEN <= buf.len() Sealed(usize, usize, Option), - /// Buffer is undergoing to be re-written. This state only exists - /// triansiently between writing and re-writing, user operation will - /// panic under this state. - /// # Content - /// (header_offset, entries_len) - /// # Invariants - /// LOG_BATCH_HEADER_LEN <= buf.len() - ReSealing(usize, usize), /// Buffer is undergoing writes. User operation will panic under this state. Incomplete, } @@ -752,7 +744,7 @@ impl LogBatch { /// compression type to each entry index. 
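Stripped of the write-barrier and perf-context plumbing, the retry contract above is a bounded loop: attempt the append, and on the special retriable error re-prepare the batch and try again, at most twice. A schematic sketch under those assumptions (the error enum and `append_once` stub are stand-ins, not the crate's API; `finish_populate` continues below):

    #[derive(Debug)]
    enum WriteError {
        NoSpaceInActiveFile, // retriable: rotate, then try the next file
        Io(String),          // fatal for this write
    }

    fn append_once(attempt: u32) -> Result<u64, WriteError> {
        // Pretend the first attempt lands on a full active file.
        if attempt == 0 {
            Err(WriteError::NoSpaceInActiveFile)
        } else {
            Ok(4096) // offset of the written block
        }
    }

    fn write_with_retry(max_retry: u32) -> Result<u64, WriteError> {
        let mut attempt = 0;
        loop {
            match append_once(attempt) {
                Ok(handle) => return Ok(handle),
                Err(WriteError::NoSpaceInActiveFile) if attempt + 1 < max_retry => {
                    // Re-prepare the batch (e.g. reset its sealed signature)
                    // and retry against the rotated file.
                    attempt += 1;
                }
                Err(e) => return Err(e),
            }
        }
    }

    fn main() {
        assert_eq!(write_with_retry(2).unwrap(), 4096);
    }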
pub(crate) fn finish_populate(&mut self, compression_threshold: usize) -> Result { let _t = StopWatch::new(perf_context!(log_populating_duration)); - if let BufState::ReSealing(header_offset, _) = self.buf_state { + if let BufState::Encoded(header_offset, _) = self.buf_state { return Ok(self.buf.len() - header_offset); } debug_assert!(self.buf_state == BufState::Open); @@ -818,9 +810,6 @@ impl LogBatch { LogItemBatch::prepare_write(&mut self.buf, file_context)?, ); } - BufState::ReSealing(_, _) => { - LogItemBatch::prepare_write(&mut self.buf, file_context)?; - } _ => unreachable!(), } Ok(()) @@ -830,9 +819,7 @@ impl LogBatch { /// Assumes called after a successful call of [`prepare_write`]. pub(crate) fn encoded_bytes(&self) -> &[u8] { match self.buf_state { - BufState::Sealed(header_offset, _, _) | BufState::ReSealing(header_offset, _) => { - &self.buf[header_offset..] - } + BufState::Sealed(header_offset, _, _) => &self.buf[header_offset..], _ => unreachable!(), } } @@ -841,15 +828,12 @@ impl LogBatch { /// /// Internally sets the file locations of each log entry indexes. pub(crate) fn finish_write(&mut self, mut handle: FileBlockHandle) { - debug_assert!(matches!( - self.buf_state, - BufState::Sealed(_, _, _) | BufState::ReSealing(_, _) - )); + debug_assert!(matches!(self.buf_state, BufState::Sealed(_, _, _))); if !self.is_empty() { // adjust log batch handle to log entries handle. handle.offset += LOG_BATCH_HEADER_LEN as u64; match self.buf_state { - BufState::Sealed(_, entries_len, _) | BufState::ReSealing(_, entries_len) => { + BufState::Sealed(_, entries_len, _) => { debug_assert!(LOG_BATCH_HEADER_LEN + entries_len < handle.len as usize); handle.len = entries_len; } @@ -876,12 +860,9 @@ impl LogBatch { match self.buf_state { BufState::Sealed(header_offset, entries_len, signature) => { LogItemBatch::prepare_rewrite(&mut self.buf, signature)?; - self.buf_state = BufState::ReSealing(header_offset, entries_len); + self.buf_state = BufState::Encoded(header_offset, entries_len); Ok(()) } - BufState::ReSealing(_, _) => Err(Error::Corruption( - "LogBatch can not be rewritten for twice.".to_owned(), - )), _ => unreachable!(), } } diff --git a/tests/failpoints/test_io_error.rs b/tests/failpoints/test_io_error.rs index 9211d16d..4a975230 100644 --- a/tests/failpoints/test_io_error.rs +++ b/tests/failpoints/test_io_error.rs @@ -289,151 +289,162 @@ fn test_concurrent_write_error() { #[test] fn test_no_space_write_error() { - let dir = tempfile::Builder::new() - .prefix("test_no_space_write_error") - .tempdir() - .unwrap(); - let cfg = Config { - dir: dir.path().to_str().unwrap().to_owned(), - target_file_size: ReadableSize(2048), - ..Default::default() - }; + let mut cfg_list = [ + Config { + target_file_size: ReadableSize::kb(2), + recovery_mode: RecoveryMode::AbsoluteConsistency, + format_version: Version::V1, + enable_log_recycle: false, + ..Default::default() + }, + Config { + target_file_size: ReadableSize::kb(2), + recovery_mode: RecoveryMode::AbsoluteConsistency, + format_version: Version::V2, + enable_log_recycle: true, + ..Default::default() + }, + ]; let entry = vec![b'x'; 1024]; - { - // If disk is full, a new Engine cannot be opened. - let _f = FailGuard::new("file_pipe_log::force_no_free_space", "return"); - assert!(Engine::open(cfg.clone()).is_err()); - } - { - // If disk is full after writing, the old engine should be available - // for `read`. 
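Stepping back from the tests for a moment: with `ReSealing` gone, the buffer lifecycle is a three-state machine, `Open` to `Encoded` (`finish_populate`) to `Sealed` (`prepare_write`), and `prepare_rewrite` steps a `Sealed` batch back to `Encoded` for the retry path. A compact model with the payload fields elided:

    #[derive(Debug, PartialEq)]
    enum BufState {
        Open,
        Encoded,
        Sealed,
    }

    impl BufState {
        fn finish_populate(&mut self) {
            assert_eq!(*self, BufState::Open);
            *self = BufState::Encoded;
        }

        fn prepare_write(&mut self) {
            assert_eq!(*self, BufState::Encoded);
            *self = BufState::Sealed;
        }

        /// Steps back so the batch can be sealed against another file.
        fn prepare_rewrite(&mut self) {
            assert_eq!(*self, BufState::Sealed);
            *self = BufState::Encoded;
        }
    }

    fn main() {
        let mut s = BufState::Open;
        s.finish_populate();
        s.prepare_write();
        s.prepare_rewrite(); // the NOSPC retry path
        s.prepare_write();
        assert_eq!(s, BufState::Sealed);
    }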
- let engine = Engine::open(cfg.clone()).unwrap(); - engine - .write(&mut generate_batch(1, 11, 21, Some(&entry)), true) + for cfg in cfg_list.iter_mut() { + let dir = tempfile::Builder::new() + .prefix("test_no_space_write_error") + .tempdir() .unwrap(); - drop(engine); - let _f = FailGuard::new("file_pipe_log::force_no_free_space", "return"); - let engine = Engine::open(cfg.clone()).unwrap(); - assert_eq!( - 10, + cfg.dir = dir.path().to_str().unwrap().to_owned(); + { + // If disk is full, a new Engine cannot be opened. + let _f = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + assert!(Engine::open(cfg.clone()).is_err()); + } + { + // If disk is full after writing, the old engine should be available + // for `read`. + let engine = Engine::open(cfg.clone()).unwrap(); engine - .fetch_entries_to::(1, 11, 21, None, &mut vec![]) - .unwrap() - ); - } - { - // `Write` is abnormal for no space left, Engine should panic at `rotate`. - let _f = FailGuard::new("log_fd::write::no_space_err", "return"); - let cfg_err = Config { - target_file_size: ReadableSize(1), - ..cfg.clone() - }; - let engine = Engine::open(cfg_err).unwrap(); - assert!(catch_unwind_silent(|| { + .write(&mut generate_batch(1, 11, 21, Some(&entry)), true) + .unwrap(); + drop(engine); + let _f = FailGuard::new("file_pipe_log::force_no_free_space", "return"); + let engine = Engine::open(cfg.clone()).unwrap(); + assert_eq!( + 10, + engine + .fetch_entries_to::(1, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // `Write` is abnormal for no space left, Engine should panic at `rotate`. + let _f = FailGuard::new("log_fd::write::no_space_err", "return"); + let cfg_err = Config { + target_file_size: ReadableSize(1), + ..cfg.clone() + }; + let engine = Engine::open(cfg_err).unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .unwrap_err(); + }) + .is_err()); + } + { + // Disk goes from `spare(nospace err)` -> `full` -> `spare`. + let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); + let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); + let engine = Engine::open(cfg.clone()).unwrap(); + assert!(catch_unwind_silent(|| { + engine + .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .unwrap_err(); + }) + .is_err()); engine - .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) - .unwrap_err(); - }) - .is_err()); - } - { - // Disk goes from `spare(nospace err)` -> `full` -> `spare`. - let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*off->1*return->off"); - let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); - let engine = Engine::open(cfg.clone()).unwrap(); - assert!(catch_unwind_silent(|| { + .write(&mut generate_batch(3, 11, 21, Some(&entry)), true) + .unwrap(); + assert_eq!( + 0, + engine + .fetch_entries_to::(2, 11, 21, None, &mut vec![]) + .unwrap() + ); + assert_eq!( + 10, + engine + .fetch_entries_to::(3, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // Disk is `full` -> `spare`, the first `write` operation should failed. 
+ let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*return->off"); + let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); + let cfg_err = Config { + target_file_size: ReadableSize(1), + ..cfg.clone() + }; + let engine = Engine::open(cfg_err).unwrap(); engine - .write(&mut generate_batch(2, 11, 21, Some(&entry)), true) + .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) .unwrap_err(); - }) - .is_err()); - engine - .write(&mut generate_batch(3, 11, 21, Some(&entry)), true) - .unwrap(); - assert_eq!( - 0, - engine - .fetch_entries_to::(2, 11, 21, None, &mut vec![]) - .unwrap() - ); - assert_eq!( - 10, - engine - .fetch_entries_to::(3, 11, 21, None, &mut vec![]) - .unwrap() - ); - } - { - // Disk is `full` -> `spare`, the first `write` operation should failed. - let _f1 = FailGuard::new("file_pipe_log::force_no_free_space", "1*return->off"); - let _f2 = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); - let cfg_err = Config { - target_file_size: ReadableSize(1), - ..cfg.clone() - }; - let engine = Engine::open(cfg_err).unwrap(); - engine - .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) - .unwrap_err(); - engine - .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) - .unwrap(); - assert_eq!( - 0, - engine - .fetch_entries_to::(2, 11, 21, None, &mut vec![]) - .unwrap() - ); - assert_eq!( - 10, engine - .fetch_entries_to::(4, 11, 21, None, &mut vec![]) - .unwrap() - ); - } - { - // Disk goes from `spare(nospace err)` -> `spare(another dir has enough space)`. - let _f = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); - let engine = Engine::open(cfg.clone()).unwrap(); - engine - .write(&mut generate_batch(5, 11, 21, Some(&entry)), true) - .unwrap(); - engine - .write(&mut generate_batch(6, 11, 21, Some(&entry)), true) - .unwrap(); - assert_eq!( - 10, + .write(&mut generate_batch(4, 11, 21, Some(&entry)), true) + .unwrap(); + assert_eq!( + 0, + engine + .fetch_entries_to::(2, 11, 21, None, &mut vec![]) + .unwrap() + ); + assert_eq!( + 10, + engine + .fetch_entries_to::(4, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // Disk goes from `spare(nospace err)` -> `spare(another dir has enough space)`. + let _f = FailGuard::new("log_fd::write::no_space_err", "1*return->off"); + let engine = Engine::open(cfg.clone()).unwrap(); engine - .fetch_entries_to::(5, 11, 21, None, &mut vec![]) - .unwrap() - ); - assert_eq!( - 10, - engine - .fetch_entries_to::(6, 11, 21, None, &mut vec![]) - .unwrap() - ); - } - { - // Disk goes into endless `spare(nospace err)`, engine do panic for multi- - // retrying. - let _f = FailGuard::new( - "log_fd::write::no_space_err", - "1*return->1*off->1*return->1*off", - ); - let engine = Engine::open(cfg).unwrap(); - assert!(catch_unwind_silent(|| { + .write(&mut generate_batch(5, 11, 21, Some(&entry)), true) + .unwrap(); engine + .write(&mut generate_batch(6, 11, 21, Some(&entry)), true) + .unwrap(); + assert_eq!( + 10, + engine + .fetch_entries_to::(5, 11, 21, None, &mut vec![]) + .unwrap() + ); + assert_eq!( + 10, + engine + .fetch_entries_to::(6, 11, 21, None, &mut vec![]) + .unwrap() + ); + } + { + // Disk goes into endless `spare(nospace err)`, engine do panic for multi- + // retrying. 
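The action strings driving these tests come from the `fail` crate's DSL: `1*return->off` means "trigger `return` once, then disable the point", and chains like `1*return->1*off->1*return->1*off` script alternating outcomes. A minimal standalone sketch of the same pattern (assumes the `fail` crate compiled with its `failpoints` feature; the failpoint name here is made up):

    use fail::fail_point;

    fn try_write() -> Result<(), String> {
        // Fires according to whatever action string is configured.
        fail_point!("demo::no_space", |_| Err("no space left".to_owned()));
        Ok(())
    }

    fn main() {
        // `1*return->off`: fail exactly once, then behave normally.
        fail::cfg("demo::no_space", "1*return->off").unwrap();
        assert!(try_write().is_err()); // first call hits the failpoint
        assert!(try_write().is_ok());  // later calls succeed
    }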
+ let _f = FailGuard::new( + "log_fd::write::no_space_err", + "1*return->1*off->1*return->1*off", + ); + let engine = Engine::open(cfg.clone()).unwrap(); + assert!(engine .write(&mut generate_batch(7, 11, 21, Some(&entry)), true) - .unwrap_err(); - }) - .is_err()); - assert_eq!( - 0, - engine - .fetch_entries_to::(7, 11, 21, None, &mut vec![]) - .unwrap() - ); + .is_err()); + assert_eq!( + 0, + engine + .fetch_entries_to::(7, 11, 21, None, &mut vec![]) + .unwrap() + ); + } } } From f306981eb4fd9e74acb93a27f178efed6428db5c Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Thu, 1 Sep 2022 19:11:29 +0800 Subject: [PATCH 17/22] Clean unnecessary testing dirs in uts. Signed-off-by: Lucasliang --- tests/failpoints/test_engine.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/failpoints/test_engine.rs b/tests/failpoints/test_engine.rs index ecbe1055..1bd7f767 100644 --- a/tests/failpoints/test_engine.rs +++ b/tests/failpoints/test_engine.rs @@ -1,5 +1,6 @@ // Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0. +use std::fs; use std::sync::{Arc, Barrier}; use std::time::Duration; @@ -789,7 +790,7 @@ fn test_build_engine_with_multi_dir() { secondary_dir: Some("./abnormal_testing".to_owned()), ..cfg.clone() }; - let engine = Engine::open(cfg_err).unwrap(); + let engine = Engine::open(cfg_err.clone()).unwrap(); append(&engine, rid, 1, 2, Some(&data)); // file_seq: 1 append(&engine, rid, 2, 3, Some(&data)); append(&engine, rid, 3, 4, Some(&data)); // file_seq: 2 @@ -806,6 +807,7 @@ fn test_build_engine_with_multi_dir() { engine.purge_expired_files().unwrap(); assert!(engine.file_span(LogQueue::Append).0 > start_seq); start_seq = engine.file_span(LogQueue::Append).0; + assert!(fs::remove_dir_all(&cfg_err.secondary_dir.unwrap()).is_ok()); } // Open engine with multi directories, main dir and secondary dir. { From 00b7059972c4385bf5ada529471033634a415c5e Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Tue, 6 Sep 2022 17:44:52 +0800 Subject: [PATCH 18/22] Tidy style of annotations. Signed-off-by: Lucasliang --- src/engine.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 527889b7..e00e34e9 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -210,8 +210,9 @@ where ); } } else if sync { - // As per trait protocol, this error should be retriable. But we panic anyway to - // save the trouble of propagating it to other group members. + // As per trait protocol, this error should be retriable. But we panic + // anyway to save the trouble of propagating it to + // other group members. self.pipe_log.sync(LogQueue::Append).expect("pipe::sync()"); } // Pass the perf context diff to all the writers. From 0a0b7054d8c0da7c504d3d337ef99850f9be2743 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Thu, 8 Sep 2022 18:22:17 +0800 Subject: [PATCH 19/22] Refine the code style, and refine the strategy for `write` LogBatchs. 
Signed-off-by: Lucasliang --- src/config.rs | 4 +- src/engine.rs | 171 ++++++++++++++---------------- src/env/default.rs | 25 ----- src/env/mod.rs | 2 - src/file_pipe_log/pipe.rs | 155 ++++++++++++++------------- src/file_pipe_log/pipe_builder.rs | 28 ++--- src/log_batch.rs | 23 ++-- src/pipe_log.rs | 1 + src/purge.rs | 4 +- src/util.rs | 33 ++++++ 10 files changed, 220 insertions(+), 226 deletions(-) diff --git a/src/config.rs b/src/config.rs index 05f9f283..e341684d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,8 +5,8 @@ use serde::{Deserialize, Serialize}; use std::fs; use std::path::Path; -use crate::env::from_same_dev; use crate::pipe_log::Version; +use crate::util::dev_ext::on_same_dev; use crate::{util::ReadableSize, Result}; const MIN_RECOVERY_READ_BLOCK_SIZE: usize = 512; @@ -200,7 +200,7 @@ impl Config { if_chain::if_chain! { if let Some(secondary_dir) = self.secondary_dir.as_ref(); if validate_dir(secondary_dir).is_ok(); - if let Ok(true) = from_same_dev(&self.dir, secondary_dir); + if let Ok(true) = on_same_dev(&self.dir, secondary_dir); then { warn!( "secondary-dir ({}) and dir ({}) are on same device, recommend setting it to another device", diff --git a/src/engine.rs b/src/engine.rs index f332b726..dc2ef298 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -25,6 +25,8 @@ use crate::write_barrier::{WriteBarrier, Writer}; use crate::{perf_context, Error, GlobalStats, Result}; const METRICS_FLUSH_INTERVAL: Duration = Duration::from_secs(30); +/// Max retry count for `write`. +const MAX_WRITE_RETRY_COUNT: u64 = 2; pub struct Engine> where @@ -142,103 +144,92 @@ where let start = Instant::now(); let len = log_batch.finish_populate(self.cfg.batch_compression_threshold.0 as usize)?; debug_assert!(len > 0); - let block_handle = { + + let mut attempt_count = 0_u64; + // Flag on whether force to rotate the current active file or not. + let mut force_rotate = false; + let block_handle = loop { // Max retry count is limited to `2`. If the first `append` retry because of // `NOSPC` error, the next `append` should success, unless there exists - // several abnormal cases in the IO device. In that case, - // `Engine::write` must return `Err`. - let mut retry_count = 2; - let mut handle = Ok(FileBlockHandle { - id: FileId::new(LogQueue::Append, 0), - offset: 0, - len: 0, - }); - while retry_count > 0 { - let mut writer = Writer::new(log_batch, sync); - // Snapshot and clear the current perf context temporarily, so the write group - // leader will collect the perf context diff later. - let mut perf_context = take_perf_context(); - let before_enter = Instant::now(); - if let Some(mut group) = self.write_barrier.enter(&mut writer) { - let now = Instant::now(); - let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); - // Flag on whether force to rotate the current active file or not. - let mut force_rotate = false; - for writer in group.iter_mut() { - writer.entered_time = Some(now); - sync |= writer.sync; - // To avoid redundant `write` check when `force_rotate` == `true`, we will - // directly assign a special `Error::Other(...)` to the following followers. 
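The fail-fast path described in the comment above can be seen in isolation: the leader walks the write group once, and as soon as one writer hits a full active file, that writer and everyone behind it get the special retriable error instead of attempting more IO. A simplified model of the control flow (the types are stand-ins), with the real code continuing right after:

    #[derive(Debug)]
    enum Outcome {
        Written(u64), // offset of the appended block
        MustRetry,    // models the special Error::Other handed to followers
    }

    /// `payload_fits[i]` says whether writer i's batch fits in the active file.
    fn process_group(payload_fits: &[bool]) -> Vec<Outcome> {
        let mut force_rotate = false;
        let mut outcomes = Vec::with_capacity(payload_fits.len());
        for &fits in payload_fits {
            if force_rotate {
                // Fail fast: skip the IO attempt entirely for later writers.
                outcomes.push(Outcome::MustRetry);
                continue;
            }
            if !fits {
                // Active file is full: rotate before the next attempt.
                force_rotate = true;
                outcomes.push(Outcome::MustRetry);
                continue;
            }
            outcomes.push(Outcome::Written(4096));
        }
        outcomes
    }

    fn main() {
        // The third writer overflows the file; it and the fourth must retry.
        println!("{:?}", process_group(&[true, true, false, true]));
    }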
- if force_rotate { - writer.set_output(Err(Error::Other(box_err!( - "Failed to append logbatch, try to dump it to other dir" - )))); - continue; - } - let log_batch = writer.mut_payload(); - let res = self.pipe_log.append(LogQueue::Append, log_batch); - // If we found that there is no spare space for the next LogBatch in the - // current active file, we will mark the `force_rotate` with `true` to - // notify the leader do `rotate` immediately. - if let Err(Error::Other(e)) = res { - warn!( - "Cannot append, err: {}, try to re-append this log_batch into next log", - e - ); - force_rotate = true; - writer.set_output(Err(Error::Other(box_err!( - "Failed to append logbatch, try to dump it to other dir" - )))); - continue; - } - writer.set_output(res); - } - perf_context!(log_write_duration).observe_since(now); - if force_rotate { - // If the leader failed to `rotate` a new log for the un-synced LogBatches, - // it means that it encounters unexpected errors, and should panic. - if let Err(e) = self.pipe_log.rotate(LogQueue::Append) { - panic!( - "Cannot rotate {:?} queue due to IO error: {}", - LogQueue::Append, - e - ); - } - } else if sync { - // As per trait protocol, this error should be retriable. But we panic - // anyway to save the trouble of propagating it to - // other group members. - self.pipe_log.sync(LogQueue::Append).expect("pipe::sync()"); + // several abnormal cases in the IO device. In that case, `Engine::write` + // must return `Err`. + attempt_count += 1; + let mut writer = Writer::new(log_batch, sync); + // Snapshot and clear the current perf context temporarily, so the write group + // leader will collect the perf context diff later. + let mut perf_context = take_perf_context(); + let before_enter = Instant::now(); + if let Some(mut group) = self.write_barrier.enter(&mut writer) { + let now = Instant::now(); + let _t = StopWatch::new_with(&*ENGINE_WRITE_LEADER_DURATION_HISTOGRAM, now); + for writer in group.iter_mut() { + writer.entered_time = Some(now); + sync |= writer.sync; + + let log_batch = writer.mut_payload(); + let res = self + .pipe_log + .append(LogQueue::Append, log_batch, force_rotate); + // If we found that there is no spare space for the next LogBatch in the + // current active file, we will mark the `force_rotate` with `true` to + // notify the leader do `rotate` immediately. + if let Err(Error::Other(e)) = res { + warn!( + "Cannot append, err: {}, try to re-append this log_batch into next log", + e + ); + // Notify the next `append` to rotate current file. + force_rotate = true; + writer.set_output(Err(Error::Other(box_err!( + "Failed to append logbatch, try to dump it to other dir" + )))); + continue; } - // Pass the perf context diff to all the writers. - let diff = get_perf_context(); - for writer in group.iter_mut() { - writer.perf_context_diff = diff.clone(); + force_rotate = false; + writer.set_output(res); + } + perf_context!(log_write_duration).observe_since(now); + if sync { + // As per trait protocol, this error should be retriable. But we panic + // anyway to save the trouble of propagating it to + // other group members. + self.pipe_log.sync(LogQueue::Append).expect("pipe::sync()"); + } + // Pass the perf context diff to all the writers. 
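The refactor also leans on `loop` being an expression in Rust: `break handle` (visible just below in the `match writer.finish()` arm) yields the block handle out of the retry loop. The idiom in isolation:

    fn main() {
        let mut attempts = 0;
        let value = loop {
            attempts += 1;
            if attempts == 2 {
                // `break` with a value ends the whole loop expression.
                break attempts * 10;
            }
        };
        assert_eq!(value, 20);
    }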
+ let diff = get_perf_context(); + for writer in group.iter_mut() { + writer.perf_context_diff = diff.clone(); + } + } + let entered_time = writer.entered_time.unwrap(); + ENGINE_WRITE_PREPROCESS_DURATION_HISTOGRAM + .observe(entered_time.saturating_duration_since(start).as_secs_f64()); + perf_context.write_wait_duration += + entered_time.saturating_duration_since(before_enter); + debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); + perf_context += &writer.perf_context_diff; + set_perf_context(perf_context); + // Retry if `writer.finish()` returns a special 'Error::Other', remarking that + // there still exists free space for this `LogBatch`. + match writer.finish() { + Ok(handle) => { + break handle; + } + Err(Error::Other(_)) => { + // A special err, we will retry this LogBatch `append` by appending + // this writer to the next write group, and the current write leader + // will not hang on this write and will return timely. + if attempt_count >= MAX_WRITE_RETRY_COUNT { + return Err(Error::Other(box_err!( + "Failed to write logbatch, exceed max_retry_count: ({})", + MAX_WRITE_RETRY_COUNT + ))); } } - let entered_time = writer.entered_time.unwrap(); - ENGINE_WRITE_PREPROCESS_DURATION_HISTOGRAM - .observe(entered_time.saturating_duration_since(start).as_secs_f64()); - perf_context.write_wait_duration += - entered_time.saturating_duration_since(before_enter); - debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); - perf_context += &writer.perf_context_diff; - set_perf_context(perf_context); - // Retry if `writer.finish()` returns a special 'Error::Other', remarking that - // there still exists free space for this `LogBatch`. - handle = writer.finish(); - if let Err(Error::Other(_)) = handle { - retry_count -= 1; - log_batch.prepare_rewrite()?; - // Here, we will retry this LogBatch `append` by appending this writer - // to the next write group, and the current write leader will not hang - // on this write and will return timely. - continue; - } else { - break; + Err(e) => { + return Err(e); } } - handle? }; let mut now = Instant::now(); log_batch.finish_write(block_handle); diff --git a/src/env/default.rs b/src/env/default.rs index 8987939c..7f8432b6 100644 --- a/src/env/default.rs +++ b/src/env/default.rs @@ -21,31 +21,6 @@ fn from_nix_error(e: nix::Error, custom: &'static str) -> std::io::Error { std::io::Error::new(kind, custom) } -pub fn from_same_dev>(dir1: P, dir2: P) -> IoResult { - fail_point!("env::force_on_different_dev", |_| { Ok(false) }); - if dir1.as_ref().starts_with(dir2.as_ref()) || dir2.as_ref().starts_with(dir1.as_ref()) { - return Ok(true); - } - #[cfg(any(target_os = "unix", target_os = "macos"))] - { - use std::os::unix::fs::MetadataExt; - let meta1 = std::fs::metadata(dir1.as_ref())?; - let meta2 = std::fs::metadata(dir2.as_ref())?; - Ok(meta1.dev() == meta2.dev()) - } - #[cfg(target_os = "linux")] - { - use std::os::linux::fs::MetadataExt; - let meta1 = std::fs::metadata(dir1.as_ref())?; - let meta2 = std::fs::metadata(dir2.as_ref())?; - Ok(meta1.st_dev() == meta2.st_dev()) - } - #[cfg(not(any(target_os = "linux", target_os = "unix", target_os = "macos")))] - { - unimplemented!() - } -} - /// A RAII-style low-level file. Errors occurred during automatic resource /// release are logged and ignored. 
 ///
diff --git a/src/env/mod.rs b/src/env/mod.rs
index 4dae7582..6ae4bf9d 100644
--- a/src/env/mod.rs
+++ b/src/env/mod.rs
@@ -10,8 +10,6 @@ mod obfuscated;
 pub use default::DefaultFileSystem;
 pub use obfuscated::ObfuscatedFileSystem;
 
-pub(crate) use default::from_same_dev;
-
 /// FileSystem
 pub trait FileSystem: Send + Sync {
     type Handle: Send + Sync + Handle;
diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs
index e0abbbb8..19292b15 100644
--- a/src/file_pipe_log/pipe.rs
+++ b/src/file_pipe_log/pipe.rs
@@ -25,78 +25,75 @@ use super::log_file::{build_file_reader, build_file_writer, LogFileWriter};
 
 #[repr(u8)]
 #[derive(Clone, Copy, PartialEq, Eq, Debug, EnumIter)]
-pub enum StorageDirType {
+pub enum DirPathId {
     Main = 0,
     Secondary = 1,
 }
 
-/// Represents the info of storage dirs, including `main dir` and
+/// Manages multiple dirs for storing logs, including `main dir` and
 /// `secondary dir`.
-struct StorageInfo {
-    storage: Vec<String>,
+struct DirectoryManager {
+    dirs: Vec<String>,
 }
 
-impl StorageInfo {
+impl DirectoryManager {
     fn new(dir: String, secondary_dir: Option<String>) -> Self {
-        let mut storage = vec![dir; 1];
+        let mut dirs = vec![dir; 1];
         if let Some(sec_dir) = secondary_dir {
-            storage.push(sec_dir);
+            dirs.push(sec_dir);
         }
-        Self { storage }
+        Self { dirs }
     }
 
     #[inline]
-    fn get_free_dir(&self, target_size: usize) -> Option<(&str, StorageDirType)> {
+    fn get_free_dir(&self, target_size: usize) -> Option<(&str, DirPathId)> {
         #[cfg(feature = "failpoints")]
         {
             fail::fail_point!("file_pipe_log::force_use_secondary_dir", |_| {
                 Some((
-                    self.storage[StorageDirType::Secondary as usize].as_str(),
-                    StorageDirType::Secondary,
+                    self.dirs[DirPathId::Secondary as usize].as_str(),
+                    DirPathId::Secondary,
                 ))
             });
             fail::fail_point!("file_pipe_log::force_no_free_space", |_| { None });
         }
-        for t in StorageDirType::iter() {
+        for t in DirPathId::iter() {
             let idx = t as usize;
-            if idx >= self.storage.len() {
+            if idx >= self.dirs.len() {
                 break;
             }
-            let disk_stats = match fs2::statvfs(&self.storage[idx]) {
+            let disk_stats = match fs2::statvfs(&self.dirs[idx]) {
                 Err(e) => {
                     error!(
                         "get disk stat for raft engine failed, dir_path: {}, err: {}",
-                        &self.storage[idx], e
+                        &self.dirs[idx], e
                     );
                     return None;
                 }
                 Ok(stats) => stats,
             };
             if target_size <= disk_stats.available_space() as usize {
-                return Some((&self.storage[idx], t));
+                return Some((&self.dirs[idx], t));
            }
        }
        None
    }

     #[inline]
-    fn get_dir(&self, storage_type: StorageDirType) -> Option<&str> {
-        let idx = storage_type as usize;
-        if idx >= self.storage.len() {
+    fn get_dir(&self, path_id: DirPathId) -> Option<&str> {
+        let idx = path_id as usize;
+        if idx >= self.dirs.len() {
             None
         } else {
-            Some(&self.storage[idx])
+            Some(&self.dirs[idx])
         }
     }
 
     #[inline]
-    fn sync_all_dir(&self) -> Result<()> {
-        for t in StorageDirType::iter() {
-            let idx = t as usize;
-            if idx >= self.storage.len() {
-                break;
-            }
-            let path = PathBuf::from(&self.storage[idx]);
+    fn sync_dir(&self, path_id: DirPathId) -> Result<()> {
+        let idx = path_id as usize;
+        if idx < self.dirs.len() {
+            let path = PathBuf::from(&self.dirs[idx]);
             std::fs::File::open(path).and_then(|d| d.sync_all())?;
         }
         Ok(())
@@ -107,7 +104,7 @@ impl StorageInfo {
 pub struct FileWithFormat<F: FileSystem> {
     pub handle: Arc<F::Handle>,
     pub format: LogFileFormat,
-    pub storage_type: StorageDirType,
+    pub path_id: DirPathId,
 }
 
 struct FileCollection<F: FileSystem> {
@@ -133,15 +130,15 @@ struct FileState {
 impl<F: FileSystem> FileCollection<F> {
     /// Takes a stale file if there is one.
#[inline] - fn recycle_one_file(&mut self) -> Option<(FileSeq, StorageDirType)> { + fn recycle_one_file(&mut self) -> Option<(FileSeq, DirPathId)> { debug_assert!(self.first_seq <= self.first_seq_in_use); debug_assert!(!self.fds.is_empty()); if self.first_seq < self.first_seq_in_use { let seq = self.first_seq; - let storage_type = self.fds[0].storage_type; + let path_id = self.fds[0].path_id; self.fds.pop_front().unwrap(); self.first_seq += 1; - Some((seq, storage_type)) + Some((seq, path_id)) } else { None } @@ -161,8 +158,8 @@ impl FileCollection { fn logical_purge( &mut self, file_seq: FileSeq, - ) -> (FileState, FileState, Vec<(FileSeq, StorageDirType)>) { - let mut purged_files = Vec::<(FileSeq, StorageDirType)>::default(); + ) -> (FileState, FileState, Vec<(FileSeq, DirPathId)>) { + let mut purged_files = Vec::<(FileSeq, DirPathId)>::default(); let prev = FileState { first_seq: self.first_seq, first_seq_in_use: self.first_seq_in_use, @@ -185,7 +182,7 @@ impl FileCollection { } purged_files.reserve(purged); for i in 0..purged { - purged_files.push((i as u64 + self.first_seq, self.fds[i].storage_type)); + purged_files.push((i as u64 + self.first_seq, self.fds[i].path_id)); } self.first_seq += purged as u64; self.first_seq_in_use = file_seq; @@ -220,8 +217,8 @@ pub(super) struct SinglePipe { /// `active_file` must be locked first to acquire both `files` and /// `active_file` active_file: CachePadded>>, - /// Info of storage dir. - storage: StorageInfo, + /// Manager of directory. + dir_manager: DirectoryManager, } impl Drop for SinglePipe { @@ -239,8 +236,8 @@ impl Drop for SinglePipe { seq, }; let dir = self - .storage - .get_dir(files.fds[(seq - files.first_seq) as usize].storage_type); + .dir_manager + .get_dir(files.fds[(seq - files.first_seq) as usize].path_id); debug_assert!(dir.is_some()); let path = file_id.build_file_path(dir.unwrap()); if let Err(e) = self.file_system.delete(&path) { @@ -280,11 +277,11 @@ impl SinglePipe { } } - let storage = StorageInfo::new(cfg.dir.clone(), cfg.secondary_dir.clone()); + let dir_manager = DirectoryManager::new(cfg.dir.clone(), cfg.secondary_dir.clone()); let create_file = first_seq == 0; let active_seq = if create_file { first_seq = 1; - let (dir, dir_type) = match storage.get_free_dir(cfg.target_file_size.0 as usize) { + let (dir, path_id) = match dir_manager.get_free_dir(cfg.target_file_size.0 as usize) { Some((d, t)) => (d, t), None => { // No space for writing. @@ -301,7 +298,7 @@ impl SinglePipe { fds.push_back(FileWithFormat { handle: fd, format: LogFileFormat::new(cfg.format_version, alignment), - storage_type: dir_type, + path_id, }); first_seq } else { @@ -340,7 +337,7 @@ impl SinglePipe { capacity, })), active_file: CachePadded::new(Mutex::new(active_file)), - storage, + dir_manager, }; pipe.flush_metrics(total_files); Ok(pipe) @@ -348,8 +345,8 @@ impl SinglePipe { /// Synchronizes all metadatas associated with the working directory to the /// filesystem. - fn sync_dir(&self) -> Result<()> { - self.storage.sync_all_dir() + fn sync_dir(&self, path_id: DirPathId) -> Result<()> { + self.dir_manager.sync_dir(path_id) } /// Returns a shared [`LogFd`] for the specified file sequence number. @@ -384,11 +381,11 @@ impl SinglePipe { }; // Generate a new fd from a newly chosen file, might be reused from a stale // file or generated from a newly created file. 
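`recycle_one_file` above is a queue-front operation: sequence numbers in `[first_seq, first_seq_in_use)` are stale and reusable, and recycling pops the oldest. A minimal model of that bookkeeping (fields simplified from `FileCollection`):

    use std::collections::VecDeque;

    struct Files {
        first_seq: u64,        // seq of files.front()
        first_seq_in_use: u64, // seqs below this are stale and recyclable
        files: VecDeque<&'static str>,
    }

    impl Files {
        /// Pops one stale file for reuse, if any exists.
        fn recycle_one_file(&mut self) -> Option<(u64, &'static str)> {
            if self.first_seq < self.first_seq_in_use {
                let seq = self.first_seq;
                let f = self.files.pop_front()?;
                self.first_seq += 1;
                Some((seq, f))
            } else {
                None
            }
        }
    }

    fn main() {
        let mut files = Files {
            first_seq: 1,
            first_seq_in_use: 3,
            files: VecDeque::from(vec!["f1", "f2", "f3"]),
        };
        assert_eq!(files.recycle_one_file(), Some((1, "f1")));
        assert_eq!(files.recycle_one_file(), Some((2, "f2")));
        assert_eq!(files.recycle_one_file(), None); // only in-use files remain
    }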
- let (fd, storage_type) = { + let (fd, path_id) = { let mut files = self.files.write(); - if let Some((seq, storage_type)) = files.recycle_one_file() { + if let Some((seq, path_id)) = files.recycle_one_file() { // Has stale files for recycling, the old file will be reused. - let dir = self.storage.get_dir(storage_type).unwrap(); + let dir = self.dir_manager.get_dir(path_id).unwrap(); let src_file_id = FileId { queue: self.queue, seq, @@ -400,11 +397,11 @@ impl SinglePipe { if let Err(e) = self.file_system.delete(&src_path) { error!("error while trying to delete one expired file: {}", e); } - (Arc::new(self.file_system.create(&dst_path)?), storage_type) + (Arc::new(self.file_system.create(&dst_path)?), path_id) } else { - (Arc::new(self.file_system.open(&dst_path)?), storage_type) + (Arc::new(self.file_system.open(&dst_path)?), path_id) } - } else if let Some((d, t)) = self.storage.get_free_dir(self.target_file_size) { + } else if let Some((d, t)) = self.dir_manager.get_free_dir(self.target_file_size) { // Has free space for newly writing, a new file is introduced. let path = file_id.build_file_path(&d); (Arc::new(self.file_system.create(&path)?), t) @@ -430,7 +427,7 @@ impl SinglePipe { // File header must be persisted. This way we can recover gracefully if power // loss before a new entry is written. new_file.writer.sync()?; - self.sync_dir()?; + self.sync_dir(path_id)?; let version = new_file.format.version; let alignment = new_file.format.alignment; **active_file = new_file; @@ -438,7 +435,7 @@ impl SinglePipe { let state = self.files.write().push(FileWithFormat { handle: fd, format: LogFileFormat::new(version, alignment), - storage_type, + path_id, }); for listener in &self.listeners { listener.post_new_log_file(FileId { @@ -468,10 +465,14 @@ impl SinglePipe { reader.read(handle) } - fn append(&self, bytes: &mut T) -> Result { + fn append( + &self, + bytes: &mut T, + force_rotate: bool, + ) -> Result { fail_point!("file_pipe_log::append"); let mut active_file = self.active_file.lock(); - if active_file.writer.offset() >= self.target_file_size { + if active_file.writer.offset() >= self.target_file_size || force_rotate { if let Err(e) = self.rotate_imp(&mut active_file) { panic!( "error when rotate [{:?}:{}]: {}", @@ -533,7 +534,7 @@ impl SinglePipe { let has_free_space = { let files = self.files.read(); files.first_seq < files.first_seq_in_use /* has stale files */ - || self.storage.get_free_dir(self.target_file_size).is_some() + || self.dir_manager.get_free_dir(self.target_file_size).is_some() }; // If there still exists free space for this record, a special Err will // be returned to the caller. 
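`get_free_dir` boils down to "first directory whose filesystem can hold the target size wins". A hedged sketch of the probe using the same `fs2::statvfs` call the patch relies on (assumes `fs2` as a dependency; the candidate list and error handling are simplified — the real code logs the error and bails out):

    use std::path::Path;

    /// Returns the first candidate dir whose filesystem has `needed` bytes free.
    fn pick_dir<'a>(candidates: &[&'a str], needed: u64) -> Option<&'a str> {
        for &dir in candidates {
            match fs2::statvfs(Path::new(dir)) {
                // Enough headroom: this directory wins.
                Ok(stats) if stats.available_space() >= needed => return Some(dir),
                // Too full: fall through to the next candidate.
                Ok(_) => continue,
                // Unreadable: skip it in this sketch.
                Err(_) => continue,
            }
        }
        None
    }

    fn main() {
        // Probe the current directory for 4 KiB of headroom.
        println!("{:?}", pick_dir(&["."], 4096));
    }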
@@ -612,7 +613,7 @@ impl SinglePipe { queue: self.queue, seq: *seq, }; - let dir = self.storage.get_dir(*dir_type); + let dir = self.dir_manager.get_dir(*dir_type); debug_assert!(dir.is_some()); let path = file_id.build_file_path(dir.unwrap()); #[cfg(feature = "failpoints")] @@ -674,8 +675,9 @@ impl PipeLog for DualPipes { &self, queue: LogQueue, bytes: &mut T, + force_rotate: bool, ) -> Result { - self.pipes[queue as usize].append(bytes) + self.pipes[queue as usize].append(bytes, force_rotate) } #[inline] @@ -778,12 +780,12 @@ mod tests { // generate file 1, 2, 3 let content: Vec = vec![b'a'; 1024]; - let file_handle = pipe_log.append(queue, &mut &content).unwrap(); + let file_handle = pipe_log.append(queue, &mut &content, false).unwrap(); assert_eq!(file_handle.id.seq, 1); assert_eq!(file_handle.offset, header_size); assert_eq!(pipe_log.file_span(queue).1, 1); - let file_handle = pipe_log.append(queue, &mut &content).unwrap(); + let file_handle = pipe_log.append(queue, &mut &content, false).unwrap(); assert_eq!(file_handle.id.seq, 2); assert_eq!(file_handle.offset, header_size); assert_eq!(pipe_log.file_span(queue).1, 2); @@ -799,11 +801,11 @@ mod tests { // append position let s_content = b"short content".to_vec(); - let file_handle = pipe_log.append(queue, &mut &s_content).unwrap(); + let file_handle = pipe_log.append(queue, &mut &s_content, false).unwrap(); assert_eq!(file_handle.id.seq, 3); assert_eq!(file_handle.offset, header_size); - let file_handle = pipe_log.append(queue, &mut &s_content).unwrap(); + let file_handle = pipe_log.append(queue, &mut &s_content, false).unwrap(); assert_eq!(file_handle.id.seq, 3); assert_eq!( file_handle.offset, @@ -845,7 +847,7 @@ mod tests { .unwrap(), ), format: LogFileFormat::new(version, 0 /* alignment */), - storage_type: StorageDirType::Main, + path_id: DirPathId::Main, } } let dir = Builder::new() @@ -932,7 +934,7 @@ mod tests { } let mut handles = Vec::new(); for i in 0..10 { - handles.push(pipe_log.append(&mut &content(i)).unwrap()); + handles.push(pipe_log.append(&mut &content(i), false).unwrap()); pipe_log.sync().unwrap(); } pipe_log.rotate().unwrap(); @@ -954,7 +956,7 @@ mod tests { // Try to reuse. let mut handles = Vec::new(); for i in 0..10 { - handles.push(pipe_log.append(&mut &content(i + 1)).unwrap()); + handles.push(pipe_log.append(&mut &content(i + 1), false).unwrap()); pipe_log.sync().unwrap(); } // Verify the data. @@ -964,31 +966,28 @@ mod tests { } #[test] - fn test_storage_info() { + fn test_directory_manager() { let dir = Builder::new() - .prefix("test_storage_info_main_dir") + .prefix("test_directory_manager_main_dir") .tempdir() .unwrap(); let secondary_dir = Builder::new() - .prefix("test_storage_info_sec_dir") + .prefix("test_directory_manager_sec_dir") .tempdir() .unwrap(); let path = dir.path().to_str().unwrap(); let sec_path = secondary_dir.path().to_str().unwrap(); { - // Test StorageInfo with main dir only. - let storage = StorageInfo::new(path.to_owned(), None); - assert_eq!(storage.get_dir(StorageDirType::Main).unwrap(), path); - assert!(storage.get_dir(StorageDirType::Secondary).is_none()); + // Test DirectoryManager with main dir only. + let dir_manager = DirectoryManager::new(path.to_owned(), None); + assert_eq!(dir_manager.get_dir(DirPathId::Main).unwrap(), path); + assert!(dir_manager.get_dir(DirPathId::Secondary).is_none()); } { - // Test StorageInfo both with main dir and secondary dir. 
- let storage = StorageInfo::new(path.to_owned(), Some(sec_path.to_owned())); - assert_eq!(storage.get_dir(StorageDirType::Main).unwrap(), path); - assert_eq!( - storage.get_dir(StorageDirType::Secondary).unwrap(), - sec_path - ); + // Test DirectoryManager both with main dir and secondary dir. + let dir_manager = DirectoryManager::new(path.to_owned(), Some(sec_path.to_owned())); + assert_eq!(dir_manager.get_dir(DirPathId::Main).unwrap(), path); + assert_eq!(dir_manager.get_dir(DirPathId::Secondary).unwrap(), sec_path); } } } diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index deeabca4..83d3bb30 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -23,7 +23,7 @@ use crate::{Error, Result}; use super::format::{lock_file_path, FileNameExt, LogFileFormat}; use super::log_file::build_file_reader; -use super::pipe::{DualPipes, FileWithFormat, SinglePipe, StorageDirType}; +use super::pipe::{DirPathId, DualPipes, FileWithFormat, SinglePipe}; use super::reader::LogItemBatchFileReader; use crate::env::Handle; @@ -68,7 +68,7 @@ struct FileToRecover { seq: FileSeq, handle: Arc, format: Option, - storage_type: StorageDirType, + path_id: DirPathId, } /// [`DualPipes`] factory that can also recover other customized memory states. @@ -86,7 +86,7 @@ pub struct DualPipesBuilder { } type FileSeqRange = (u64, u64); /* (minimal_id, maximal_id) */ -type FileSeqDict = HashMap; /* HashMap */ +type FileSeqDict = HashMap; /* HashMap */ struct FileScanner { file_seq_range: FileSeqRange, file_dict: FileSeqDict, @@ -119,7 +119,7 @@ impl DualPipesBuilder { // Scan main `dir` and `secondary-dir`, if it was valid. let (_, dir_lock) = DualPipesBuilder::::scan_dir( &self.cfg.dir, - StorageDirType::Main, + DirPathId::Main, &mut append_scanner, &mut rewrite_scanner, )?; @@ -127,7 +127,7 @@ impl DualPipesBuilder { if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { DualPipesBuilder::::scan_dir( secondary_dir, - StorageDirType::Secondary, + DirPathId::Secondary, &mut append_scanner, &mut rewrite_scanner, )?; @@ -168,13 +168,13 @@ impl DualPipesBuilder { } for seq in min_id..=max_id { let file_id = FileId { queue, seq }; - let (dir, storage_type) = match file_dict.get(&seq) { - Some(StorageDirType::Main) => (&self.cfg.dir, StorageDirType::Main), - Some(StorageDirType::Secondary) => { + let (dir, path_id) = match file_dict.get(&seq) { + Some(DirPathId::Main) => (&self.cfg.dir, DirPathId::Main), + Some(DirPathId::Secondary) => { debug_assert!(self.cfg.secondary_dir.is_some()); ( self.cfg.secondary_dir.as_ref().unwrap(), - StorageDirType::Secondary, + DirPathId::Secondary, ) } None => { @@ -192,7 +192,7 @@ impl DualPipesBuilder { seq, handle, format: None, - storage_type, + path_id, }); } } @@ -205,7 +205,7 @@ impl DualPipesBuilder { /// Returns the valid file count and the relative dir_lock. 
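// Editor's sketch (standalone and hypothetical, not part of the patch):
// the merged `file_dict` above maps each file seq to the dir it was found
// in (`u8` stands in for the dir id here); recovery then walks min..=max
// and keeps only the contiguous tail after the last hole, mirroring the
// `None` arm that clears `files` and continues.
fn contiguous_tail(file_dict: &std::collections::HashMap<u64, u8>) -> Vec<u64> {
    let (min, max) = file_dict
        .keys()
        .fold((u64::MAX, 0_u64), |(lo, hi), &s| (lo.min(s), hi.max(s)));
    let mut tail = Vec::new();
    for seq in min..=max {
        if file_dict.contains_key(&seq) {
            tail.push(seq);
        } else {
            tail.clear(); // a hole discards everything scanned so far
        }
    }
    tail
}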
fn scan_dir( dir: &str, - dir_type: StorageDirType, + path_id: DirPathId, append_scanner: &mut FileScanner, rewrite_scanner: &mut FileScanner, ) -> Result<(usize, Option)> { @@ -221,7 +221,7 @@ impl DualPipesBuilder { queue: LogQueue::Append, seq, }) => { - append_scanner.file_dict.insert(seq, dir_type); + append_scanner.file_dict.insert(seq, path_id); append_scanner.file_seq_range.0 = std::cmp::min(append_scanner.file_seq_range.0, seq); append_scanner.file_seq_range.1 = @@ -232,7 +232,7 @@ impl DualPipesBuilder { queue: LogQueue::Rewrite, seq, }) => { - rewrite_scanner.file_dict.insert(seq, dir_type); + rewrite_scanner.file_dict.insert(seq, path_id); rewrite_scanner.file_seq_range.0 = std::cmp::min(rewrite_scanner.file_seq_range.0, seq); rewrite_scanner.file_seq_range.1 = @@ -486,7 +486,7 @@ impl DualPipesBuilder { .map(|f| FileWithFormat { handle: f.handle.clone(), format: f.format.unwrap(), - storage_type: f.storage_type, + path_id: f.path_id, }) .collect(); SinglePipe::open( diff --git a/src/log_batch.rs b/src/log_batch.rs index e42703bf..2064b2e4 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -810,6 +810,15 @@ impl LogBatch { LogItemBatch::prepare_write(&mut self.buf, file_context)?, ); } + BufState::Sealed(header_offset, entries_len, signature) => { + LogItemBatch::prepare_rewrite(&mut self.buf, signature)?; + // Re-seal the LogBatch. + self.buf_state = BufState::Sealed( + header_offset, + entries_len, + LogItemBatch::prepare_write(&mut self.buf, file_context)?, + ); + } _ => unreachable!(), } Ok(()) @@ -853,20 +862,6 @@ impl LogBatch { self.item_batch.drain() } - /// Makes preparations for rewriting the `LogBatch`. - #[inline] - pub(crate) fn prepare_rewrite(&mut self) -> Result<()> { - debug_assert!(matches!(self.buf_state, BufState::Sealed(_, _, _))); - match self.buf_state { - BufState::Sealed(header_offset, entries_len, signature) => { - LogItemBatch::prepare_rewrite(&mut self.buf, signature)?; - self.buf_state = BufState::Encoded(header_offset, entries_len); - Ok(()) - } - _ => unreachable!(), - } - } - /// Returns approximate encoded size of this log batch. Might be larger /// than the actual size. pub fn approximate_size(&self) -> usize { diff --git a/src/pipe_log.rs b/src/pipe_log.rs index aa9b7925..43d465ab 100644 --- a/src/pipe_log.rs +++ b/src/pipe_log.rs @@ -178,6 +178,7 @@ pub trait PipeLog: Sized { &self, queue: LogQueue, bytes: &mut T, + force_rotate: bool, ) -> Result; /// Synchronizes all buffered writes. diff --git a/src/purge.rs b/src/purge.rs index cfefd219..61a3631a 100644 --- a/src/purge.rs +++ b/src/purge.rs @@ -364,7 +364,9 @@ where return self.pipe_log.sync(LogQueue::Rewrite); } log_batch.finish_populate(self.cfg.batch_compression_threshold.0 as usize)?; - let file_handle = self.pipe_log.append(LogQueue::Rewrite, log_batch)?; + let file_handle = + self.pipe_log + .append(LogQueue::Rewrite, log_batch, false /* force_rotate */)?; if sync { self.pipe_log.sync(LogQueue::Rewrite)? } diff --git a/src/util.rs b/src/util.rs index 05bb15e6..53e4a9af 100644 --- a/src/util.rs +++ b/src/util.rs @@ -325,6 +325,39 @@ pub trait Factory: Send + Sync { fn new_target(&self) -> Target; } +pub mod dev_ext { + use std::io::Result as IoResult; + use std::path::Path; + + use fail::fail_point; + + /// Judges whether `dir1` and `dir2` reside on same device. 
+    pub fn on_same_dev<P: AsRef<Path>>(dir1: P, dir2: P) -> IoResult<bool> {
+        fail_point!("env::force_on_different_dev", |_| { Ok(false) });
+        if dir1.as_ref().starts_with(dir2.as_ref()) || dir2.as_ref().starts_with(dir1.as_ref()) {
+            return Ok(true);
+        }
+        #[cfg(all(unix, not(target_os = "linux")))]
+        {
+            use std::os::unix::fs::MetadataExt;
+            let meta1 = std::fs::metadata(dir1.as_ref())?;
+            let meta2 = std::fs::metadata(dir2.as_ref())?;
+            Ok(meta1.dev() == meta2.dev())
+        }
+        #[cfg(target_os = "linux")]
+        {
+            use std::os::linux::fs::MetadataExt;
+            let meta1 = std::fs::metadata(dir1.as_ref())?;
+            let meta2 = std::fs::metadata(dir2.as_ref())?;
+            Ok(meta1.st_dev() == meta2.st_dev())
+        }
+        #[cfg(not(unix))]
+        {
+            Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "Unrecognized file system",
+            ))
+        }
+    }
+}
+
 /// Returns an aligned `offset`.
 ///
 /// # Example:

From 6e67a291e836d2135d2f2298e4c28f22ad5dc84b Mon Sep 17 00:00:00 2001
From: Lucasliang
Date: Thu, 15 Sep 2022 16:43:40 +0800
Subject: [PATCH 20/22] Refine code styles of `scan` in pipe_builder.rs

Signed-off-by: Lucasliang
---
 src/config.rs                     |  23 +--
 src/file_pipe_log/pipe.rs         |  40 ++---
 src/file_pipe_log/pipe_builder.rs | 250 +++++++++++++-----------------
 src/util.rs                       |  19 ++-
 4 files changed, 152 insertions(+), 180 deletions(-)

diff --git a/src/config.rs b/src/config.rs
index e341684d..56e8c672 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,32 +1,15 @@
 // Copyright (c) 2017-present, PingCAP, Inc. Licensed under Apache-2.0.

-use log::{info, warn};
+use log::warn;
 use serde::{Deserialize, Serialize};
-use std::fs;
-use std::path::Path;

 use crate::pipe_log::Version;
-use crate::util::dev_ext::on_same_dev;
+use crate::util::dev_ext::{on_same_dev, validate_dir};
 use crate::{util::ReadableSize, Result};

 const MIN_RECOVERY_READ_BLOCK_SIZE: usize = 512;
 const MIN_RECOVERY_THREADS: usize = 1;

-/// Check the given `dir` is valid or not. If `dir` not exists,
-/// it would be created automatically.
-fn validate_dir(dir: &str) -> Result<()> {
-    let path = Path::new(dir);
-    if !path.exists() {
-        info!("Create raft log directory: {}", dir);
-        fs::create_dir(dir)?;
-        return Ok(());
-    }
-    if !path.is_dir() {
-        return Err(box_err!("Not directory: {}", dir));
-    }
-    Ok(())
-}
-
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "kebab-case")]
 pub enum RecoveryMode {
@@ -236,7 +219,7 @@ mod tests {
     use super::*;

     fn remove_dir(dir: &str) -> Result<()> {
-        fs::remove_dir_all(dir)?;
+        std::fs::remove_dir_all(dir)?;
         Ok(())
     }

diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs
index 02a2b628..bbae1591 100644
--- a/src/file_pipe_log/pipe.rs
+++ b/src/file_pipe_log/pipe.rs
@@ -218,7 +218,7 @@ pub(super) struct SinglePipe<F: FileSystem> {
     /// `active_file`
     active_file: CachePadded<Mutex<ActiveFile<F>>>,
     /// Manager of directory.
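// Editor's aside on patch 20's config.rs hunk above, where `validate_dir`
// moves next to `on_same_dev` in `util::dev_ext`. The body of
// `Config::validate` is not shown in this series, so the function below is
// an assumption about how the two helpers plausibly combine, not the
// patch's actual code.
fn check_dirs(dir: &str, secondary_dir: Option<&str>) -> std::io::Result<()> {
    validate_dir(dir)?; // creates the dir if missing, rejects non-dirs
    if let Some(sec) = secondary_dir {
        validate_dir(sec)?;
        if on_same_dev(dir, sec)? {
            // A secondary dir on the same device cannot absorb an
            // out-of-space main device, so at least warn about it.
            log::warn!("secondary-dir resides on the same device as dir");
        }
    }
    Ok(())
}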
- dir_manager: DirectoryManager, + dir_mgr: DirectoryManager, } impl Drop for SinglePipe { @@ -236,7 +236,7 @@ impl Drop for SinglePipe { seq, }; let dir = self - .dir_manager + .dir_mgr .get_dir(files.fds[(seq - files.first_seq) as usize].path_id); debug_assert!(dir.is_some()); let path = file_id.build_file_path(dir.unwrap()); @@ -277,11 +277,11 @@ impl SinglePipe { } } - let dir_manager = DirectoryManager::new(cfg.dir.clone(), cfg.secondary_dir.clone()); + let dir_mgr = DirectoryManager::new(cfg.dir.clone(), cfg.secondary_dir.clone()); let create_file = first_seq == 0; let active_seq = if create_file { first_seq = 1; - let (dir, path_id) = match dir_manager.get_free_dir(cfg.target_file_size.0 as usize) { + let (dir, path_id) = match dir_mgr.get_free_dir(cfg.target_file_size.0 as usize) { Some((d, t)) => (d, t), None => { // No space for writing. @@ -337,7 +337,7 @@ impl SinglePipe { capacity, })), active_file: CachePadded::new(Mutex::new(active_file)), - dir_manager, + dir_mgr, }; pipe.flush_metrics(total_files); Ok(pipe) @@ -346,7 +346,7 @@ impl SinglePipe { /// Synchronizes all metadatas associated with the working directory to the /// filesystem. fn sync_dir(&self, path_id: DirPathId) -> Result<()> { - self.dir_manager.sync_dir(path_id) + self.dir_mgr.sync_dir(path_id) } /// Returns a shared [`LogFd`] for the specified file sequence number. @@ -385,7 +385,7 @@ impl SinglePipe { let mut files = self.files.write(); if let Some((seq, path_id)) = files.recycle_one_file() { // Has stale files for recycling, the old file will be reused. - let dir = self.dir_manager.get_dir(path_id).unwrap(); + let dir = self.dir_mgr.get_dir(path_id).unwrap(); let src_file_id = FileId { queue: self.queue, seq, @@ -401,7 +401,7 @@ impl SinglePipe { } else { (Arc::new(self.file_system.open(&dst_path)?), path_id) } - } else if let Some((d, t)) = self.dir_manager.get_free_dir(self.target_file_size) { + } else if let Some((d, t)) = self.dir_mgr.get_free_dir(self.target_file_size) { // Has free space for newly writing, a new file is introduced. let path = file_id.build_file_path(&d); (Arc::new(self.file_system.create(&path)?), t) @@ -534,7 +534,7 @@ impl SinglePipe { let has_free_space = { let files = self.files.read(); files.first_seq < files.first_seq_in_use /* has stale files */ - || self.dir_manager.get_free_dir(self.target_file_size).is_some() + || self.dir_mgr.get_free_dir(self.target_file_size).is_some() }; // If there still exists free space for this record, a special Err will // be returned to the caller. @@ -613,7 +613,7 @@ impl SinglePipe { queue: self.queue, seq: *seq, }; - let dir = self.dir_manager.get_dir(*dir_type); + let dir = self.dir_mgr.get_dir(*dir_type); debug_assert!(dir.is_some()); let path = file_id.build_file_path(dir.unwrap()); #[cfg(feature = "failpoints")] @@ -637,14 +637,14 @@ impl SinglePipe { pub struct DualPipes { pipes: [SinglePipe; 2], - _dir_lock: File, + _dir_locks: Vec, } impl DualPipes { /// Open a new [`DualPipes`]. Assumes the two [`SinglePipe`]s share the /// same directory, and that directory is locked by `dir_lock`. 
pub(super) fn open( - dir_lock: File, + dir_locks: Vec, appender: SinglePipe, rewriter: SinglePipe, ) -> Result { @@ -654,7 +654,7 @@ impl DualPipes { Ok(Self { pipes: [appender, rewriter], - _dir_lock: dir_lock, + _dir_locks: dir_locks, }) } @@ -738,7 +738,7 @@ mod tests { fn new_test_pipes(cfg: &Config) -> Result> { DualPipes::open( - lock_dir(&cfg.dir)?, + vec![lock_dir(&cfg.dir)?], new_test_pipe(cfg, LogQueue::Append, Arc::new(DefaultFileSystem))?, new_test_pipe(cfg, LogQueue::Rewrite, Arc::new(DefaultFileSystem))?, ) @@ -979,15 +979,15 @@ mod tests { let sec_path = secondary_dir.path().to_str().unwrap(); { // Test DirectoryManager with main dir only. - let dir_manager = DirectoryManager::new(path.to_owned(), None); - assert_eq!(dir_manager.get_dir(DirPathId::Main).unwrap(), path); - assert!(dir_manager.get_dir(DirPathId::Secondary).is_none()); + let dir_mgr = DirectoryManager::new(path.to_owned(), None); + assert_eq!(dir_mgr.get_dir(DirPathId::Main).unwrap(), path); + assert!(dir_mgr.get_dir(DirPathId::Secondary).is_none()); } { // Test DirectoryManager both with main dir and secondary dir. - let dir_manager = DirectoryManager::new(path.to_owned(), Some(sec_path.to_owned())); - assert_eq!(dir_manager.get_dir(DirPathId::Main).unwrap(), path); - assert_eq!(dir_manager.get_dir(DirPathId::Secondary).unwrap(), sec_path); + let dir_mgr = DirectoryManager::new(path.to_owned(), Some(sec_path.to_owned())); + assert_eq!(dir_mgr.get_dir(DirPathId::Main).unwrap(), path); + assert_eq!(dir_mgr.get_dir(DirPathId::Secondary).unwrap(), sec_path); } } } diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index 83d3bb30..333e9f3f 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -9,7 +9,6 @@ use std::path::Path; use std::sync::Arc; use fs2::FileExt; -use hashbrown::HashMap; use log::{error, warn}; use rayon::prelude::*; @@ -78,20 +77,13 @@ pub struct DualPipesBuilder { listeners: Vec>, /// Only filled after a successful call of `DualPipesBuilder::scan`. - dir_lock: Option, + dir_locks: Option>, /// Only filled after a successful call of `DualPipesBuilder::scan`. append_files: Vec>, /// Only filled after a successful call of `DualPipesBuilder::scan`. rewrite_files: Vec>, } -type FileSeqRange = (u64, u64); /* (minimal_id, maximal_id) */ -type FileSeqDict = HashMap; /* HashMap */ -struct FileScanner { - file_seq_range: FileSeqRange, - file_dict: FileSeqDict, -} - impl DualPipesBuilder { /// Creates a new builder. pub fn new(cfg: Config, file_system: Arc, listeners: Vec>) -> Self { @@ -99,102 +91,106 @@ impl DualPipesBuilder { cfg, file_system, listeners, - dir_lock: None, + dir_locks: None, append_files: Vec::new(), rewrite_files: Vec::new(), } } - /// Scans for all log files under the working directory. The directory will - /// be created if not exists. + /// Scans for all log files under the working directory. pub fn scan(&mut self) -> Result<()> { - let mut append_scanner = FileScanner { - file_seq_range: (u64::MAX, 0_u64), - file_dict: FileSeqDict::default(), - }; - let mut rewrite_scanner = FileScanner { - file_seq_range: (u64::MAX, 0_u64), - file_dict: FileSeqDict::default(), - }; - // Scan main `dir` and `secondary-dir`, if it was valid. + // Scan main `dir` and `secondary-dir`, if `secondary-dir` is valid. 
+ let mut dir_locks: Vec = Vec::new(); let (_, dir_lock) = DualPipesBuilder::::scan_dir( + &self.file_system, &self.cfg.dir, DirPathId::Main, - &mut append_scanner, - &mut rewrite_scanner, + &mut self.append_files, + &mut self.rewrite_files, )?; - self.dir_lock = dir_lock; + dir_locks.push(dir_lock); if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { - DualPipesBuilder::::scan_dir( + let (_, sec_dir_lock) = DualPipesBuilder::::scan_dir( + &self.file_system, secondary_dir, DirPathId::Secondary, - &mut append_scanner, - &mut rewrite_scanner, + &mut self.append_files, + &mut self.rewrite_files, )?; + dir_locks.push(sec_dir_lock); } + self.dir_locks = Some(dir_locks); - for (queue, min_id, max_id, files, file_dict) in [ - ( - LogQueue::Append, - append_scanner.file_seq_range.0, - append_scanner.file_seq_range.1, - &mut self.append_files, - &append_scanner.file_dict, - ), - ( - LogQueue::Rewrite, - rewrite_scanner.file_seq_range.0, - rewrite_scanner.file_seq_range.1, - &mut self.rewrite_files, - &rewrite_scanner.file_dict, - ), + // Sorts the expected `file_list` according to `file_seq`. + self.append_files.sort_by(|a, b| a.seq.cmp(&b.seq)); + self.rewrite_files.sort_by(|a, b| a.seq.cmp(&b.seq)); + + for (queue, files) in [ + (LogQueue::Append, &mut self.append_files), + (LogQueue::Rewrite, &mut self.rewrite_files), ] { - if max_id > 0 { - // Clean stale metadata in main dir. - DualPipesBuilder::clean_stale_metadata( - self.file_system.as_ref(), - &self.cfg.dir, - min_id, + if files.is_empty() { + continue; + } + // Try to cleanup stale metadata left by the previous version. + let (min_id, max_id) = (files[0].seq, files[files.len() - 1].seq); + debug_assert!(min_id > 0); + let mut cleared = 0_u64; + for seq in (0..min_id).rev() { + let file_id = FileId { queue, seq }; + let (in_main_dir, main_filepath) = { + let path = file_id.build_file_path(&self.cfg.dir); + (self.file_system.exists_metadata(&path), path) + }; + let (in_sec_dir, sec_filepath) = { + if self.cfg.secondary_dir.is_some() { + let path = + file_id.build_file_path(self.cfg.secondary_dir.as_ref().unwrap()); + (self.file_system.exists_metadata(&path), Some(path)) + } else { + (false, None) + } + }; + let delete_path = if in_main_dir { + Some(main_filepath) + } else if in_sec_dir { + sec_filepath + } else { + None + }; + if let Some(path) = delete_path { + if let Err(e) = self.file_system.delete_metadata(&path) { + error!("failed to delete metadata of {}: {}.", path.display(), e); + break; + } + cleared += 1; + } else { + // Not found, it means that the following files (file_seq < cur.file_seq) + // are not exists in this Pipe. + break; + } + } + if cleared > 0 { + warn!( + "clear {} stale files of {:?} in range [{}, {}).", + cleared, queue, + min_id - cleared, + max_id, ); - // Clean stale metadata in secondary dir if it was specified. 
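// Editor's sketch of the new cleanup loop above: walk backwards from the
// oldest live file, delete stale metadata until the first sequence that
// has none, then stop -- anything older cannot exist either. The closures
// stand in for the `FileSystem` metadata calls.
fn clean_stale_backwards(
    min_id: u64,
    exists: impl Fn(u64) -> bool,
    delete: impl Fn(u64) -> std::io::Result<()>,
) -> u64 {
    let mut cleared = 0_u64;
    for seq in (0..min_id).rev() {
        if !exists(seq) {
            break; // hole: older metadata was never written or is gone
        }
        if delete(seq).is_err() {
            break; // abort on error, mirroring the patch
        }
        cleared += 1;
    }
    cleared
}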
- if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { - DualPipesBuilder::clean_stale_metadata( - self.file_system.as_ref(), - secondary_dir, - min_id, - queue, - ); - } - for seq in min_id..=max_id { - let file_id = FileId { queue, seq }; - let (dir, path_id) = match file_dict.get(&seq) { - Some(DirPathId::Main) => (&self.cfg.dir, DirPathId::Main), - Some(DirPathId::Secondary) => { - debug_assert!(self.cfg.secondary_dir.is_some()); - ( - self.cfg.secondary_dir.as_ref().unwrap(), - DirPathId::Secondary, - ) - } - None => { - warn!( - "Detected a hole when scanning directory, discarding files before {:?}.", - file_id, - ); - files.clear(); - continue; - } - }; - let path = file_id.build_file_path(dir); - let handle = Arc::new(self.file_system.open(&path)?); - files.push(FileToRecover { + } + // Check the file_list and remove the hole of files. + let mut drain_count = files.len(); + for seq in ((max_id + 1 - drain_count as u64)..=max_id).rev() { + if files[drain_count - 1].seq != seq { + warn!( + "Detected a hole when scanning directory, discarding files before file_seq {}.", seq, - handle, - format: None, - path_id, - }); + ); + files.drain(..drain_count); + break; } + drain_count -= 1; } } Ok(()) @@ -204,15 +200,16 @@ impl DualPipesBuilder { /// /// Returns the valid file count and the relative dir_lock. fn scan_dir( + file_system: &F, dir: &str, path_id: DirPathId, - append_scanner: &mut FileScanner, - rewrite_scanner: &mut FileScanner, - ) -> Result<(usize, Option)> { - let dir_lock = Some(lock_dir(dir)?); + append_files: &mut Vec>, + rewrite_files: &mut Vec>, + ) -> Result<(usize, File)> { + let dir_lock = lock_dir(dir)?; // Parse all valid files in `dir`. let mut valid_file_count: usize = 0; - fs::read_dir(Path::new(dir))?.for_each(|e| { + fs::read_dir(Path::new(dir))?.try_for_each(|e| -> Result<()> { if let Ok(e) = e { let p = e.path(); if p.is_file() { @@ -221,70 +218,45 @@ impl DualPipesBuilder { queue: LogQueue::Append, seq, }) => { - append_scanner.file_dict.insert(seq, path_id); - append_scanner.file_seq_range.0 = - std::cmp::min(append_scanner.file_seq_range.0, seq); - append_scanner.file_seq_range.1 = - std::cmp::max(append_scanner.file_seq_range.1, seq); + let file_id = FileId { + queue: LogQueue::Append, + seq, + }; + let path = file_id.build_file_path(dir); + append_files.push(FileToRecover { + seq, + handle: Arc::new(file_system.open(&path)?), + format: None, + path_id, + }); valid_file_count += 1; } Some(FileId { queue: LogQueue::Rewrite, seq, }) => { - rewrite_scanner.file_dict.insert(seq, path_id); - rewrite_scanner.file_seq_range.0 = - std::cmp::min(rewrite_scanner.file_seq_range.0, seq); - rewrite_scanner.file_seq_range.1 = - std::cmp::max(rewrite_scanner.file_seq_range.1, seq); + let file_id = FileId { + queue: LogQueue::Rewrite, + seq, + }; + let path = file_id.build_file_path(dir); + rewrite_files.push(FileToRecover { + seq, + handle: Arc::new(file_system.open(&path)?), + format: None, + path_id, + }); valid_file_count += 1; } _ => {} } } } - }); + Ok(()) + })?; Ok((valid_file_count, dir_lock)) } - /// Cleans up stale metadata left by the previous version. - fn clean_stale_metadata(file_system: &F, dir: &str, min_id: u64, queue: LogQueue) { - let max_sample = 100; - // Find the first obsolete metadata. 
- let mut delete_start = None; - for i in 0..max_sample { - let seq = i * min_id / max_sample; - let file_id = FileId { queue, seq }; - // Main dir - let path = file_id.build_file_path(&dir); - if file_system.exists_metadata(&path) { - delete_start = Some(i.saturating_sub(1) * min_id / max_sample + 1); - break; - } - } - // Delete metadata starting from the oldest. Abort on error. - if let Some(start) = delete_start { - let mut success = 0; - for seq in start..min_id { - let file_id = FileId { queue, seq }; - let path = file_id.build_file_path(&dir); - if file_system.exists_metadata(&path) { - if let Err(e) = file_system.delete_metadata(&path) { - error!("failed to delete metadata of {}: {}.", path.display(), e); - break; - } - } else { - continue; - } - success += 1; - } - warn!( - "deleted {} stale files of {:?} in range [{}, {}).", - success, queue, start, min_id, - ); - } - } - /// Reads through log items in all available log files, and replays them to /// specific [`ReplayMachine`]s that can be constructed via /// `machine_factory`. @@ -507,7 +479,7 @@ impl DualPipesBuilder { pub fn finish(self) -> Result> { let appender = self.build_pipe(LogQueue::Append)?; let rewriter = self.build_pipe(LogQueue::Rewrite)?; - DualPipes::open(self.dir_lock.unwrap(), appender, rewriter) + DualPipes::open(self.dir_locks.unwrap(), appender, rewriter) } } diff --git a/src/util.rs b/src/util.rs index 53e4a9af..d75d8189 100644 --- a/src/util.rs +++ b/src/util.rs @@ -331,8 +331,25 @@ pub mod dev_ext { use fail::fail_point; + /// Check the given `dir` is valid or not. If `dir` not exists, + /// it would be created automatically. + pub(crate) fn validate_dir(dir: &str) -> IoResult<()> { + let path = Path::new(dir); + if !path.exists() { + std::fs::create_dir(dir)?; + return Ok(()); + } + if !path.is_dir() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Not directory: {}", dir), + )); + } + Ok(()) + } + /// Judges whether `dir1` and `dir2` reside on same device. - pub fn on_same_dev>(dir1: P, dir2: P) -> IoResult { + pub(crate) fn on_same_dev>(dir1: P, dir2: P) -> IoResult { fail_point!("env::force_on_different_dev", |_| { Ok(false) }); if dir1.as_ref().starts_with(dir2.as_ref()) || dir2.as_ref().starts_with(dir1.as_ref()) { return Ok(true); From 966656f612297b763d3c47c1635bc31f890783e5 Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Thu, 15 Sep 2022 20:11:13 +0800 Subject: [PATCH 21/22] Polish annotations. Signed-off-by: Lucasliang --- src/file_pipe_log/pipe_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index 333e9f3f..4a4a061d 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -166,7 +166,7 @@ impl DualPipesBuilder { cleared += 1; } else { // Not found, it means that the following files (file_seq < cur.file_seq) - // are not exists in this Pipe. + // do not exist in this Pipe. break; } } From 6a276e5ce69b3285789a1856f4eeda0eaa18496f Mon Sep 17 00:00:00 2001 From: Lucasliang Date: Mon, 26 Sep 2022 14:02:31 +0800 Subject: [PATCH 22/22] Polish the implementation while doing recovery by scanning. 
Signed-off-by: Lucasliang
---
 src/config.rs                     |  18 ++++-
 src/engine.rs                     |  24 +----
 src/file_pipe_log/pipe.rs         |  87 +++++++++++--------
 src/file_pipe_log/pipe_builder.rs | 126 +++++++++++++-----------
 src/log_batch.rs                  |  90 +++++++++-------
 src/pipe_log.rs                   |   1 -
 src/purge.rs                      |   4 +-
 src/util.rs                       |  20 -----
 8 files changed, 169 insertions(+), 201 deletions(-)

diff --git a/src/config.rs b/src/config.rs
index f643d0f6..2f7767eb 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -4,7 +4,7 @@ use log::warn;
 use serde::{Deserialize, Serialize};

 use crate::pipe_log::Version;
-use crate::util::dev_ext::{on_same_dev, validate_dir};
+use crate::util::dev_ext::on_same_dev;
 use crate::{util::ReadableSize, Result};

 const MIN_RECOVERY_READ_BLOCK_SIZE: usize = 512;
@@ -214,6 +214,22 @@ impl Config {
     }
 }

+/// Checks whether the given `dir` is valid. If `dir` does not exist,
+/// it will be created automatically.
+fn validate_dir(dir: &str) -> Result<()> {
+    use std::path::Path;
+
+    let path = Path::new(dir);
+    if !path.exists() {
+        std::fs::create_dir(dir)?;
+        return Ok(());
+    }
+    if !path.is_dir() {
+        return Err(box_err!("Not directory: {}", dir));
+    }
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

diff --git a/src/engine.rs b/src/engine.rs
index dc2ef298..3a0174da 100644
--- a/src/engine.rs
+++ b/src/engine.rs
@@ -7,7 +7,7 @@ use std::sync::{mpsc, Arc, Mutex};
 use std::thread::{Builder as ThreadBuilder, JoinHandle};
 use std::time::{Duration, Instant};

-use log::{error, info, warn};
+use log::{error, info};
 use protobuf::{parse_from_bytes, Message};

 use crate::config::{Config, RecoveryMode};
@@ -146,8 +146,6 @@ where
         debug_assert!(len > 0);

         let mut attempt_count = 0_u64;
-        // Flag on whether force to rotate the current active file or not.
-        let mut force_rotate = false;
         let block_handle = loop {
             // Max retry count is limited to `2`. If the first `append` retry because of
             // `NOSPC` error, the next `append` should success, unless there exists
@@ -167,25 +165,7 @@ where
             sync |= writer.sync;
             let log_batch = writer.mut_payload();
-            let res = self
-                .pipe_log
-                .append(LogQueue::Append, log_batch, force_rotate);
-            // If we found that there is no spare space for the next LogBatch in the
-            // current active file, we will mark the `force_rotate` with `true` to
-            // notify the leader do `rotate` immediately.
-            if let Err(Error::Other(e)) = res {
-                warn!(
-                    "Cannot append, err: {}, try to re-append this log_batch into next log",
-                    e
-                );
-                // Notify the next `append` to rotate current file.
-                force_rotate = true;
-                writer.set_output(Err(Error::Other(box_err!(
-                    "Failed to append logbatch, try to dump it to other dir"
-                ))));
-                continue;
-            }
-            force_rotate = false;
+            let res = self.pipe_log.append(LogQueue::Append, log_batch);
             writer.set_output(res);
         }
         perf_context!(log_write_duration).observe_since(now);

diff --git a/src/file_pipe_log/pipe.rs b/src/file_pipe_log/pipe.rs
index bbae1591..4f75438d 100644
--- a/src/file_pipe_log/pipe.rs
+++ b/src/file_pipe_log/pipe.rs
@@ -32,17 +32,34 @@ pub enum DirPathId {
 /// Manages multiple dirs for storing logs, including `main dir` and
 /// `secondary dir`.
-struct DirectoryManager { +#[derive(Default)] +pub struct DirectoryManager { dirs: Vec, + locks: Vec, } impl DirectoryManager { + #[cfg(test)] fn new(dir: String, secondary_dir: Option) -> Self { let mut dirs = vec![dir; 1]; if let Some(sec_dir) = secondary_dir { dirs.push(sec_dir); } - Self { dirs } + Self { + dirs, + locks: Vec::default(), + } + } + + #[inline] + pub fn add_dir(&mut self, dir: String, dir_lock: File) { + self.dirs.push(dir); + self.locks.push(dir_lock); + } + + #[inline] + pub fn get_all_dir(&self) -> &Vec { + &self.dirs } #[inline] @@ -218,7 +235,7 @@ pub(super) struct SinglePipe { /// `active_file` active_file: CachePadded>>, /// Manager of directory. - dir_mgr: DirectoryManager, + dir_mgr: Arc, } impl Drop for SinglePipe { @@ -253,9 +270,11 @@ impl Drop for SinglePipe { impl SinglePipe { /// Opens a new [`SinglePipe`]. + #[allow(clippy::too_many_arguments)] pub fn open( cfg: &Config, file_system: Arc, + dir_mgr: Arc, listeners: Vec>, queue: LogQueue, mut first_seq: FileSeq, @@ -277,7 +296,6 @@ impl SinglePipe { } } - let dir_mgr = DirectoryManager::new(cfg.dir.clone(), cfg.secondary_dir.clone()); let create_file = first_seq == 0; let active_seq = if create_file { first_seq = 1; @@ -465,14 +483,10 @@ impl SinglePipe { reader.read(handle) } - fn append( - &self, - bytes: &mut T, - force_rotate: bool, - ) -> Result { + fn append(&self, bytes: &mut T) -> Result { fail_point!("file_pipe_log::append"); let mut active_file = self.active_file.lock(); - if active_file.writer.offset() >= self.target_file_size || force_rotate { + if active_file.writer.offset() >= self.target_file_size { if let Err(e) = self.rotate_imp(&mut active_file) { panic!( "error when rotate [{:?}:{}]: {}", @@ -536,9 +550,15 @@ impl SinglePipe { files.first_seq < files.first_seq_in_use /* has stale files */ || self.dir_mgr.get_free_dir(self.target_file_size).is_some() }; - // If there still exists free space for this record, a special Err will - // be returned to the caller. + // If there still exists free space for this record, rotate the file + // and return a special Err (for retry) to the caller. if no_space_err && has_free_space { + if let Err(e) = self.rotate_imp(&mut active_file) { + panic!( + "error when rotate [{:?}:{}]: {}", + self.queue, active_file.seq, e + ); + } return Err(Error::Other(box_err!( "failed to write {} file, get {} try to flush it to other dir", seq, @@ -636,25 +656,18 @@ impl SinglePipe { /// A [`PipeLog`] implementation that stores data in filesystem. pub struct DualPipes { pipes: [SinglePipe; 2], - - _dir_locks: Vec, } impl DualPipes { /// Open a new [`DualPipes`]. Assumes the two [`SinglePipe`]s share the /// same directory, and that directory is locked by `dir_lock`. - pub(super) fn open( - dir_locks: Vec, - appender: SinglePipe, - rewriter: SinglePipe, - ) -> Result { + pub(super) fn open(appender: SinglePipe, rewriter: SinglePipe) -> Result { // TODO: remove this dependency. 
debug_assert_eq!(LogQueue::Append as usize, 0); debug_assert_eq!(LogQueue::Rewrite as usize, 1); Ok(Self { pipes: [appender, rewriter], - _dir_locks: dir_locks, }) } @@ -675,9 +688,8 @@ impl PipeLog for DualPipes { &self, queue: LogQueue, bytes: &mut T, - force_rotate: bool, ) -> Result { - self.pipes[queue as usize].append(bytes, force_rotate) + self.pipes[queue as usize].append(bytes) } #[inline] @@ -721,10 +733,12 @@ mod tests { cfg: &Config, queue: LogQueue, fs: Arc, + dir_mgr: Arc, ) -> Result> { SinglePipe::open( cfg, fs, + dir_mgr, Vec::new(), queue, 0, @@ -737,10 +751,17 @@ mod tests { } fn new_test_pipes(cfg: &Config) -> Result> { + let mut dirs = DirectoryManager::default(); + dirs.add_dir(cfg.dir.clone(), lock_dir(&cfg.dir)?); + let dir_mgr = Arc::new(dirs); DualPipes::open( - vec![lock_dir(&cfg.dir)?], - new_test_pipe(cfg, LogQueue::Append, Arc::new(DefaultFileSystem))?, - new_test_pipe(cfg, LogQueue::Rewrite, Arc::new(DefaultFileSystem))?, + new_test_pipe( + cfg, + LogQueue::Append, + Arc::new(DefaultFileSystem), + dir_mgr.clone(), + )?, + new_test_pipe(cfg, LogQueue::Rewrite, Arc::new(DefaultFileSystem), dir_mgr)?, ) } @@ -780,12 +801,12 @@ mod tests { // generate file 1, 2, 3 let content: Vec = vec![b'a'; 1024]; - let file_handle = pipe_log.append(queue, &mut &content, false).unwrap(); + let file_handle = pipe_log.append(queue, &mut &content).unwrap(); assert_eq!(file_handle.id.seq, 1); assert_eq!(file_handle.offset, header_size); assert_eq!(pipe_log.file_span(queue).1, 1); - let file_handle = pipe_log.append(queue, &mut &content, false).unwrap(); + let file_handle = pipe_log.append(queue, &mut &content).unwrap(); assert_eq!(file_handle.id.seq, 2); assert_eq!(file_handle.offset, header_size); assert_eq!(pipe_log.file_span(queue).1, 2); @@ -801,11 +822,11 @@ mod tests { // append position let s_content = b"short content".to_vec(); - let file_handle = pipe_log.append(queue, &mut &s_content, false).unwrap(); + let file_handle = pipe_log.append(queue, &mut &s_content).unwrap(); assert_eq!(file_handle.id.seq, 3); assert_eq!(file_handle.offset, header_size); - let file_handle = pipe_log.append(queue, &mut &s_content, false).unwrap(); + let file_handle = pipe_log.append(queue, &mut &s_content).unwrap(); assert_eq!(file_handle.id.seq, 3); assert_eq!( file_handle.offset, @@ -926,7 +947,9 @@ mod tests { }; let queue = LogQueue::Append; let fs = Arc::new(ObfuscatedFileSystem::default()); - let pipe_log = new_test_pipe(&cfg, queue, fs.clone()).unwrap(); + let mut dir_mgr = DirectoryManager::default(); + dir_mgr.add_dir(cfg.dir.clone(), lock_dir(&cfg.dir).unwrap()); + let pipe_log = new_test_pipe(&cfg, queue, fs.clone(), Arc::new(dir_mgr)).unwrap(); assert_eq!(pipe_log.file_span(), (1, 1)); fn content(i: usize) -> Vec { @@ -934,7 +957,7 @@ mod tests { } let mut handles = Vec::new(); for i in 0..10 { - handles.push(pipe_log.append(&mut &content(i), false).unwrap()); + handles.push(pipe_log.append(&mut &content(i)).unwrap()); pipe_log.sync().unwrap(); } pipe_log.rotate().unwrap(); @@ -956,7 +979,7 @@ mod tests { // Try to reuse. let mut handles = Vec::new(); for i in 0..10 { - handles.push(pipe_log.append(&mut &content(i + 1), false).unwrap()); + handles.push(pipe_log.append(&mut &content(i + 1)).unwrap()); pipe_log.sync().unwrap(); } // Verify the data. 
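Editor's aside: after this patch the lock files no longer live on `DualPipes`; they are owned by the shared `DirectoryManager`, as the updated tests above show. A condensed sketch of that wiring (error handling abbreviated; `build_dir_manager` is a made-up name, the calls are the crate's own):

    fn build_dir_manager(cfg: &Config) -> Result<Arc<DirectoryManager>> {
        let mut dirs = DirectoryManager::default();
        // Each dir is locked once, and the lock lives as long as the
        // manager, i.e. as long as any pipe that writes into the dir.
        dirs.add_dir(cfg.dir.clone(), lock_dir(&cfg.dir)?);
        if let Some(sec) = cfg.secondary_dir.as_ref() {
            dirs.add_dir(sec.clone(), lock_dir(sec)?);
        }
        Ok(Arc::new(dirs))
    }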
diff --git a/src/file_pipe_log/pipe_builder.rs b/src/file_pipe_log/pipe_builder.rs index 4a4a061d..bbc03a23 100644 --- a/src/file_pipe_log/pipe_builder.rs +++ b/src/file_pipe_log/pipe_builder.rs @@ -22,7 +22,7 @@ use crate::{Error, Result}; use super::format::{lock_file_path, FileNameExt, LogFileFormat}; use super::log_file::build_file_reader; -use super::pipe::{DirPathId, DualPipes, FileWithFormat, SinglePipe}; +use super::pipe::{DirPathId, DirectoryManager, DualPipes, FileWithFormat, SinglePipe}; use super::reader::LogItemBatchFileReader; use crate::env::Handle; @@ -77,7 +77,7 @@ pub struct DualPipesBuilder { listeners: Vec>, /// Only filled after a successful call of `DualPipesBuilder::scan`. - dir_locks: Option>, + dir_mgr: Option>, /// Only filled after a successful call of `DualPipesBuilder::scan`. append_files: Vec>, /// Only filled after a successful call of `DualPipesBuilder::scan`. @@ -91,7 +91,7 @@ impl DualPipesBuilder { cfg, file_system, listeners, - dir_locks: None, + dir_mgr: None, append_files: Vec::new(), rewrite_files: Vec::new(), } @@ -100,31 +100,29 @@ impl DualPipesBuilder { /// Scans for all log files under the working directory. pub fn scan(&mut self) -> Result<()> { // Scan main `dir` and `secondary-dir`, if `secondary-dir` is valid. - let mut dir_locks: Vec = Vec::new(); - let (_, dir_lock) = DualPipesBuilder::::scan_dir( + let mut dir_mgr = DirectoryManager::default(); + dir_mgr.add_dir(self.cfg.dir.clone(), lock_dir(&self.cfg.dir)?); + DualPipesBuilder::::scan_dir_imp( &self.file_system, - &self.cfg.dir, DirPathId::Main, + &self.cfg.dir, &mut self.append_files, &mut self.rewrite_files, )?; - dir_locks.push(dir_lock); if let Some(secondary_dir) = self.cfg.secondary_dir.as_ref() { - let (_, sec_dir_lock) = DualPipesBuilder::::scan_dir( + dir_mgr.add_dir(secondary_dir.clone(), lock_dir(secondary_dir)?); + DualPipesBuilder::::scan_dir_imp( &self.file_system, - secondary_dir, DirPathId::Secondary, + secondary_dir, &mut self.append_files, &mut self.rewrite_files, )?; - dir_locks.push(sec_dir_lock); } - self.dir_locks = Some(dir_locks); - // Sorts the expected `file_list` according to `file_seq`. self.append_files.sort_by(|a, b| a.seq.cmp(&b.seq)); self.rewrite_files.sort_by(|a, b| a.seq.cmp(&b.seq)); - + // Validate rewrite & append `file_list` individually, and clear stale metadata. for (queue, files) in [ (LogQueue::Append, &mut self.append_files), (LogQueue::Rewrite, &mut self.rewrite_files), @@ -132,82 +130,67 @@ impl DualPipesBuilder { if files.is_empty() { continue; } + // Check the file_list and remove the hole of files. + let mut current_seq = files[0].seq; + let mut invalid_files = 0_usize; + debug_assert!(current_seq > 0); + for (i, f) in files.iter().enumerate() { + if f.seq > current_seq + (i - invalid_files) as u64 { + warn!( + "Detected a hole when scanning directory, discarding files before file_seq {}.", + f.seq, + ); + current_seq = f.seq + 1; + invalid_files = i; + } else if f.seq < current_seq { + return Err(Error::InvalidArgument("Duplicate file".to_string())); + } + } + files.drain(..invalid_files); // Try to cleanup stale metadata left by the previous version. - let (min_id, max_id) = (files[0].seq, files[files.len() - 1].seq); - debug_assert!(min_id > 0); + if files.is_empty() { + continue; + } let mut cleared = 0_u64; - for seq in (0..min_id).rev() { + let clear_start: u64 = { + // TODO: Need a more efficient way to remove sparse stale metadata, + // without iterating one by one. 
+ 1 + }; + for seq in (clear_start..files[0].seq).rev() { let file_id = FileId { queue, seq }; - let (in_main_dir, main_filepath) = { - let path = file_id.build_file_path(&self.cfg.dir); - (self.file_system.exists_metadata(&path), path) - }; - let (in_sec_dir, sec_filepath) = { - if self.cfg.secondary_dir.is_some() { - let path = - file_id.build_file_path(self.cfg.secondary_dir.as_ref().unwrap()); - (self.file_system.exists_metadata(&path), Some(path)) - } else { - (false, None) - } - }; - let delete_path = if in_main_dir { - Some(main_filepath) - } else if in_sec_dir { - sec_filepath - } else { - None - }; - if let Some(path) = delete_path { - if let Err(e) = self.file_system.delete_metadata(&path) { - error!("failed to delete metadata of {}: {}.", path.display(), e); - break; + for dir in dir_mgr.get_all_dir().iter() { + let path = file_id.build_file_path(dir); + if self.file_system.exists_metadata(&path) { + if let Err(e) = self.file_system.delete_metadata(&path) { + error!("failed to delete metadata of {}: {}.", path.display(), e); + break; + } + cleared += 1; } - cleared += 1; - } else { - // Not found, it means that the following files (file_seq < cur.file_seq) - // do not exist in this Pipe. - break; } } if cleared > 0 { warn!( "clear {} stale files of {:?} in range [{}, {}).", - cleared, - queue, - min_id - cleared, - max_id, + cleared, queue, 0, files[0].seq, ); } - // Check the file_list and remove the hole of files. - let mut drain_count = files.len(); - for seq in ((max_id + 1 - drain_count as u64)..=max_id).rev() { - if files[drain_count - 1].seq != seq { - warn!( - "Detected a hole when scanning directory, discarding files before file_seq {}.", - seq, - ); - files.drain(..drain_count); - break; - } - drain_count -= 1; - } } + self.dir_mgr = Some(Arc::new(dir_mgr)); Ok(()) } - /// Scans for all log files under the given directory. + /// Scans and parses all log files under the given directory. /// - /// Returns the valid file count and the relative dir_lock. - fn scan_dir( + /// Returns the valid file count + fn scan_dir_imp( file_system: &F, - dir: &str, path_id: DirPathId, + dir: &str, append_files: &mut Vec>, rewrite_files: &mut Vec>, - ) -> Result<(usize, File)> { - let dir_lock = lock_dir(dir)?; - // Parse all valid files in `dir`. + ) -> Result { let mut valid_file_count: usize = 0; fs::read_dir(Path::new(dir))?.try_for_each(|e| -> Result<()> { if let Ok(e) = e { @@ -254,7 +237,7 @@ impl DualPipesBuilder { } Ok(()) })?; - Ok((valid_file_count, dir_lock)) + Ok(valid_file_count) } /// Reads through log items in all available log files, and replays them to @@ -464,6 +447,7 @@ impl DualPipesBuilder { SinglePipe::open( &self.cfg, self.file_system.clone(), + self.dir_mgr.clone().unwrap(), self.listeners.clone(), queue, first_seq, @@ -479,7 +463,7 @@ impl DualPipesBuilder { pub fn finish(self) -> Result> { let appender = self.build_pipe(LogQueue::Append)?; let rewriter = self.build_pipe(LogQueue::Rewrite)?; - DualPipes::open(self.dir_locks.unwrap(), appender, rewriter) + DualPipes::open(appender, rewriter) } } diff --git a/src/log_batch.rs b/src/log_batch.rs index 2064b2e4..8010eec0 100644 --- a/src/log_batch.rs +++ b/src/log_batch.rs @@ -409,45 +409,6 @@ impl LogItemBatch { } } - /// Prepare the `write` by signing a checksum, so-called `signature`, - /// into the `LogBatch`. - /// - /// The `signature` is both generated by the given `LogFileContext`. 
- /// That is, the final checksum of each `LogBatch` consists of this - /// `signature` and the original `checksum` of the contents. - pub(crate) fn prepare_write( - buf: &mut Vec, - file_context: &LogFileContext, - ) -> Result> { - if !buf.is_empty() { - if let Some(signature) = file_context.get_signature() { - // Insert the signature into the encoded bytes. Rewrite checksum of - // `LogItemBatch` in `LogBatch`. - let footer_checksum_offset = buf.len() - LOG_BATCH_CHECKSUM_LEN; - let original_checksum = codec::decode_u32_le(&mut &buf[footer_checksum_offset..])?; - // The final checksum is generated by `signature` ***XOR*** - // `original checksum of buf`. - (&mut buf[footer_checksum_offset..]) - .write_u32::(original_checksum ^ signature)?; - return Ok(Some(signature)); - } - } - Ok(None) - } - - /// Prepare the `rewrite` by reseting the `signature` in the `LogBatch`. - pub(crate) fn prepare_rewrite(buf: &mut Vec, signature: Option) -> Result<()> { - debug_assert!(!buf.is_empty()); - if let Some(signature) = signature { - let footer_checksum_offset = buf.len() - LOG_BATCH_CHECKSUM_LEN; - let original_checksum = codec::decode_u32_le(&mut &buf[footer_checksum_offset..])?; - // Reset the final checksum by `signature` ***XOR*** `original checksum of buf`. - (&mut buf[footer_checksum_offset..]) - .write_u32::(original_checksum ^ signature)?; - } - Ok(()) - } - pub(crate) fn finish_write(&mut self, handle: FileBlockHandle) { for item in self.items.iter_mut() { match &mut item.content { @@ -800,24 +761,34 @@ impl LogBatch { } /// Makes preparations for the write of `LogBatch`. + /// + /// Internally rewrites the checksum of each `LogBatch` by a signature, + /// generated by the given `LogFileContext`. That is, the final checksum + /// of each `LogBatch` consists of this `signature` and the original + /// `checksum` of the contents. #[inline] pub(crate) fn prepare_write(&mut self, file_context: &LogFileContext) -> Result<()> { + let new_signature = file_context.get_signature(); match self.buf_state { BufState::Encoded(header_offset, entries_len) => { - self.buf_state = BufState::Sealed( - header_offset, - entries_len, - LogItemBatch::prepare_write(&mut self.buf, file_context)?, - ); + sign_checksum(&mut self.buf, new_signature)?; + self.buf_state = BufState::Sealed(header_offset, entries_len, new_signature); } - BufState::Sealed(header_offset, entries_len, signature) => { - LogItemBatch::prepare_rewrite(&mut self.buf, signature)?; + BufState::Sealed(header_offset, entries_len, old_signature) => { // Re-seal the LogBatch. - self.buf_state = BufState::Sealed( - header_offset, - entries_len, - LogItemBatch::prepare_write(&mut self.buf, file_context)?, - ); + match (old_signature, new_signature) { + (Some(old), Some(new)) => { + sign_checksum(&mut self.buf, Some(old ^ new))?; + } + (Some(old), None) => { + sign_checksum(&mut self.buf, Some(old))?; + } + (None, Some(new)) => { + sign_checksum(&mut self.buf, Some(new))?; + } + _ => {} + } + self.buf_state = BufState::Sealed(header_offset, entries_len, new_signature); } _ => unreachable!(), } @@ -941,6 +912,23 @@ impl ReactiveBytes for LogBatch { } } +/// Signs a checksum, so-called `signature`, into the `LogBatch`. +fn sign_checksum(buf: &mut Vec, signature: Option) -> Result<()> { + if !buf.is_empty() { + if let Some(signature) = signature { + // Insert the signature into the encoded bytes. Rewrite checksum of + // `LogItemBatch` in `LogBatch`. 
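// Editor's note on the re-sealing arms above: the stored footer is
// `original ^ old`, and XOR is its own inverse, so signing the sealed
// buffer once more with `old ^ new` yields `original ^ new` without ever
// decoding `original` on its own:
#[test]
fn xor_resign_equivalence() {
    let (original, old, new) = (0xdead_beef_u32, 0x1234_5678_u32, 0x9abc_def0_u32);
    let stored = original ^ old; // sealed with the old signature
    assert_eq!(stored ^ (old ^ new), original ^ new);
    // The (Some(old), None) and (None, Some(new)) arms use the same
    // identity: XOR-ing with the only known signature flips it on or off.
    assert_eq!(stored ^ old, original);
}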
+            let footer_checksum_offset = buf.len() - LOG_BATCH_CHECKSUM_LEN;
+            let original_checksum = codec::decode_u32_le(&mut &buf[footer_checksum_offset..])?;
+            // The final checksum is generated by `signature` ***XOR***
+            // `original checksum of buf`.
+            (&mut buf[footer_checksum_offset..])
+                .write_u32::<LittleEndian>(original_checksum ^ signature)?;
+        }
+    }
+    Ok(())
+}
+
 /// Verifies the checksum of a slice of bytes that sequentially holds data and
 /// checksum. The checksum field may be signed by XOR-ing with an u32.
 fn verify_checksum_with_signature(buf: &[u8], signature: Option<u32>) -> Result<()> {

diff --git a/src/pipe_log.rs b/src/pipe_log.rs
index 43d465ab..aa9b7925 100644
--- a/src/pipe_log.rs
+++ b/src/pipe_log.rs
@@ -178,7 +178,6 @@ pub trait PipeLog: Sized {
         &self,
         queue: LogQueue,
         bytes: &mut T,
-        force_rotate: bool,
     ) -> Result<FileBlockHandle>;

     /// Synchronizes all buffered writes.

diff --git a/src/purge.rs b/src/purge.rs
index 61a3631a..cfefd219 100644
--- a/src/purge.rs
+++ b/src/purge.rs
@@ -364,9 +364,7 @@ where
             return self.pipe_log.sync(LogQueue::Rewrite);
         }
         log_batch.finish_populate(self.cfg.batch_compression_threshold.0 as usize)?;
-        let file_handle =
-            self.pipe_log
-                .append(LogQueue::Rewrite, log_batch, false /* force_rotate */)?;
+        let file_handle = self.pipe_log.append(LogQueue::Rewrite, log_batch)?;
         if sync {
             self.pipe_log.sync(LogQueue::Rewrite)?
         }

diff --git a/src/util.rs b/src/util.rs
index d75d8189..83a6a0cc 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -331,29 +331,9 @@ pub mod dev_ext {

     use fail::fail_point;

-    /// Check the given `dir` is valid or not. If `dir` not exists,
-    /// it would be created automatically.
-    pub(crate) fn validate_dir(dir: &str) -> IoResult<()> {
-        let path = Path::new(dir);
-        if !path.exists() {
-            std::fs::create_dir(dir)?;
-            return Ok(());
-        }
-        if !path.is_dir() {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::Other,
-                format!("Not directory: {}", dir),
-            ));
-        }
-        Ok(())
-    }
-
     /// Judges whether `dir1` and `dir2` reside on same device.
     pub(crate) fn on_same_dev<P: AsRef<Path>>(dir1: P, dir2: P) -> IoResult<bool> {
         fail_point!("env::force_on_different_dev", |_| { Ok(false) });
-        if dir1.as_ref().starts_with(dir2.as_ref()) || dir2.as_ref().starts_with(dir1.as_ref()) {
-            return Ok(true);
-        }
         #[cfg(all(unix, not(target_os = "linux")))]
         {
             use std::os::unix::fs::MetadataExt;