-
Notifications
You must be signed in to change notification settings - Fork 88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support secondary directory for log writing. #261
Changes from 29 commits
dabc636
3c927b2
221cc8f
b6570a7
a97ca69
4859a52
3da8d13
5231a07
2278f0e
d424d76
5704eb5
35fc5b5
30e0802
d769480
a92873e
703892f
34307bb
dcd0e24
a0c977b
acbe904
193d34f
f306981
518621d
00b7059
6607662
0a0b705
f5df93e
6e67a29
966656f
7e65af6
6a276e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ use std::sync::{mpsc, Arc, Mutex}; | |
use std::thread::{Builder as ThreadBuilder, JoinHandle}; | ||
use std::time::{Duration, Instant}; | ||
|
||
use log::{error, info}; | ||
use log::{error, info, warn}; | ||
use protobuf::{parse_from_bytes, Message}; | ||
|
||
use crate::config::{Config, RecoveryMode}; | ||
|
@@ -25,6 +25,8 @@ use crate::write_barrier::{WriteBarrier, Writer}; | |
use crate::{perf_context, Error, GlobalStats, Result}; | ||
|
||
const METRICS_FLUSH_INTERVAL: Duration = Duration::from_secs(30); | ||
/// Max retry count for `write`. | ||
const MAX_WRITE_RETRY_COUNT: u64 = 2; | ||
|
||
pub struct Engine<F = DefaultFileSystem, P = FilePipeLog<F>> | ||
where | ||
|
@@ -142,7 +144,16 @@ where | |
let start = Instant::now(); | ||
let len = log_batch.finish_populate(self.cfg.batch_compression_threshold.0 as usize)?; | ||
debug_assert!(len > 0); | ||
let block_handle = { | ||
|
||
let mut attempt_count = 0_u64; | ||
// Flag on whether force to rotate the current active file or not. | ||
let mut force_rotate = false; | ||
let block_handle = loop { | ||
// Max retry count is limited to `2`. If the first `append` retry because of | ||
// `NOSPC` error, the next `append` should success, unless there exists | ||
// several abnormal cases in the IO device. In that case, `Engine::write` | ||
// must return `Err`. | ||
attempt_count += 1; | ||
let mut writer = Writer::new(log_batch, sync); | ||
// Snapshot and clear the current perf context temporarily, so the write group | ||
// leader will collect the perf context diff later. | ||
|
@@ -154,14 +165,34 @@ where | |
for writer in group.iter_mut() { | ||
writer.entered_time = Some(now); | ||
sync |= writer.sync; | ||
|
||
let log_batch = writer.mut_payload(); | ||
let res = self.pipe_log.append(LogQueue::Append, log_batch); | ||
let res = self | ||
.pipe_log | ||
.append(LogQueue::Append, log_batch, force_rotate); | ||
// If we found that there is no spare space for the next LogBatch in the | ||
// current active file, we will mark the `force_rotate` with `true` to | ||
// notify the leader do `rotate` immediately. | ||
if let Err(Error::Other(e)) = res { | ||
warn!( | ||
"Cannot append, err: {}, try to re-append this log_batch into next log", | ||
e | ||
); | ||
// Notify the next `append` to rotate current file. | ||
force_rotate = true; | ||
writer.set_output(Err(Error::Other(box_err!( | ||
"Failed to append logbatch, try to dump it to other dir" | ||
)))); | ||
continue; | ||
} | ||
force_rotate = false; | ||
writer.set_output(res); | ||
} | ||
perf_context!(log_write_duration).observe_since(now); | ||
if sync { | ||
// As per trait protocol, this error should be retriable. But we panic anyway to | ||
// save the trouble of propagating it to other group members. | ||
// As per trait protocol, this error should be retriable. But we panic | ||
// anyway to save the trouble of propagating it to | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why the comment line break is changed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tackled in the new pr. |
||
// other group members. | ||
self.pipe_log.sync(LogQueue::Append).expect("pipe::sync()"); | ||
} | ||
// Pass the perf context diff to all the writers. | ||
|
@@ -178,7 +209,27 @@ where | |
debug_assert_eq!(writer.perf_context_diff.write_wait_duration, Duration::ZERO); | ||
perf_context += &writer.perf_context_diff; | ||
set_perf_context(perf_context); | ||
writer.finish()? | ||
// Retry if `writer.finish()` returns a special 'Error::Other', remarking that | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add another error type TryAgain. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
// there still exists free space for this `LogBatch`. | ||
match writer.finish() { | ||
Ok(handle) => { | ||
break handle; | ||
} | ||
Err(Error::Other(_)) => { | ||
// A special err, we will retry this LogBatch `append` by appending | ||
// this writer to the next write group, and the current write leader | ||
// will not hang on this write and will return timely. | ||
if attempt_count >= MAX_WRITE_RETRY_COUNT { | ||
return Err(Error::Other(box_err!( | ||
"Failed to write logbatch, exceed max_retry_count: ({})", | ||
MAX_WRITE_RETRY_COUNT | ||
))); | ||
} | ||
} | ||
Err(e) => { | ||
return Err(e); | ||
} | ||
} | ||
}; | ||
let mut now = Instant::now(); | ||
log_batch.finish_write(block_handle); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not what I meant by "rotate inside pipe_log". The decision to rotate should be put inside pipe_log.