
Commit

Merge remote-tracking branch 'origin/main' into arpad/pageserver_io_async_kmerge_unified
arpad-m committed Aug 14, 2023
2 parents e4c9507 + ce7efbe commit 2850222
Showing 56 changed files with 2,164 additions and 1,202 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
@@ -814,7 +814,7 @@ jobs:
run:
shell: sh -eu {0}
env:
VM_BUILDER_VERSION: v0.15.0-alpha1
VM_BUILDER_VERSION: v0.15.4

steps:
- name: Checkout
2 changes: 2 additions & 0 deletions Dockerfile
@@ -51,6 +51,7 @@ RUN set -e \
--bin safekeeper \
--bin storage_broker \
--bin proxy \
--bin neon_local \
--locked --release \
&& cachepot -s

@@ -76,6 +77,7 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin

COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
4 changes: 3 additions & 1 deletion compute_tools/src/extension_server.rs
@@ -156,7 +156,7 @@ pub async fn get_available_extensions(
let ext_index_full = serde_json::from_slice::<Index>(&ext_idx_buffer)?;
let mut enabled_extensions = ext_index_full.public_extensions;
enabled_extensions.extend_from_slice(custom_extensions);
let library_index = ext_index_full.library_index;
let mut library_index = ext_index_full.library_index;
let all_extension_data = ext_index_full.extension_data;
info!("library_index: {:?}", library_index);

@@ -179,6 +179,8 @@ pub async fn get_available_extensions(
file_create_tasks.push(tokio::fs::write(control_path, control_contents));
} else {
warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_file);
// also delete this from library index
library_index.retain(|_, value| value != extension_name);
}
}
}
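For reference, the retain call added above drops every library-index entry that maps to the skipped extension. A minimal standalone sketch of the same pattern, using made-up extension names that are not part of this commit:

use std::collections::HashMap;

fn main() {
    let mut library_index: HashMap<String, String> = HashMap::from([
        ("libfoo".to_string(), "foo_ext".to_string()),
        ("libbar".to_string(), "bar_ext".to_string()),
    ]);
    // The control file for "foo_ext" already exists locally, so drop its library mappings.
    let extension_name = "foo_ext".to_string();
    library_index.retain(|_, value| *value != extension_name);
    assert_eq!(library_index.len(), 1);
    assert!(library_index.contains_key("libbar"));
}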
2 changes: 1 addition & 1 deletion compute_tools/src/spec.rs
@@ -270,7 +270,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
}
RoleAction::Create => {
let mut query: String = format!(
"CREATE ROLE {} CREATEROLE CREATEDB IN ROLE neon_superuser",
"CREATE ROLE {} CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser",
name.pg_quote()
);
info!("role create query: '{}'", &query);
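Purely as an illustration of the string this branch now builds for a hypothetical role named alice (assuming pg_quote wraps the name in double quotes, and ignoring any clauses appended to the query later):

fn main() {
    // Hypothetical, already-quoted role name; in the real code `name.pg_quote()` produces it.
    let quoted_name = "\"alice\"";
    let query = format!(
        "CREATE ROLE {} CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser",
        quoted_name
    );
    assert_eq!(
        query,
        "CREATE ROLE \"alice\" CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser"
    );
}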
188 changes: 188 additions & 0 deletions libs/utils/src/backoff.rs
@@ -0,0 +1,188 @@
use std::fmt::{Debug, Display};

use futures::Future;

pub const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;
pub const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;

pub async fn exponential_backoff(n: u32, base_increment: f64, max_seconds: f64) {
let backoff_duration_seconds =
exponential_backoff_duration_seconds(n, base_increment, max_seconds);
if backoff_duration_seconds > 0.0 {
tracing::info!(
"Backoff: waiting {backoff_duration_seconds} seconds before processing with the task",
);
tokio::time::sleep(std::time::Duration::from_secs_f64(backoff_duration_seconds)).await;
}
}

pub fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 {
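// With the default values (base_increment = 0.1, max_seconds = 3.0) this evaluates to 1.1^n seconds:
// ~1.1 s for n = 1, ~1.6 s for n = 5, and capped at the 3.0 s maximum from n = 12 onwards.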
if n == 0 {
0.0
} else {
(1.0 + base_increment).powf(f64::from(n)).min(max_seconds)
}
}

/// Retries the passed operation until one of the following conditions is met:
/// - the encountered error is considered permanent (non-retryable), or
/// - the retries have been exhausted.
///
/// The `is_permanent` closure distinguishes permanent errors from transient ones.
/// Once the number of attempts crosses `warn_threshold`, the function starts emitting log warnings.
/// The `description` argument is added to log messages; its value should identify what `op` is doing.
pub async fn retry<T, O, F, E>(
mut op: O,
is_permanent: impl Fn(&E) -> bool,
warn_threshold: u32,
max_retries: u32,
description: &str,
) -> Result<T, E>
where
// Not std::error::Error because anyhow::Error doesn't implement it.
// For context see https://github.com/dtolnay/anyhow/issues/63
E: Display + Debug,
O: FnMut() -> F,
F: Future<Output = Result<T, E>>,
{
let mut attempts = 0;
loop {
let result = op().await;
match result {
Ok(_) => {
if attempts > 0 {
tracing::info!("{description} succeeded after {attempts} retries");
}
return result;
}

// These are "permanent" errors that should not be retried.
Err(ref e) if is_permanent(e) => {
return result;
}
// Assume that any other failure might be transient, and the operation might
// succeed if we just keep trying.
Err(err) if attempts < warn_threshold => {
tracing::info!("{description} failed, will retry (attempt {attempts}): {err:#}");
}
Err(err) if attempts < max_retries => {
tracing::warn!("{description} failed, will retry (attempt {attempts}): {err:#}");
}
Err(ref err) => {
// Operation failed `max_retries` times. Time to give up.
tracing::warn!(
"{description} still failed after {attempts} retries, giving up: {err:?}"
);
return result;
}
}
// sleep and retry
exponential_backoff(
attempts,
DEFAULT_BASE_BACKOFF_SECONDS,
DEFAULT_MAX_BACKOFF_SECONDS,
)
.await;
attempts += 1;
}
}

#[cfg(test)]
mod tests {
use std::io;

use tokio::sync::Mutex;

use super::*;

#[test]
fn backoff_defaults_produce_growing_backoff_sequence() {
let mut current_backoff_value = None;

for i in 0..10_000 {
let new_backoff_value = exponential_backoff_duration_seconds(
i,
DEFAULT_BASE_BACKOFF_SECONDS,
DEFAULT_MAX_BACKOFF_SECONDS,
);

if let Some(old_backoff_value) = current_backoff_value.replace(new_backoff_value) {
assert!(
old_backoff_value <= new_backoff_value,
"{i}th backoff value {new_backoff_value} is smaller than the previous one {old_backoff_value}"
)
}
}

assert_eq!(
current_backoff_value.expect("Should have produced backoff values to compare"),
DEFAULT_MAX_BACKOFF_SECONDS,
"Given big enough of retries, backoff should reach its allowed max value"
);
}

#[tokio::test(start_paused = true)]
async fn retry_always_error() {
let count = Mutex::new(0);
let err_result = retry(
|| async {
*count.lock().await += 1;
Result::<(), io::Error>::Err(io::Error::from(io::ErrorKind::Other))
},
|_e| false,
1,
1,
"work",
)
.await;

assert!(err_result.is_err());

assert_eq!(*count.lock().await, 2);
}

#[tokio::test(start_paused = true)]
async fn retry_ok_after_err() {
let count = Mutex::new(0);
retry(
|| async {
let mut locked = count.lock().await;
if *locked > 1 {
Ok(())
} else {
*locked += 1;
Err(io::Error::from(io::ErrorKind::Other))
}
},
|_e| false,
2,
2,
"work",
)
.await
.unwrap();
}

#[tokio::test(start_paused = true)]
async fn dont_retry_permanent_errors() {
let count = Mutex::new(0);
let _ = retry(
|| async {
let mut locked = count.lock().await;
if *locked > 1 {
Ok(())
} else {
*locked += 1;
Err(io::Error::from(io::ErrorKind::Other))
}
},
|_e| true,
2,
2,
"work",
)
.await
.unwrap_err();

assert_eq!(*count.lock().await, 1);
}
}
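A minimal caller-side sketch of how the new retry helper is meant to be used, with a hypothetical operation and error-classification rule (both are illustrative, not part of this commit):

use std::io;
use utils::backoff;

async fn fetch_with_retries() -> Result<String, io::Error> {
    backoff::retry(
        || async {
            // Hypothetical fallible operation; a real caller would issue e.g. a remote-storage request here.
            Ok::<String, io::Error>("payload".to_string())
        },
        // Treat permission errors as permanent; everything else is retried with exponential backoff.
        |e| e.kind() == io::ErrorKind::PermissionDenied,
        3,  // warn_threshold: the first 3 failures are logged at info level, later ones at warn
        10, // max_retries: give up after this, i.e. at most max_retries + 1 calls to the operation
        "fetching payload",
    )
    .await
}

The tests above run with tokio's start_paused clock, so the exponential sleeps complete instantly instead of slowing down the test suite.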
37 changes: 36 additions & 1 deletion libs/utils/src/fs_ext.rs
@@ -24,6 +24,20 @@ pub async fn is_directory_empty(path: impl AsRef<Path>) -> anyhow::Result<bool>
Ok(dir.next_entry().await?.is_none())
}

pub async fn list_dir(path: impl AsRef<Path>) -> anyhow::Result<Vec<String>> {
let mut dir = tokio::fs::read_dir(&path)
.await
.context(format!("read_dir({})", path.as_ref().display()))?;

let mut content = vec![];
while let Some(next) = dir.next_entry().await? {
let file_name = next.file_name();
content.push(file_name.to_string_lossy().to_string());
}

Ok(content)
}

pub fn ignore_not_found(e: io::Error) -> io::Result<()> {
if e.kind() == io::ErrorKind::NotFound {
Ok(())
@@ -43,7 +57,7 @@ where
mod test {
use std::path::PathBuf;

use crate::fs_ext::is_directory_empty;
use crate::fs_ext::{is_directory_empty, list_dir};

use super::ignore_absent_files;

@@ -109,4 +123,25 @@ mod test {

assert!(!file_path.exists());
}

#[tokio::test]
async fn list_dir_works() {
let dir = tempfile::tempdir().unwrap();
let dir_path = dir.path();

assert!(list_dir(dir_path).await.unwrap().is_empty());

let file_path: PathBuf = dir_path.join("testfile");
let _ = std::fs::File::create(&file_path).unwrap();

assert_eq!(&list_dir(dir_path).await.unwrap(), &["testfile"]);

let another_dir_path: PathBuf = dir_path.join("testdir");
std::fs::create_dir(another_dir_path).unwrap();

let expected = &["testdir", "testfile"];
let mut actual = list_dir(dir_path).await.unwrap();
actual.sort();
assert_eq!(actual, expected);
}
}
2 changes: 2 additions & 0 deletions libs/utils/src/lib.rs
@@ -1,6 +1,8 @@
//! `utils` is intended to be a place to put code that is shared
//! between other crates in this repository.
pub mod backoff;

/// `Lsn` type implements common tasks on Log Sequence Numbers
pub mod lsn;
/// SeqWait allows waiting for a future sequence number to arrive
2 changes: 1 addition & 1 deletion pageserver/ctl/src/layers.rs
@@ -72,7 +72,7 @@ async fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
.await?;
let cursor = BlockCursor::new(&file);
for (k, v) in all {
let value = cursor.read_blob(v.pos())?;
let value = cursor.read_blob(v.pos()).await?;
println!("key:{} value_len:{}", k, value.len());
}
// TODO(chi): special handling for last key?
2 changes: 1 addition & 1 deletion pageserver/src/bin/pageserver.rs
@@ -373,7 +373,7 @@ fn start_pageserver(
let order = pageserver::InitializationOrder {
initial_tenant_load: Some(init_done_tx),
initial_logical_size_can_start: init_done_rx.clone(),
initial_logical_size_attempt: init_logical_size_done_tx,
initial_logical_size_attempt: Some(init_logical_size_done_tx),
background_jobs_can_start: background_jobs_barrier.clone(),
};

9 changes: 8 additions & 1 deletion pageserver/src/config.rs
@@ -31,7 +31,9 @@ use utils::{
use crate::disk_usage_eviction_task::DiskUsageEvictionTaskConfig;
use crate::tenant::config::TenantConf;
use crate::tenant::config::TenantConfOpt;
use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
use crate::tenant::{
TENANT_ATTACHING_MARKER_FILENAME, TENANT_DELETED_MARKER_FILE_NAME, TIMELINES_SEGMENT_NAME,
};
use crate::{
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX,
TIMELINE_UNINIT_MARK_SUFFIX,
@@ -613,6 +615,11 @@ impl PageServerConf {
)
}

pub fn tenant_deleted_mark_file_path(&self, tenant_id: &TenantId) -> PathBuf {
self.tenant_path(tenant_id)
.join(TENANT_DELETED_MARKER_FILE_NAME)
}

pub fn traces_path(&self) -> PathBuf {
self.workdir.join("traces")
}