Merge branch 'main' into erik/respond-segment-flush-lsn
erikgrinaker authored Nov 13, 2024
2 parents 71f04f9 + 080d585 commit 4bfdf46
Showing 77 changed files with 1,820 additions and 604 deletions.
1 change: 1 addition & 0 deletions .github/actionlint.yml
@@ -20,3 +20,4 @@ config-variables:
- REMOTE_STORAGE_AZURE_REGION
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
- DEV_AWS_OIDC_ROLE_ARN
+ - BENCHMARK_INGEST_TARGET_PROJECTID
2 changes: 2 additions & 0 deletions .github/actions/allure-report-generate/action.yml
@@ -221,6 +221,8 @@ runs:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
+ # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
+ retries: 5
script: |
const { REPORT_URL, COMMIT_SHA } = process.env
11 changes: 0 additions & 11 deletions .github/pull_request_template.md
@@ -1,14 +1,3 @@
## Problem

## Summary of changes

- ## Checklist before requesting a review

- - [ ] I have performed a self-review of my code.
- - [ ] If it is a core feature, I have added thorough tests.
- - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
- - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.

- ## Checklist before merging

- - [ ] Do not forget to reformat commit message to not include the above checklist
2 changes: 2 additions & 0 deletions .github/workflows/build_and_test.yml
@@ -497,6 +497,8 @@ jobs:
REPORT_URL_NEW: ${{ steps.upload-coverage-report-new.outputs.report-url }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
+ # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
+ retries: 5
script: |
const { REPORT_URL_NEW, COMMIT_SHA } = process.env
372 changes: 372 additions & 0 deletions .github/workflows/ingest_benchmark.yml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions .github/workflows/neon_extra_builds.yml
@@ -201,6 +201,8 @@ jobs:
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
+ # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
+ retries: 5
script: |
const { REPORT_URL, SHA } = process.env
29 changes: 29 additions & 0 deletions .github/workflows/report-workflow-stats-batch.yml
@@ -0,0 +1,29 @@
name: Report Workflow Stats Batch

on:
schedule:
- cron: '*/15 * * * *'
- cron: '25 0 * * *'

jobs:
gh-workflow-stats-batch:
name: GitHub Workflow Stats Batch
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export Workflow Run for the past 2 hours
uses: neondatabase/[email protected]
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_batch_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '2h'
- name: Export Workflow Run for the past 24 hours
if: github.event.schedule == '25 0 * * *'
uses: neondatabase/[email protected]
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_batch_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '24h'
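
Note on the two schedules in this new workflow: the */15 cron exports a rolling two-hour window on every run, while the `if: github.event.schedule == '25 0 * * *'` guard limits the 24-hour export to the daily trigger. Both steps write to the same `gh_workflow_stats_batch_neon` table, so the daily pass presumably backfills anything the frequent runs missed.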
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

10 changes: 4 additions & 6 deletions compute/compute-node.Dockerfile
@@ -624,16 +624,12 @@ FROM build-deps AS pg-cron-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

- # 1.6.4 available, supports v17
# This is an experimental extension that we do not support on prod yet.
# !Do not remove!
# We set it in shared_preload_libraries and computes will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
- RUN case "${PG_VERSION}" in "v17") \
- echo "v17 extensions are not supported yet. Quit" && exit 0;; \
- esac && \
- wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
- echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
+ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.4.tar.gz -O pg_cron.tar.gz && \
+ echo "52d1850ee7beb85a4cb7185731ef4e5a90d1de216709d8988324b0d02e76af61 pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -1475,6 +1471,8 @@ RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter

+ COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml

COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml /etc/sql_exporter.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml /etc/neon_collector.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
Empty file.
2 changes: 1 addition & 1 deletion (file name not captured in this view)
@@ -1 +1 @@
- SELECT neon.backpressure_throttling_time()::float8 / 1000 AS throttled;
+ SELECT (neon.backpressure_throttling_time()::float8 / 1000000) AS throttled;
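
The collector change above reads like a unit fix: assuming `neon.backpressure_throttling_time()` returns cumulative throttle time in microseconds, dividing by 1,000,000 yields seconds, whereas the old divisor of 1,000 would have produced milliseconds. A one-line sketch of the same conversion, with the microsecond assumption made explicit:

    /// Convert a throttle counter to seconds, assuming the input is in
    /// microseconds (matching the new divisor in the query above).
    fn throttled_seconds(throttle_us: u64) -> f64 {
        throttle_us as f64 / 1_000_000.0
    }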
2 changes: 1 addition & 1 deletion compute/vm-image-spec-bookworm.yaml
@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
- shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
+ shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: sql-exporter
user: nobody
sysvInitAction: respawn
2 changes: 1 addition & 1 deletion compute/vm-image-spec-bullseye.yaml
@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
- shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
+ shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: sql-exporter
user: nobody
sysvInitAction: respawn
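
The new `--config.file` flag in both VM specs points postgres_exporter at the `/etc/postgres_exporter.yml` file that the compute-node.Dockerfile change above copies into the image; the two changes presumably have to land together, since the exporter would fail to start if the flag referenced a missing file.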
3 changes: 3 additions & 0 deletions compute_tools/Cargo.toml
@@ -18,9 +18,11 @@ clap.workspace = true
flate2.workspace = true
futures.workspace = true
hyper0 = { workspace = true, features = ["full"] }
+ metrics.workspace = true
nix.workspace = true
notify.workspace = true
num_cpus.workspace = true
+ once_cell.workspace = true
opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
postgres.workspace = true
@@ -39,6 +41,7 @@ tracing-subscriber.workspace = true
tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true
+ prometheus.workspace = true

compute_api.workspace = true
utils.workspace = true
15 changes: 11 additions & 4 deletions compute_tools/src/config.rs
@@ -74,10 +74,17 @@ pub fn write_postgres_conf(
}

// Locales
writeln!(file, "lc_messages='C.UTF-8'")?;
writeln!(file, "lc_monetary='C.UTF-8'")?;
writeln!(file, "lc_time='C.UTF-8'")?;
writeln!(file, "lc_numeric='C.UTF-8'")?;
if cfg!(target_os = "macos") {
writeln!(file, "lc_messages='C'")?;
writeln!(file, "lc_monetary='C'")?;
writeln!(file, "lc_time='C'")?;
writeln!(file, "lc_numeric='C'")?;
} else {
writeln!(file, "lc_messages='C.UTF-8'")?;
writeln!(file, "lc_monetary='C.UTF-8'")?;
writeln!(file, "lc_time='C.UTF-8'")?;
writeln!(file, "lc_numeric='C.UTF-8'")?;
}

match spec.mode {
ComputeMode::Primary => {}
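
The platform split above exists because macOS's libc does not ship a `C.UTF-8` locale, so Postgres would reject these settings on macOS development machines; plain `C` is the portable fallback. `cfg!(target_os = "macos")` is evaluated at compile time, so each build writes a fixed set of locale lines; the same pattern appears as `DEFAULT_LOCALE` in `libs/pageserver_api/src/config.rs` further down.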
25 changes: 25 additions & 0 deletions compute_tools/src/http/api.rs
@@ -9,6 +9,7 @@ use crate::catalog::SchemaDumpError;
use crate::catalog::{get_database_schema, get_dbs_and_roles};
use crate::compute::forward_termination_signal;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
+ use crate::installed_extensions;
use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
use compute_api::responses::{
ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
@@ -19,6 +20,8 @@ use anyhow::Result;
use hyper::header::CONTENT_TYPE;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
+ use metrics::Encoder;
+ use metrics::TextEncoder;
use tokio::task;
use tracing::{debug, error, info, warn};
use tracing_utils::http::OtelName;
@@ -65,6 +68,28 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body>
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
}

+ // Prometheus metrics
+ (&Method::GET, "/metrics") => {
+ debug!("serving /metrics GET request");

+ let mut buffer = vec![];
+ let metrics = installed_extensions::collect();
+ let encoder = TextEncoder::new();
+ encoder.encode(&metrics, &mut buffer).unwrap();

+ match Response::builder()
+ .status(StatusCode::OK)
+ .header(CONTENT_TYPE, encoder.format_type())
+ .body(Body::from(buffer))
+ {
+ Ok(response) => response,
+ Err(err) => {
+ let msg = format!("error handling /metrics request: {err}");
+ error!(msg);
+ render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR)
+ }
+ }
+ }
// Collect Postgres current usage insights
(&Method::GET, "/insights") => {
info!("serving /insights GET request");
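
For the `/metrics` handler above: `TextEncoder` produces the Prometheus text exposition format, and `encoder.format_type()` supplies the matching content type (`text/plain; version=0.0.4` in the underlying prometheus crate), so standard scrapers can parse the body. A self-contained sketch of this encode path follows the `installed_extensions.rs` diff below.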
15 changes: 15 additions & 0 deletions compute_tools/src/http/openapi_spec.yaml
@@ -37,6 +37,21 @@ paths:
schema:
$ref: "#/components/schemas/ComputeMetrics"

+ /metrics:
+ get:
+ tags:
+ - Info
+ summary: Get compute node metrics in text format.
+ description: ""
+ operationId: getComputeMetrics
+ responses:
+ 200:
+ description: ComputeMetrics
+ content:
+ text/plain:
+ schema:
+ type: string
+ description: Metrics in text format.
/insights:
get:
tags:
31 changes: 28 additions & 3 deletions compute_tools/src/installed_extensions.rs
@@ -1,4 +1,5 @@
use compute_api::responses::{InstalledExtension, InstalledExtensions};
+ use metrics::proto::MetricFamily;
use std::collections::HashMap;
use std::collections::HashSet;
use tracing::info;
@@ -8,6 +9,10 @@ use anyhow::Result;
use postgres::{Client, NoTls};
use tokio::task;

+ use metrics::core::Collector;
+ use metrics::{register_uint_gauge_vec, UIntGaugeVec};
+ use once_cell::sync::Lazy;

/// We don't reuse get_existing_dbs() just for code clarity
/// and to make database listing query here more explicit.
///
@@ -59,6 +64,12 @@ pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtensions>

for (extname, v) in extensions.iter() {
let version = v.to_string();

+ // increment the number of databases where the version of extension is installed
+ INSTALLED_EXTENSIONS
+ .with_label_values(&[extname, &version])
+ .inc();

extensions_map
.entry(extname.to_string())
.and_modify(|e| {
@@ -74,9 +85,11 @@
}
}

- Ok(InstalledExtensions {
+ let res = InstalledExtensions {
extensions: extensions_map.values().cloned().collect(),
- })
+ };

+ Ok(res)
})
.await?
}
@@ -97,6 +110,18 @@ pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {
"[NEON_EXT_STAT] {}",
serde_json::to_string(&result).expect("failed to serialize extensions list")
);

Ok(())
}

+ static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
+ register_uint_gauge_vec!(
+ "installed_extensions",
+ "Number of databases where the version of extension is installed",
+ &["extension_name", "version"]
+ )
+ .expect("failed to define a metric")
+ });

+ pub fn collect() -> Vec<MetricFamily> {
+ INSTALLED_EXTENSIONS.collect()
+ }
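
A minimal, runnable sketch of the gauge-vec-plus-collect pattern above, written against the prometheus crate directly (the `metrics` crate used in this file appears to be a wrapper that adds the unsigned-gauge variant; the extension names and counts below are made up):

    use once_cell::sync::Lazy;
    use prometheus::{register_int_gauge_vec, Encoder, IntGaugeVec, TextEncoder};

    static INSTALLED_EXTENSIONS: Lazy<IntGaugeVec> = Lazy::new(|| {
        register_int_gauge_vec!(
            "installed_extensions",
            "Number of databases where the version of extension is installed",
            &["extension_name", "version"]
        )
        .expect("failed to define a metric")
    });

    fn main() {
        // One database has pg_cron 1.6 installed; two have plpgsql 1.0.
        INSTALLED_EXTENSIONS.with_label_values(&["pg_cron", "1.6"]).inc();
        INSTALLED_EXTENSIONS.with_label_values(&["plpgsql", "1.0"]).inc();
        INSTALLED_EXTENSIONS.with_label_values(&["plpgsql", "1.0"]).inc();

        // The register_* macros use the crate's default registry, so gather()
        // returns the same metric families a /metrics scrape would serve.
        let mut buffer = Vec::new();
        TextEncoder::new()
            .encode(&prometheus::gather(), &mut buffer)
            .unwrap();
        print!("{}", String::from_utf8(buffer).unwrap());
        // Output (abridged):
        //   installed_extensions{extension_name="pg_cron",version="1.6"} 1
        //   installed_extensions{extension_name="plpgsql",version="1.0"} 2
    }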
2 changes: 1 addition & 1 deletion docs/rfcs/038-aux-file-v2.md
@@ -91,7 +91,7 @@ generating the basebackup by scanning the `REPL_ORIGIN_KEY_PREFIX` keyspace.
There are two places we need to read the aux files from the pageserver:

* On the write path, when the compute node adds an aux file to the pageserver, we will retrieve the key from the storage, append the file to the hashed key, and write it back. The current `get` API already supports that.
- * We use the vectored get API to retrieve all aux files while generating the basebackup. Because we need to scan a sparse keyspace, we slightly modified the vectored get path. The vectorized API will attempt to retrieve every single key within the requested key range, and therefore, we modified it so that keys within `NON_INHERITED_SPARSE_RANGE` will not trigger a missing-key error.
+ * We use the vectored get API to retrieve all aux files while generating the basebackup. Because we need to scan a sparse keyspace, we slightly modified the vectored get path. The vectorized API used to always attempt to retrieve every single key within the requested key range, and therefore, we modified it so that keys within `NON_INHERITED_SPARSE_RANGE` will not trigger a missing-key error. Furthermore, since aux file reads usually need all layer files intersecting that key range on the branch and cover a large keyspace, tracking the keyspace that has not yet been read incurs a large overhead. Therefore, for sparse keyspaces, we [do not track](https://github.com/neondatabase/neon/pull/9631) `unmapped_keyspace`.

## Compaction and Image Layer Generation

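
A hedged sketch of the sparse-keyspace read semantics the RFC text above describes (the types are illustrative, not the pageserver's actual read path):

    // Illustrative only: in a sparse keyspace, the absence of a key is a
    // normal outcome; in a dense keyspace it is an error.
    enum ReadOutcome { Value(Vec<u8>), Absent }

    fn read_key(sparse: bool, found: Option<Vec<u8>>) -> Result<ReadOutcome, String> {
        match (found, sparse) {
            (Some(v), _) => Ok(ReadOutcome::Value(v)),
            (None, true) => Ok(ReadOutcome::Absent),   // sparse: skip quietly
            (None, false) => Err("missing key".into()), // dense: hard error
        }
    }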
6 changes: 5 additions & 1 deletion libs/pageserver_api/src/config.rs
@@ -277,7 +277,11 @@ pub mod defaults {
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
- pub const DEFAULT_LOCALE: &str = "C.UTF-8";
+ pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
+ "C"
+ } else {
+ "C.UTF-8"
+ };

pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
8 changes: 4 additions & 4 deletions libs/pageserver_api/src/record.rs
@@ -80,18 +80,18 @@ impl NeonWalRecord {
}

#[cfg(feature = "testing")]
- pub fn wal_clear() -> Self {
+ pub fn wal_clear(s: impl AsRef<str>) -> Self {
Self::Test {
- append: "".to_string(),
+ append: s.as_ref().to_string(),
clear: true,
will_init: false,
}
}

#[cfg(feature = "testing")]
- pub fn wal_init() -> Self {
+ pub fn wal_init(s: impl AsRef<str>) -> Self {
Self::Test {
- append: "".to_string(),
+ append: s.as_ref().to_string(),
clear: true,
will_init: true,
}
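
A hedged sketch of calling the updated test-only constructors (the call sites are illustrative, not from the diff; they require the crate's `testing` feature, and `impl AsRef<str>` accepts both `&str` and `String`):

    use pageserver_api::record::NeonWalRecord;

    #[cfg(feature = "testing")]
    fn example_records() -> (NeonWalRecord, NeonWalRecord) {
        // Clears the page, then appends the given payload.
        let clear = NeonWalRecord::wal_clear("c1");
        // Also clears, and additionally marks the record as initializing
        // the page (will_init = true).
        let init = NeonWalRecord::wal_init(String::from("i1"));
        (clear, init)
    }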
5 changes: 5 additions & 0 deletions libs/utils/src/auth.rs
@@ -40,6 +40,11 @@ pub enum Scope {
/// Allows access to storage controller APIs used by the scrubber, to interrogate the state
/// of a tenant & post scrub results.
Scrubber,

+ /// This scope is used for communication with other storage controller instances.
+ /// At the time of writing, this is only used for the step down request.
+ #[serde(rename = "controller_peer")]
+ ControllerPeer,
}

/// JWT payload. See docs/authentication.md for the format
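
With the `serde(rename)` above, the new variant serializes as the string `controller_peer` in token claims. A hedged round-trip sketch (assuming `Scope` derives the serde traits, as the attribute suggests):

    use utils::auth::Scope;

    fn main() {
        let scope: Scope = serde_json::from_str("\"controller_peer\"").unwrap();
        assert!(matches!(scope, Scope::ControllerPeer));
    }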
(The remaining changed files are not rendered in this view.)
