CI: run tests with both Pageserver VirtualFile IO engines
- control via the env var PAGESERVER_VIRTUAL_FILE_IO_ENGINE
- if an io engine other than std-fs is used, it shows up in the test
  name; this is so that we can continue to use the flaky-tests database
  (the sketch below shows the resulting test names)
- raise the memlock limit and, while at it, also raise the shmem limit
  for the Rust tests. This is needed on our older runners that use
  an older 5.10.x LTS kernel, where io_uring SQ and CQ memory is still
  counted towards the rlimit; see
  #6373 (comment)
  for details.
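
For illustration (a hedged sketch, not part of this commit; the test name and parameter values are made up), the naming rule that the flaky-tests tooling and the pytest parametrization follow works roughly like this:

import os

def parametrized_name(test: str, build_type: str, pg_version: str) -> str:
    # A non-default engine is appended to the parameter list; the default
    # (unset or "std-fs") leaves the name unchanged, so historical flaky-test
    # statistics keyed on the old names stay usable.
    io_engine = os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")
    suffix = f"-{io_engine}" if io_engine not in ("", "std-fs") else ""
    return f"{test}[{build_type}-pg{pg_version}{suffix}]"

# PAGESERVER_VIRTUAL_FILE_IO_ENGINE=tokio-epoll-uring -> test_example[release-pg16-tokio-epoll-uring]
# unset or std-fs                                     -> test_example[release-pg16]
print(parametrized_name("test_example", "release", "16"))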

Co-authored-by: Alexander Bayandin <[email protected]>
problame and bayandin committed Jan 24, 2024
1 parent 2b4ff34 commit 2cf5a4c
Showing 4 changed files with 49 additions and 16 deletions.
25 changes: 18 additions & 7 deletions .github/workflows/build_and_test.yml
@@ -186,7 +186,11 @@ jobs:
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
options: --init
# Raise locked memory limit for tokio-epoll-uring.
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
# io_uring will account the memory of the CQ and SQ as locked.
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix:
@@ -341,7 +345,9 @@ jobs:
- name: Run rust tests
run: |
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
done
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
@@ -419,13 +425,14 @@ jobs:
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
# Default shared memory is 64mb
options: --init --shm-size=512mb
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
pg_version: [ v14, v15, v16 ]
pageserver_virtual_file_io_engine: [ std-fs, tokio-epoll-uring ]
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -448,6 +455,7 @@ jobs:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: ${{ matrix.pageserver_virtual_file_io_engine }}

- name: Merge and upload coverage data
if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
@@ -458,14 +466,16 @@
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
# Default shared memory is 64mb
options: --init --shm-size=512mb
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
fail-fast: false
matrix:
# the amount of groups (N) should be reflected in `extra_params: --splits N ...`
pytest_split_group: [ 1, 2, 3, 4 ]
build_type: [ release ]
pageserver_virtual_file_io_engine: [ std-fs, tokio-epoll-uring ]
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -477,11 +487,12 @@
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ github.ref_name == 'main' }}
extra_params: --splits ${{ strategy.job-total }} --group ${{ matrix.pytest_split_group }}
extra_params: --splits 4 --group ${{ matrix.pytest_split_group }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: "${{ matrix.pageserver_virtual_file_io_engine }}"
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

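A note on the --ulimit memlock=67108864:67108864 value introduced above (an aside, not part of the diff): it is 64 MiB expressed in bytes, applied to both the soft and hard limit, which leaves room for tokio-epoll-uring's submission and completion rings on kernels that charge them against RLIMIT_MEMLOCK.

# 64 MiB in bytes, the value passed to --ulimit memlock=<soft>:<hard> above.
MEMLOCK_LIMIT_BYTES = 64 * 1024 * 1024
assert MEMLOCK_LIMIT_BYTES == 67108864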
14 changes: 12 additions & 2 deletions scripts/flaky_tests.py
@@ -3,6 +3,7 @@
import argparse
import json
import logging
import os
from collections import defaultdict
from typing import DefaultDict, Dict

@@ -45,6 +46,15 @@ def main(args: argparse.Namespace):
logging.error("cannot fetch flaky tests from the DB due to an error", exc)
rows = []

# If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_ENGINE (i.e. not empty, not std-fs),
# use it to parametrize test name along with build_type and pg_version
#
# See test_runner/fixtures/parametrize.py for details
if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in ("", "std-fs"):
pageserver_virtual_file_io_engine_parameter = f"-{io_engine}"
else:
pageserver_virtual_file_io_engine_parameter = ""

for row in rows:
# We don't want to automatically rerun tests in a performance suite
if row["parent_suite"] != "test_runner.regress":
@@ -53,10 +63,10 @@
if row["name"].endswith("]"):
parametrized_test = row["name"].replace(
"[",
f"[{build_type}-pg{pg_version}-",
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}-",
)
else:
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}]"
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}]"

res[row["parent_suite"]][row["suite"]][parametrized_test] = True

4 changes: 4 additions & 0 deletions test_runner/fixtures/neon_fixtures.py
@@ -1197,6 +1197,7 @@ def _shared_simple_env(
neon_binpath: Path,
pg_distrib_dir: Path,
pg_version: PgVersion,
pageserver_virtual_file_io_engine: str,
) -> Iterator[NeonEnv]:
"""
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
@@ -1226,6 +1227,7 @@ def _shared_simple_env(
preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
test_name=request.node.name,
test_output_dir=test_output_dir,
pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
) as builder:
env = builder.init_start()

@@ -1264,6 +1266,7 @@ def neon_env_builder(
request: FixtureRequest,
test_overlay_dir: Path,
top_output_dir: Path,
pageserver_virtual_file_io_engine: str,
) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Neon environment for test.
@@ -1293,6 +1296,7 @@ def neon_env_builder(
broker=default_broker,
run_id=run_id,
preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
test_name=request.node.name,
test_output_dir=test_output_dir,
test_overlay_dir=test_overlay_dir,
22 changes: 15 additions & 7 deletions test_runner/fixtures/parametrize.py
@@ -8,7 +8,7 @@
from fixtures.pg_version import PgVersion

"""
Dynamically parametrize tests by Postgres version and build type (debug/release/remote)
Dynamically parametrize tests by Postgres version, build type (debug/release/remote), and possibly by other parameters
"""


@@ -31,11 +31,12 @@ def build_type(request: FixtureRequest) -> Optional[str]:
return None


def pytest_generate_tests(metafunc: Metafunc):
# Do not parametrize performance tests yet, we need to prepare grafana charts first
if "test_runner/performance" in metafunc.definition._nodeid:
return
@pytest.fixture(scope="function", autouse=True)
def pageserver_virtual_file_io_engine(request: FixtureRequest) -> Optional[str]:
return None


def pytest_generate_tests(metafunc: Metafunc):
if (v := os.environ.get("DEFAULT_PG_VERSION")) is None:
pg_versions = [version for version in PgVersion if version != PgVersion.NOT_SET]
else:
@@ -46,5 +47,12 @@ def pytest_generate_tests(metafunc: Metafunc):
else:
build_types = [bt.lower()]

metafunc.parametrize("build_type", build_types)
metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions))
# Do not parametrize performance tests yet by Postgres version or build type, we need to prepare grafana charts first
if "test_runner/performance" not in metafunc.definition._nodeid:
metafunc.parametrize("build_type", build_types)
metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions))

# A hacky way to parametrize tests only for `pageserver_virtual_file_io_engine=tokio-epoll-uring`
# And do not change test name for default `pageserver_virtual_file_io_engine=std-fs` to keep tests statistics
if (io_engine := os.environ.get("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in ("", "std-fs"):
metafunc.parametrize("pageserver_virtual_file_io_engine", [io_engine])

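The trick above relies on regular pytest behaviour: directly parametrizing an argument name that also exists as a fixture overrides the fixture and appends the value to the generated test id. A minimal standalone sketch of the pattern (the names IO_ENGINE and io_engine are invented for the illustration and are not from this repository):

import os

import pytest


@pytest.fixture(scope="function", autouse=True)
def io_engine(request):
    # Default used whenever pytest_generate_tests does not parametrize this name.
    return None


def pytest_generate_tests(metafunc):
    # Direct parametrization of a name that is also a fixture overrides the
    # fixture and adds the value to the test id, e.g. test_uses_engine[fancy-engine].
    if (engine := os.environ.get("IO_ENGINE", "")) not in ("", "default"):
        metafunc.parametrize("io_engine", [engine])


def test_uses_engine(io_engine):
    # io_engine is None by default, or the IO_ENGINE value when it is set to a
    # non-default engine.
    assert io_engine is None or io_engine == os.environ["IO_ENGINE"]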