Skip to content

Commit

Permalink
test(pageserver): add detach ancestor smoke test (#9842)
Browse files Browse the repository at this point in the history
## Problem

Follow-up to #9682. Hopefully this test will help us detect issues, or
assure us that this is ready for production.

## Summary of changes

* Add a compaction-detach-ancestor smoke test.

---------

Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh authored Nov 22, 2024
1 parent e939d36 commit 6f8b1eb
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 4 deletions.
2 changes: 1 addition & 1 deletion test_runner/fixtures/pageserver/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def tenant_list_locations(self):
assert isinstance(res_json["tenant_shards"], list)
return res_json

def tenant_get_location(self, tenant_id: TenantShardId):
def tenant_get_location(self, tenant_id: TenantId | TenantShardId):
res = self.get(
f"http://localhost:{self.port}/v1/location_config/{tenant_id}",
)
Expand Down
5 changes: 3 additions & 2 deletions test_runner/fixtures/workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,10 @@ def stop(self):
def __del__(self):
self.stop()

def init(self, pageserver_id: int | None = None):
def init(self, pageserver_id: int | None = None, allow_recreate=False):
endpoint = self.endpoint(pageserver_id)

if allow_recreate:
endpoint.safe_psql(f"DROP TABLE IF EXISTS {self.table};")
endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
last_flush_lsn_upload(
Expand Down
54 changes: 53 additions & 1 deletion test_runner/regress/test_timeline_detach_ancestor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_timeline_detail_404
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.utils import assert_pageserver_backups_equal, wait_until
from fixtures.utils import assert_pageserver_backups_equal, skip_in_debug_build, wait_until
from fixtures.workload import Workload
from requests import ReadTimeout


Expand Down Expand Up @@ -1550,6 +1551,57 @@ def pausepoint_hit_with_gc_paused() -> LogCursor:
env.pageserver.assert_log_contains(".* gc_loop.*: 1 timelines need GC", offset)


# Smoke test: churn rows on a parent timeline and on a child branch while the
# tenant runs with aggressive gc/compaction settings, call detach_ancestor on
# the child, then validate the data of both workloads.
# Skipped in debug builds (see the decorator's reason string).
# NOTE(review): leading indentation was lost in this view of the file, so the
# nesting of statements below is inferred from blank lines — confirm against
# the real source before relying on it.
@skip_in_debug_build("only run with release build")
def test_pageserver_compaction_detach_ancestor_smoke(neon_env_builder: NeonEnvBuilder):
# Tenant config passed to init_start() as the initial tenant configuration.
SMOKE_CONF = {
# Run both gc and gc-compaction.
"gc_period": "5s",
"compaction_period": "5s",
# No PiTR interval and small GC horizon
"pitr_interval": "0s",
# NOTE(review): gc_horizon is passed as a string while the other 1024**2
# values below are ints — presumably both forms are accepted; confirm.
"gc_horizon": f"{1024 ** 2}",
"lsn_lease_length": "0s",
# Small checkpoint distance to create many layers
"checkpoint_distance": 1024**2,
# Compact small layers
"compaction_target_size": 1024**2,
"image_creation_threshold": 2,
}

env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)

tenant_id = env.initial_tenant
timeline_id = env.initial_timeline

# Workload size: rows written/churned per call, and number of churn rounds.
row_count = 10000
churn_rounds = 50

# HTTP client for the pageserver; used below to request the ancestor detach.
ps_http = env.pageserver.http_client()

# Parent workload runs on the tenant's initial timeline.
workload_parent = Workload(env, tenant_id, timeline_id)
workload_parent.init(env.pageserver.id)
log.info("Writing initial data ...")
workload_parent.write_rows(row_count, env.pageserver.id)
# Create the "child" branch only after the parent has written its initial data.
branch_id = env.create_branch("child")
workload_child = Workload(env, tenant_id, branch_id, branch_name="child")
# allow_recreate=True makes init() issue DROP TABLE IF EXISTS before creating
# the table — presumably needed because the branch already contains the
# parent's table; confirm.
workload_child.init(env.pageserver.id, allow_recreate=True)
log.info("Writing initial data on child...")
workload_child.write_rows(row_count, env.pageserver.id)

# Churn both timelines for churn_rounds rounds, logging every 10th round.
for i in range(1, churn_rounds + 1):
if i % 10 == 0:
log.info(f"Running churn round {i}/{churn_rounds} ...")

workload_parent.churn_rows(row_count, env.pageserver.id)
workload_child.churn_rows(row_count, env.pageserver.id)

# Detach the child branch from its ancestor.
# NOTE(review): this appears to run once, after the churn loop — confirm
# (indentation is not visible here).
ps_http.detach_ancestor(tenant_id, branch_id)

# Both workloads must still validate after the detach.
log.info("Validating at workload end ...")
workload_parent.validate(env.pageserver.id)
workload_child.validate(env.pageserver.id)


# TODO:
# - branch near existing L1 boundary, image layers?
# - investigate: why are layers started at uneven lsn? not just after branching, but in general.
Expand Down

1 comment on commit 6f8b1eb

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5635 tests run: 5406 passed, 1 failed, 228 skipped (full report)


Failures on Postgres 16

# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_sharded_ingest[release-pg16-github-actions-selfhosted-1]"

Code coverage* (full report)

  • functions: 31.4% (7956 of 25344 functions)
  • lines: 49.3% (63146 of 128104 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
6f8b1eb at 2024-11-22T20:12:53.166Z :recycle:

Please sign in to comment.