Skip to content

Commit

Permalink
Make the test reproduce the issues
Browse files Browse the repository at this point in the history
  • Loading branch information
arpad-m committed Nov 14, 2024
1 parent c67cd22 commit e793e52
Showing 1 changed file with 44 additions and 23 deletions.
67 changes: 44 additions & 23 deletions test_runner/regress/test_timeline_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
last_flush_lsn_upload,
)
from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty, list_prefix
from fixtures.pageserver.utils import (
assert_prefix_empty,
assert_prefix_not_empty,
list_prefix,
wait_until_tenant_active,
)
from fixtures.remote_storage import S3Storage, s3_storage
from fixtures.utils import wait_until
from mypy_boto3_s3.type_defs import (
Expand Down Expand Up @@ -378,48 +383,60 @@ def child_offloaded():
)


@pytest.mark.parametrize("with_intermediary", [False, True])
@pytest.mark.parametrize(
"offload_child",
[
"offload",
"offload-corrupt",
"offload-no-restart",
"offload-parent-no-restart",
"offload-parent",
"archive",
None,
],
)
def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Optional[str]):
def test_timeline_retain_lsn(
neon_env_builder: NeonEnvBuilder, with_intermediary: bool, offload_child: Optional[str]
):
"""
Ensure that retain_lsn functionality for timelines works, both for offloaded and non-offloaded ones
"""
if offload_child == "offload-corrupt":
# Our corruption code only works with S3 compatible storage
neon_env_builder.enable_pageserver_remote_storage(s3_storage())

neon_env_builder.rust_log_override = "info,[gc_timeline]=debug"
env = neon_env_builder.init_start()
ps_http = env.pageserver.http_client()

# Turn off gc and compaction loops: we want to issue them manually for better reliability
tenant_id, root_timeline_id = env.create_tenant(
conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": 128 * 1024,
"checkpoint_distance": 32 * 1024,
"compaction_threshold": 1,
"compaction_target_size": 128 * 1024,
"compaction_target_size": 32 * 1024,
# set small image creation thresholds so that gc deletes data
"image_creation_threshold": 2,
"image_creation_threshold": 1,
# disable background compaction and GC. We invoke it manually when we want it to happen.
"gc_period": "0s",
"compaction_period": "0s",
# Disable pitr, we only want the latest lsn
"pitr_interval": "0s",
"gc_horizon": 0,
# Don't rely on endpoint lsn leases
"lsn_lease_length": "0s",
}
)

with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
if with_intermediary:
parent_branch_name = "test_archived_parent"
parent_timeline_id = env.create_branch("test_archived_parent", tenant_id)
else:
parent_branch_name = "main"
parent_timeline_id = root_timeline_id

with env.endpoints.create_start(parent_branch_name, tenant_id=tenant_id) as endpoint:
endpoint.safe_psql_many(
[
"CREATE TABLE foo(v int, key serial primary key, t text default 'data_content')",
Expand All @@ -429,14 +446,16 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op
)
pre_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200")
log.info(f"Pre branch sum: {pre_branch_sum}")
last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id)
last_flush_lsn_upload(env, endpoint, tenant_id, parent_timeline_id)

# Create a branch and write some additional data to the parent
child_timeline_id = env.create_branch("test_archived_branch", tenant_id)
child_timeline_id = env.create_branch(
"test_archived_branch", tenant_id, ancestor_branch_name=parent_branch_name
)

with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
# Do some churn of the data. This is important so that we can overwrite image layers.
for i in range(10):
with env.endpoints.create_start(parent_branch_name, tenant_id=tenant_id) as endpoint:
# Do some overwriting churn with compactions in between. This is important so that we can overwrite image layers.
for i in range(5):
endpoint.safe_psql_many(
[
f"SELECT setseed(0.23{i})",
Expand All @@ -445,9 +464,9 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op
"UPDATE foo SET v=(random() * 409600)::int WHERE v % 3 = 0",
]
)
last_flush_lsn_upload(env, endpoint, tenant_id, parent_timeline_id)
post_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200")
log.info(f"Post branch sum: {post_branch_sum}")
last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id)

if offload_child is not None:
ps_http.timeline_archival_config(
Expand All @@ -463,19 +482,14 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op
if "offload" in offload_child:
ps_http.timeline_offload(tenant_id, child_timeline_id)
if "offload-parent" in offload_child:
# Do a cycle of offload then unoffload to ensure the retain_lsn of the child
# Also offload the parent to ensure the retain_lsn of the child
# is entered in the parent at unoffloading
ps_http.timeline_archival_config(
tenant_id,
root_timeline_id,
parent_timeline_id,
state=TimelineArchivalState.ARCHIVED,
)
ps_http.timeline_offload(tenant_id, root_timeline_id)
ps_http.timeline_archival_config(
tenant_id,
root_timeline_id,
state=TimelineArchivalState.UNARCHIVED,
)
ps_http.timeline_offload(tenant_id, parent_timeline_id)

# Do a restart to get rid of any in-memory objects (we only init gc info once, at attach)
if offload_child is None or not ("no-restart" in offload_child):
Expand Down Expand Up @@ -516,12 +530,19 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op
)
if offload_child is None or not ("no-restart" in offload_child):
env.pageserver.start()
if offload_child == "offload-parent":
wait_until_tenant_active(ps_http, tenant_id=tenant_id)
ps_http.timeline_archival_config(
tenant_id,
parent_timeline_id,
state=TimelineArchivalState.UNARCHIVED,
)

    # Do an aggressive gc and compaction of the parent branch
ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=root_timeline_id, gc_horizon=0)
ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=parent_timeline_id, gc_horizon=0)
ps_http.timeline_checkpoint(
tenant_id,
root_timeline_id,
parent_timeline_id,
force_l0_compaction=True,
force_repartition=True,
wait_until_uploaded=True,
Expand Down

0 comments on commit e793e52

Please sign in to comment.