diff --git a/test_runner/deep_layers_with_delta.py b/test_runner/deep_layers_with_delta.py
new file mode 100644
index 000000000000..9d8dea0bf5cf
--- /dev/null
+++ b/test_runner/deep_layers_with_delta.py
@@ -0,0 +1,88 @@
+# Manually build a deep stack of L0 delta layers on a local pageserver:
+# disable the background GC/compaction triggers, then repeatedly update a
+# table and force a checkpoint + compaction so that each iteration flushes
+# one new L0 delta layer.
+import time
+
+import psycopg2
+
+from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
+from fixtures.pageserver.http import PageserverHttpClient
+
+# Management API of the pageserver under test.
+ps_http = PageserverHttpClient(port=9898, is_testing_enabled_or_skip=lambda: None)
+# "Virtual pageserver" (storage controller) API, which owns the tenant config.
+vps_http = PageserverHttpClient(port=1234, is_testing_enabled_or_skip=lambda: None)
+
+tenants = ps_http.tenant_list()
+assert len(tenants) == 1
+tenant_id = TenantId(tenants[0]["id"])
+
+timelines = ps_http.timeline_list(tenant_id)
+assert len(timelines) == 1
+timeline_id = TimelineId(timelines[0]["timeline_id"])
+
+config = {
+    "gc_period": "0s",  # disable periodic gc
+    "checkpoint_timeout": "10 years",  # disable time-based checkpoints
+    "compaction_period": "0s",  # disable periodic compaction so we control when it happens
+    "compaction_threshold": 100000,  # we just want L0s
+    "compaction_target_size": 134217728,
+    "checkpoint_distance": 268435456,
+    "image_creation_threshold": 100000,  # we just want L0s
+}
+
+vps_http.set_tenant_config(tenant_id, config)
+
+connstr = "postgresql://cloud_admin@localhost:55432/postgres"
+
+
+def last_record_lsn(
+    pageserver_http_client: PageserverHttpClient,
+    tenant: TenantId | TenantShardId,
+    timeline: TimelineId,
+) -> Lsn:
+    detail = pageserver_http_client.timeline_detail(tenant, timeline)
+
+    lsn_str = detail["last_record_lsn"]
+    assert isinstance(lsn_str, str)
+    return Lsn(lsn_str)
+
+
+conn = psycopg2.connect(connstr)
+conn.autocommit = True
+cur = conn.cursor()
+
+# each tuple is 23 bytes (header) + 100 bytes (data) + a 4-byte line pointer ~= 127 bytes
+# page header is 24 bytes, page size is 8k
+# (8192 - 24) / 127 ~= 64 tuples per full page (a few less after alignment padding)
+# set fillfactor to 10 to get ~6 tuples per page
+cur.execute("DROP TABLE IF EXISTS data")
+cur.execute("CREATE TABLE data(row char(100)) with (fillfactor=10)")
+desired_size = 50 * 1024 * 1024  # 50MiB
+need_pages = desired_size // 8192
+need_rows = need_pages * 6
+print(f"Need {need_pages} pages, {need_rows} rows")
+cur.execute(f"INSERT INTO data SELECT i % 6 FROM generate_series(1, {need_rows}) as i")
+
+# Each iteration updates at least one tuple on every page (updated values
+# collide over time, so later iterations touch more), waits for the pageserver
+# to ingest the WAL, then forces a checkpoint + compaction, producing one new
+# L0 delta layer per iteration.
+for i in range(0, 20):
+    print(i)
+    # Note the i % 6: row values stay within 0..5, so comparing against a
+    # bare i would match nothing once i reaches 6.
+    cur.execute(f"UPDATE data SET row = ((row::bigint + 1) % 6) WHERE row::bigint % 6 = {i % 6}")
+    cur.execute("SELECT pg_current_wal_flush_lsn()")
+    flush_lsn = Lsn(cur.fetchone()[0])
+
+    # Wait until the pageserver has ingested all WAL up to the flush LSN.
+    while True:
+        last_record = last_record_lsn(ps_http, tenant_id, timeline_id)
+        if last_record >= flush_lsn:
+            break
+        time.sleep(0.1)
+
+    ps_http.timeline_checkpoint(tenant_id, timeline_id)
+    ps_http.timeline_compact(tenant_id, timeline_id)
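
Note, not part of the diff: to sanity-check that the loop really left a deep stack of L0 deltas behind, a few lines like the following could be appended to the script. This is only a sketch: it assumes the pageserver management API's layer-map endpoint (/v1/tenant/<tenant_id>/timeline/<timeline_id>/layer) and the "historic_layers" / "kind" fields of its JSON response, which may differ between pageserver versions.

    # Sketch, assuming the layer-map endpoint and its JSON shape:
    # count the delta layers currently in the timeline's layer map.
    info = ps_http.get(
        f"http://localhost:{ps_http.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer"
    ).json()
    deltas = [layer for layer in info["historic_layers"] if layer["kind"] == "Delta"]
    print(f"{len(deltas)} delta layers in the layer map")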