add the script that I used to generate the delta stack
non-package-mode-py3.10christian@neon-hetzner-dev-christian:[~/src/neon/test_runner]: poetry run python3 deep_layers_with_delta.py
problame committed Dec 12, 2024
1 parent 87755bf commit c326d36
Showing 1 changed file with 76 additions and 0 deletions.
76 changes: 76 additions & 0 deletions test_runner/deep_layers_with_delta.py
@@ -0,0 +1,76 @@
import time

import psycopg2

from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
from fixtures.pageserver.http import PageserverHttpClient

# one client talks directly to the pageserver's HTTP management API; the other
# ("vps", presumably the storage controller / virtual pageserver) is used to set
# the tenant config; the ports match the author's local setup
ps_http = PageserverHttpClient(port=9898, is_testing_enabled_or_skip=lambda: None)
vps_http = PageserverHttpClient(port=1234, is_testing_enabled_or_skip=lambda: None)

tenants = ps_http.tenant_list()
assert len(tenants) == 1
tenant_id = tenants[0]["id"]

timelines = ps_http.timeline_list(tenant_id)
assert len(timelines) == 1
timeline_id = timelines[0]["timeline_id"]

config = {
    "gc_period": "0s",  # disable periodic gc
    "checkpoint_timeout": "10 years",  # effectively disable time-based flushes
    "compaction_period": "0s",  # disable periodic compaction so we control when it happens
    "compaction_threshold": 100000,  # we just want L0s
    "compaction_target_size": 134217728,  # 128 MiB
    "checkpoint_distance": 268435456,  # 256 MiB
    "image_creation_threshold": 100000,  # we just want L0s
}

vps_http.set_tenant_config(tenant_id, config)
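
# optional (not in the original script): read back the effective config to
# confirm it was applied, assuming the fixture exposes a tenant_config() helper
#   print(ps_http.tenant_config(tenant_id).effective_config)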

# Postgres connection string for the compute node in this local setup
connstr = "postgresql://cloud_admin@localhost:55432/postgres"

def last_record_lsn(
    pageserver_http_client: PageserverHttpClient,
    tenant: TenantId | TenantShardId,
    timeline: TimelineId,
) -> Lsn:
    detail = pageserver_http_client.timeline_detail(tenant, timeline)

    lsn_str = detail["last_record_lsn"]
    assert isinstance(lsn_str, str)
    return Lsn(lsn_str)

conn = psycopg2.connect(connstr)
conn.autocommit = True
cur = conn.cursor()

# each tuple is 23 bytes (header) + 100 bytes (data) = 123 bytes, MAXALIGNed to ~128 bytes
# page header is 24 bytes
# 8k page size
# (8k - 24 bytes) / 128 bytes ≈ 63 tuples per page
# set fillfactor to 10 to get ~6 tuples per page
cur.execute("DROP TABLE IF EXISTS data")
cur.execute("CREATE TABLE data(row char(100)) with (fillfactor=10)")
desired_size = 50 * 1024 * 1024 # 50MiB
need_pages = desired_size // 8192
need_rows = need_pages * 6
print(f"Need {need_pages} pages, {need_rows} rows")
cur.execute(f"INSERT INTO data SELECT (i % 6)::text FROM generate_series(1, {need_rows}) as i")
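
# optional sanity check (not in the original script): the table should end up
# roughly at the desired 50 MiB
#   cur.execute("SELECT pg_table_size('data')")
#   print(cur.fetchall()[0][0])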

# every iteration updates the tuples whose current value is i % 6 (at least one
# tuple in each page), generating WAL that is then flushed into a new delta layer
for i in range(0, 20):
    print(i)
    cur.execute(f"UPDATE data set row = ((row::bigint + 1) % 6)::text where row::bigint % 6 = {i % 6}")
    cur.execute("SELECT pg_current_wal_flush_lsn()")
    flush_lsn = Lsn(cur.fetchall()[0][0])

    # wait for the pageserver to ingest the WAL up to the flush LSN
    while True:
        last_record = last_record_lsn(ps_http, tenant_id, timeline_id)
        if last_record >= flush_lsn:
            break
        time.sleep(0.1)

    # flush the in-memory layer to disk as an L0 delta layer; the compaction call
    # is effectively a no-op because compaction_threshold is set very high
    ps_http.timeline_checkpoint(tenant_id, timeline_id)
    ps_http.timeline_compact(tenant_id, timeline_id)
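
# At this point the timeline's layer map should contain a stack of ~20 small L0
# delta layers on top of the initial data, since compaction and image creation
# are disabled by the thresholds above. One way to confirm (assuming the
# fixtures expose a layer_map_info() helper; the exact API may differ):
#   info = ps_http.layer_map_info(tenant_id, timeline_id)
#   print(len(info.historic_layers))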
