Skip to content

Commit

Permalink
fix: demote warnings, fix flakiness (#4837)
Browse files Browse the repository at this point in the history
`WARN ... found future (image|delta) layer` are not actionable log
lines. They don't need to be warnings. `info!` is enough.

This also fixes some known but untracked flakiness in
[`test_remote_timeline_client_calls_started_metric`][evidence].

[evidence]:
https://neon-github-public-dev.s3.amazonaws.com/reports/pr-4829/5683495367/index.html#/testresult/34fe79e24729618b

Closes #3369.
Closes #4473.
  • Loading branch information
koivunej authored Jul 31, 2023
1 parent a8f3540 commit 89ee8f2
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 19 deletions.
8 changes: 4 additions & 4 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1600,7 +1600,7 @@ impl Timeline {
if let Some(imgfilename) = ImageFileName::parse_str(&fname) {
// create an ImageLayer struct for each image file.
if imgfilename.lsn > disk_consistent_lsn {
warn!(
info!(
"found future image layer {} on timeline {} disk_consistent_lsn is {}",
imgfilename, self.timeline_id, disk_consistent_lsn
);
Expand Down Expand Up @@ -1632,7 +1632,7 @@ impl Timeline {
// is 102, then it might not have been fully flushed to disk
// before crash.
if deltafilename.lsn_range.end > disk_consistent_lsn + 1 {
warn!(
info!(
"found future delta layer {} on timeline {} disk_consistent_lsn is {}",
deltafilename, self.timeline_id, disk_consistent_lsn
);
Expand Down Expand Up @@ -1774,7 +1774,7 @@ impl Timeline {
match remote_layer_name {
LayerFileName::Image(imgfilename) => {
if imgfilename.lsn > up_to_date_disk_consistent_lsn {
warn!(
info!(
"found future image layer {} on timeline {} remote_consistent_lsn is {}",
imgfilename, self.timeline_id, up_to_date_disk_consistent_lsn
);
Expand All @@ -1799,7 +1799,7 @@ impl Timeline {
// is 102, then it might not have been fully flushed to disk
// before crash.
if deltafilename.lsn_range.end > up_to_date_disk_consistent_lsn + 1 {
warn!(
info!(
"found future delta layer {} on timeline {} remote_consistent_lsn is {}",
deltafilename, self.timeline_id, up_to_date_disk_consistent_lsn
);
Expand Down
4 changes: 0 additions & 4 deletions test_runner/regress/test_gc_cutoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env = neon_env_builder.init_start()

# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future image layer.*")
env.pageserver.allowed_errors.append(".*found future delta layer.*")

pageserver_http = env.pageserver.http_client()

# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
Expand Down
4 changes: 0 additions & 4 deletions test_runner/regress/test_pageserver_restart.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()

# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future image layer.*")
env.pageserver.allowed_errors.append(".*found future delta layer.*")

# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.neon_cli.create_tenant(
Expand Down
4 changes: 0 additions & 4 deletions test_runner/regress/test_recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.pageserver.is_testing_enabled_or_skip()

# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future delta layer.*")
env.pageserver.allowed_errors.append(".*found future image layer.*")

# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")

Expand Down
3 changes: 0 additions & 3 deletions test_runner/regress/test_remote_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,6 @@ def churn_while_failpoints_active(result):
# XXX: should vary this test to selectively fail just layer uploads, index uploads, deletions
# but how do we validate the result after restore?

# these are always possible when we do an immediate stop. perhaps something with compacting has changed since.
env.pageserver.allowed_errors.append(r".*found future (delta|image) layer.*")

env.pageserver.stop(immediate=True)
env.endpoints.stop_all()

Expand Down

1 comment on commit 89ee8f2

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1308 tests run: 1250 passed, 0 failed, 58 skipped (full report)


Flaky tests (1)

Postgres 15

  • test_threshold_based_eviction: debug

Please sign in to comment.