From ed62027e604261af3e9c42c7a50092a0db074f19 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 8 Mar 2024 18:07:34 +0100 Subject: [PATCH] ktesting: tone down warning about leaked test goroutine Not terminating immediately is fairly normal: many controllers log "terminating" messages while they shut down, which often is right after test completion, if that is when the test cancels the context and then doesn't wait for goroutines (which is often not possible). Therefore the warning now only gets printed if that happens after more than 10 seconds since test completion, and then the warning is only printed as an info message, not an error. --- ktesting/testinglogger.go | 30 ++++++++++++++++++++++++++---- ktesting/testinglogger_test.go | 15 ++++++++++----- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/ktesting/testinglogger.go b/ktesting/testinglogger.go index 37d5ca69..a3d9b53e 100644 --- a/ktesting/testinglogger.go +++ b/ktesting/testinglogger.go @@ -217,8 +217,19 @@ type tloggerShared struct { // t gets cleared when the test is completed. t TL - // We warn once when a leaked goroutine is detected because - // it logs after test completion. + // The time when the test completed. + stopTime time.Time + + // We warn once when a leaked goroutine logs after test completion. + // + // Not terminating immediately is fairly normal: many controllers + // log "terminating" messages while they shut down, which often is + // right after test completion, if that is when the test cancels the + // context and then doesn't wait for goroutines (which is often + // not possible). + // + // Therefore there is the [stopGracePeriod] during which messages get + // logged to the global logger without the warning. goroutineWarningDone bool formatter serialize.Formatter @@ -228,10 +239,15 @@ type tloggerShared struct { callDepth int } +// Log output of a leaked goroutine during this period after test completion +// does not trigger the warning. +const stopGracePeriod = 10 * time.Second + func (ls *tloggerShared) stop() { ls.mutex.Lock() defer ls.mutex.Unlock() ls.t = nil + ls.stopTime = time.Now() } // tlogger is the actual LogSink implementation. @@ -241,6 +257,8 @@ type tlogger struct { values []interface{} } +// fallbackLogger is called while l.shared.mutex is locked and after it has +// been determined that the original testing.TB is no longer usable. func (l tlogger) fallbackLogger() logr.Logger { logger := klog.Background().WithValues(l.values...).WithName(l.shared.testName + " leaked goroutine") if l.prefix != "" { @@ -250,8 +268,12 @@ func (l tlogger) fallbackLogger() logr.Logger { logger = logger.WithCallDepth(l.shared.callDepth + 1) if !l.shared.goroutineWarningDone { - logger.WithCallDepth(1).Error(nil, "WARNING: test kept at least one goroutine running after test completion", "callstack", string(dbg.Stacks(false))) - l.shared.goroutineWarningDone = true + duration := time.Since(l.shared.stopTime) + if duration > stopGracePeriod { + + logger.WithCallDepth(1).Info("WARNING: test kept at least one goroutine running after test completion", "timeSinceCompletion", duration.Round(time.Second), "callstack", string(dbg.Stacks(false))) + l.shared.goroutineWarningDone = true + } } return logger } diff --git a/ktesting/testinglogger_test.go b/ktesting/testinglogger_test.go index 01769261..df50750a 100644 --- a/ktesting/testinglogger_test.go +++ b/ktesting/testinglogger_test.go @@ -16,6 +16,7 @@ import ( "runtime" "sync" "testing" + "time" "k8s.io/klog/v2" "k8s.io/klog/v2/internal/test/require" @@ -198,6 +199,7 @@ func TestStop(t *testing.T) { _, _, line, _ = runtime.Caller(0) logger.Info("simple info message") logger.Error(nil, "error message") + time.Sleep(15 * time.Second) logger.WithName("me").WithValues("completed", true).Info("complex info message", "anotherValue", 1) }() }) @@ -212,11 +214,14 @@ func TestStop(t *testing.T) { // Strip time and pid prefix. actual = regexp.MustCompile(`(?m)^.* testinglogger_test.go:`).ReplaceAllString(actual, `testinglogger_test.go:`) + // Strip duration. + actual = regexp.MustCompile(`timeSinceCompletion="\d+s"`).ReplaceAllString(actual, `timeSinceCompletion="<...>s"`) + // All lines from the callstack get stripped. We can be sure that it was non-empty because otherwise we wouldn't // have the < > markers. // // Full output: - // testinglogger_test.go:194] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine" callstack=< + // testinglogger_test.go:194] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine.me" completed=true timeSinceCompletion="15s" callstack=< // goroutine 23 [running]: // k8s.io/klog/v2/internal/dbg.Stacks(0x0) // /nvme/gopath/src/k8s.io/klog/internal/dbg/dbg.go:34 +0x8a @@ -233,13 +238,13 @@ func TestStop(t *testing.T) { // > actual = regexp.MustCompile(`(?m)^\t.*?\n`).ReplaceAllString(actual, ``) - expected := fmt.Sprintf(`testinglogger_test.go:%d] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine" callstack=< - > -testinglogger_test.go:%d] "simple info message" logger="TestStop/Sub leaked goroutine" + expected := fmt.Sprintf(`testinglogger_test.go:%d] "simple info message" logger="TestStop/Sub leaked goroutine" testinglogger_test.go:%d] "error message" logger="TestStop/Sub leaked goroutine" +testinglogger_test.go:%d] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine.me" completed=true timeSinceCompletion="<...>s" callstack=< + > testinglogger_test.go:%d] "complex info message" logger="TestStop/Sub leaked goroutine.me" completed=true anotherValue=1 `, - line+1, line+1, line+2, line+3) + line+1, line+2, line+4, line+4) if actual != expected { t.Errorf("Output does not match. Expected:\n%s\nActual:\n%s\n", expected, actual) }