Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
114563: server: fix TestStatusLocalLogs flaky test r=koorosh a=koorosh

This patch uses log interceptor to track exact time when logged entry was submitted instead of suggestion that entry sinked right after it was logged.
Also the time between logs are increased just to make it more safe in case of stress testing.

Epic: None

Release note: None

Resolves: cockroachdb#112375

Co-authored-by: Andrii Vorobiov <[email protected]>
  • Loading branch information
craig[bot] and koorosh committed Nov 21, 2023
2 parents 64dacab + c3f8215 commit efec54b
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 38 deletions.
1 change: 1 addition & 0 deletions pkg/server/storage_api/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ go_test(
"//pkg/util/stop",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_redact//:redact",
"@com_github_pkg_errors//:errors",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
Expand Down
116 changes: 78 additions & 38 deletions pkg/server/storage_api/logfiles_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package storage_api_test
import (
"bytes"
"context"
"encoding/json"
"fmt"
"os"
"strings"
Expand All @@ -31,10 +32,41 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/redact"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

type logSpy struct {
Timestamp, TimestampE, TimestampEW, TimestampEWI int64
MsgErr, MsgWarn, MsgInf string
}

// Intercept intercepts all log entries and checks if entry's message is one
// that we're interested in and then it updates exact time when this entry
// has been logged.
// It allows to have exact timestamps of logs to test filtering and have
// predictable results.
func (s *logSpy) Intercept(data []byte) {
var e struct {
Message string `json:"message"`
Time int64 `json:"time"`
}
err := json.Unmarshal(data, &e)
if err != nil {
return
}
if e.Message == s.MsgErr {
s.TimestampE = e.Time
}
if e.Message == s.MsgWarn {
s.TimestampEW = e.Time
}
if e.Message == s.MsgInf {
s.TimestampEWI = e.Time
}
}

// TestStatusLocalLogs checks to ensure that local/logfiles,
// local/logfiles/{filename} and local/log function
// correctly.
Expand All @@ -51,35 +83,42 @@ func TestStatusLocalLogs(t *testing.T) {
// there's just one.
defer s.SetupSingleFileLogging()()

srv := serverutils.StartServerOnly(t, base.TestServerArgs{})
srv := serverutils.StartServerOnly(t, base.TestServerArgs{
DefaultTestTenant: base.TestControlsTenantsExplicitly,
})
defer srv.Stopper().Stop(context.Background())
ts := srv.ApplicationLayer()

logCtx := ts.AnnotateCtx(context.Background())

// Log an error of each main type which we expect to be able to retrieve.
// The resolution of our log timestamps is such that it's possible to get
// two subsequent log messages with the same timestamp. This test will fail
// when that occurs. By adding a small sleep in here after each timestamp to
// ensures this isn't the case and that the log filtering doesn't filter out
// the log entires we're looking for. The value of 20 μs was chosen because
// the log timestamps have a fidelity of 10 μs and thus doubling that should
// be a sufficient buffer.
// See util/log/clog.go formatHeader() for more details.
const sleepBuffer = time.Microsecond * 20
timestamp := timeutil.Now().UnixNano()
time.Sleep(sleepBuffer)
log.Errorf(logCtx, "TestStatusLocalLogFile test message-Error")
time.Sleep(sleepBuffer)
timestampE := timeutil.Now().UnixNano()
spy := logSpy{
Timestamp: timeutil.Now().UnixNano(),
MsgWarn: "TestStatusLocalLogFile test message-Warning",
MsgErr: "TestStatusLocalLogFile test message-Error",
MsgInf: "TestStatusLocalLogFile test message-Info",
}

defer log.InterceptWith(logCtx, &spy)()

const sleepBuffer = time.Millisecond * 10
time.Sleep(sleepBuffer)
log.Warningf(logCtx, "TestStatusLocalLogFile test message-Warning")
// Ignoring fmtsafe lintrule is essential in this test to make filtering
// log events easier and compare entry messages later on.
// Changing formatting to log.Error(logCtx, "%s", spy.MsgErr) will wrap
// message with < > brackets and would require to strip them of later.
log.Errorf(logCtx, "%s", redact.Safe(spy.MsgErr))
time.Sleep(sleepBuffer)
timestampEW := timeutil.Now().UnixNano()
log.Warningf(logCtx, "%s", redact.Safe(spy.MsgWarn))
time.Sleep(sleepBuffer)
log.Infof(logCtx, "TestStatusLocalLogFile test message-Info")
log.Infof(logCtx, "%s", redact.Safe(spy.MsgInf))
time.Sleep(sleepBuffer)
timestampEWI := timeutil.Now().UnixNano()

// Ensure all log lines above are written to disk.
log.FlushAllSync()

require.True(t, spy.TimestampE > 0)
require.True(t, spy.TimestampEW > 0)
require.True(t, spy.TimestampEWI > 0)

var wrapper serverpb.LogFilesListResponse
if err := srvtestutils.GetStatusJSONProto(ts, "logfiles/local", &wrapper); err != nil {
Expand All @@ -98,11 +137,11 @@ func TestStatusLocalLogs(t *testing.T) {
}
for _, entry := range wrapper.Entries {
switch strings.TrimSpace(entry.Message) {
case "TestStatusLocalLogFile test message-Error":
case spy.MsgErr:
foundError = true
case "TestStatusLocalLogFile test message-Warning":
case spy.MsgWarn:
foundWarning = true
case "TestStatusLocalLogFile test message-Info":
case spy.MsgInf:
foundInfo = true
}
}
Expand All @@ -125,17 +164,18 @@ func TestStatusLocalLogs(t *testing.T) {
}{
// Test filtering by log severity.
// // Test entry limit. Ignore Info/Warning/Error filters.
{1, timestamp, timestampEWI, "", levelPresence{false, false, false}},
{2, timestamp, timestampEWI, "", levelPresence{false, false, false}},
{3, timestamp, timestampEWI, "", levelPresence{false, false, false}},
{1, spy.Timestamp, spy.TimestampEWI, "", levelPresence{false, false, false}},
{2, spy.Timestamp, spy.TimestampEWI, "", levelPresence{false, false, false}},
{3, spy.Timestamp, spy.TimestampEWI, "", levelPresence{false, false, false}},
// Test filtering in different timestamp windows.
{0, timestamp, timestamp, "", levelPresence{false, false, false}},
{0, timestamp, timestampE, "", levelPresence{true, false, false}},
{0, timestampE, timestampEW, "", levelPresence{false, true, false}},
{0, timestampEW, timestampEWI, "", levelPresence{false, false, true}},
{0, timestamp, timestampEW, "", levelPresence{true, true, false}},
{0, timestampE, timestampEWI, "", levelPresence{false, true, true}},
{0, timestamp, timestampEWI, "", levelPresence{true, true, true}},
{0, spy.Timestamp, spy.Timestamp, "", levelPresence{false, false, false}},
{0, spy.Timestamp, spy.TimestampE, "", levelPresence{true, false, false}},
// spy.TimestampE is the exact timestamp when entry has been logged but we want to specify time after this event, that's why it is incremented by 1.
{0, spy.TimestampE + 1, spy.TimestampEW, "", levelPresence{false, true, false}},
{0, spy.TimestampEW + 1, spy.TimestampEWI, "", levelPresence{false, false, true}},
{0, spy.Timestamp, spy.TimestampEW, "", levelPresence{true, true, false}},
{0, spy.TimestampE + 1, spy.TimestampEWI, "", levelPresence{false, true, true}},
{0, spy.Timestamp, spy.TimestampEWI, "", levelPresence{true, true, true}},
// Test filtering by regexp pattern.
{0, 0, 0, "Info", levelPresence{false, false, true}},
{0, 0, 0, "Warning", levelPresence{false, true, false}},
Expand All @@ -153,7 +193,7 @@ func TestStatusLocalLogs(t *testing.T) {
if testCase.StartTimestamp > 0 {
fmt.Fprintf(&url, "&start_time=%d", testCase.StartTimestamp)
}
if testCase.StartTimestamp > 0 {
if testCase.EndTimestamp > 0 {
fmt.Fprintf(&url, "&end_time=%d", testCase.EndTimestamp)
}
if len(testCase.Pattern) > 0 {
Expand All @@ -177,17 +217,17 @@ func TestStatusLocalLogs(t *testing.T) {
fmt.Fprintln(&logsBuf, entry.Message)

switch strings.TrimSpace(entry.Message) {
case "TestStatusLocalLogFile test message-Error":
case spy.MsgErr:
actual.Error = true
case "TestStatusLocalLogFile test message-Warning":
case spy.MsgWarn:
actual.Warning = true
case "TestStatusLocalLogFile test message-Info":
case spy.MsgInf:
actual.Info = true
}
}

if testCase.levelPresence != actual {
t.Errorf("%d: expected %+v at %s, got:\n%s", i, testCase, path, logsBuf.String())
t.Errorf("%d: expected %+v at %s, got:\n%s \n\n spy = %+v\n", i, testCase, path, logsBuf.String(), spy)
}
}
}
Expand Down

0 comments on commit efec54b

Please sign in to comment.