From 62f0e836087f3500ce9a08ce967987ff705ac265 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Leszczy=C5=84ski?= <2000michal@wp.pl>
Date: Wed, 4 Dec 2024 09:17:36 +0100
Subject: [PATCH] feat(scyllaclient): benchmark rclone list dir iter

---
 .../client_rclone_agent_integration_test.go   | 86 +++++++++++++++++++
 pkg/testutils/s3.go                           |  9 ++
 2 files changed, 95 insertions(+)

diff --git a/pkg/scyllaclient/client_rclone_agent_integration_test.go b/pkg/scyllaclient/client_rclone_agent_integration_test.go
index b2db38af66..e2bd67f345 100644
--- a/pkg/scyllaclient/client_rclone_agent_integration_test.go
+++ b/pkg/scyllaclient/client_rclone_agent_integration_test.go
@@ -9,6 +9,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"os"
 	"path"
 	"strings"
 	"testing"
@@ -20,6 +21,7 @@ import (
 	"github.com/scylladb/scylla-manager/v3/pkg/scyllaclient"
 	. "github.com/scylladb/scylla-manager/v3/pkg/testutils"
 	. "github.com/scylladb/scylla-manager/v3/pkg/testutils/testconfig"
+	"github.com/scylladb/scylla-manager/v3/pkg/util/timeutc"
 	"go.uber.org/zap/zapcore"
 )
 
@@ -605,6 +607,90 @@ func TestRcloneSuffixOptionIntegration(t *testing.T) {
 	}
 }
 
+// BenchmarkRcloneListDirIterIntegration measures the callback latency of
+// RcloneListDirIter on a flat directory of fileCnt empty files.
+// For every sample it logs the total listing time, the time to the first
+// callback and the max gap between consecutive callbacks, split into gaps
+// crossing an assumed chunk boundary and gaps within a chunk.
+func BenchmarkRcloneListDirIterIntegration(b *testing.B) {
+	bucket := S3BucketPath(testBucket)
+	if err := os.RemoveAll(bucket); err != nil {
+		b.Fatal(err)
+	}
+	if err := os.Mkdir(bucket, 0o700); err != nil {
+		b.Fatal(err)
+	}
+
+	client, err := scyllaclient.NewClient(scyllaclient.TestConfig(ManagedClusterHosts(), AgentAuthToken()), log.NewDevelopmentWithLevel(zapcore.ErrorLevel))
+	if err != nil {
+		b.Fatal(err)
+	}
+
+	const fileCnt = 5555
+	Printf("Given: dir with %d files", fileCnt)
+	for i := 0; i < fileCnt; i++ {
+		f, err := os.Create(path.Join(bucket, fmt.Sprint(i)))
+		if err != nil {
+			b.Fatal(err)
+		}
+		if err := f.Close(); err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	Print("When: check list iter latency")
+	// ListCB is implemented only for the non-recursive listings (take a look at rcChunkedList)
+	opts := &scyllaclient.RcloneListDirOpts{
+		Recurse: false,
+	}
+	// 1000 is the default chunk size for s3
+	const rcloneChunkSize = 1000
+	const sampleSize = 1000
+	var avgTotal, avgFirst, avgCross, avgWithin time.Duration
+	// Keep the bucket setup above out of the reported benchmark time.
+	b.ResetTimer()
+	for sample := range sampleSize {
+		var firstDiff, maxCrossChunkDiff, maxWithinChunkDiff time.Duration
+		idx := 0
+		// Take a single timestamp so that the first callback latency is
+		// measured from exactly the same instant as the total time.
+		start := timeutc.Now()
+		lastCB := start
+		err = client.RcloneListDirIter(context.Background(), ManagedClusterHost(), remotePath(""), opts, func(item *scyllaclient.RcloneListDirItem) {
+			idx++
+			now := timeutc.Now()
+			diff := now.Sub(lastCB)
+			lastCB = now
+			switch {
+			case idx == 1:
+				firstDiff = diff
+			case idx%rcloneChunkSize == 1:
+				// Assuming chunks of rcloneChunkSize items, this gap spans a chunk boundary.
+				maxCrossChunkDiff = max(maxCrossChunkDiff, diff)
+			default:
+				maxWithinChunkDiff = max(maxWithinChunkDiff, diff)
+			}
+		})
+		if err != nil {
+			b.Fatal(err)
+		}
+		total := timeutc.Now().Sub(start)
+
+		b.Log("sample: ", sample, "total: ", total, "first: ", firstDiff, "maxCross: ", maxCrossChunkDiff, "maxWithin: ", maxWithinChunkDiff)
+		avgTotal += total
+		avgFirst += firstDiff
+		avgCross += maxCrossChunkDiff
+		avgWithin += maxWithinChunkDiff
+	}
+
+	avgTotal /= sampleSize
+	avgFirst /= sampleSize
+	avgCross /= sampleSize
+	avgWithin /= sampleSize
+
+	b.Log("Avg total time: ", avgTotal, "Avg first latency: ", avgFirst, "Avg max cross chunk latency: ", avgCross, "Avg max within chunk latency: ", avgWithin)
+}
+
 func validateDirContents(ctx context.Context, client *scyllaclient.Client, host, remotePath string, files map[string]string) error {
 	// Check if all specified files with given contents are present
 	for f, expected := range files {
diff --git a/pkg/testutils/s3.go b/pkg/testutils/s3.go
index 67e22e89bf..5ab269b24c 100644
--- a/pkg/testutils/s3.go
+++ b/pkg/testutils/s3.go
@@ -45,3 +45,12 @@ func S3Credentials() (provider, endpoint, accessKeyID, secretAccessKey string) {
 	}
 	return *flagS3Provider, *flagS3Endpoint, *flagS3AccessKeyID, *flagS3SecretAccessKey
 }
+
+// S3BucketPath returns bucket joined onto the value of flagS3DataDir,
+// parsing the command-line flags first if they have not been parsed yet.
+func S3BucketPath(bucket string) string {
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+	return filepath.Join(*flagS3DataDir, bucket)
+}