diff --git a/dist/etc/scylla-manager.yaml b/dist/etc/scylla-manager.yaml index 5d9f52314e..307c1fb020 100644 --- a/dist/etc/scylla-manager.yaml +++ b/dist/etc/scylla-manager.yaml @@ -34,6 +34,10 @@ http: 127.0.0.1:5080 # Debug server that allows to run pporf profiling on demand on a live system. #debug: 127.0.0.1:5112 +# Set the validity timeout for Scylla Manager Agent API clients cached by Scylla Manager. +# Use 0 to disable the cache. +#client_cache_timeout: 15m + # Logging configuration. #logger: # Where to print logs, stderr or stdout. diff --git a/pkg/cmd/scylla-manager/server.go b/pkg/cmd/scylla-manager/server.go index ee2e2001de..308dbad69a 100644 --- a/pkg/cmd/scylla-manager/server.go +++ b/pkg/cmd/scylla-manager/server.go @@ -71,7 +71,8 @@ func (s *server) makeServices() error { drawerStore := store.NewTableStore(s.session, table.Drawer) secretsStore := store.NewTableStore(s.session, table.Secrets) - s.clusterSvc, err = cluster.NewService(s.session, metrics.NewClusterMetrics().MustRegister(), secretsStore, s.config.TimeoutConfig, s.logger.Named("cluster")) + s.clusterSvc, err = cluster.NewService(s.session, metrics.NewClusterMetrics().MustRegister(), secretsStore, s.config.TimeoutConfig, + s.config.ClientCacheTimeout, s.logger.Named("cluster")) if err != nil { return errors.Wrapf(err, "cluster service") } diff --git a/pkg/config/server/server.go b/pkg/config/server/server.go index 2e630d3656..0f39b3bf34 100644 --- a/pkg/config/server/server.go +++ b/pkg/config/server/server.go @@ -46,22 +46,23 @@ type SSLConfig struct { // Config contains configuration structure for scylla manager. type Config struct { - HTTP string `yaml:"http"` - HTTPS string `yaml:"https"` - TLSVersion config.TLSVersion `yaml:"tls_version"` - TLSCertFile string `yaml:"tls_cert_file"` - TLSKeyFile string `yaml:"tls_key_file"` - TLSCAFile string `yaml:"tls_ca_file"` - Prometheus string `yaml:"prometheus"` - Debug string `yaml:"debug"` - Logger config.LogConfig `yaml:"logger"` - Database DBConfig `yaml:"database"` - SSL SSLConfig `yaml:"ssl"` - Healthcheck healthcheck.Config `yaml:"healthcheck"` - Backup backup.Config `yaml:"backup"` - Restore restore.Config `yaml:"restore"` - Repair repair.Config `yaml:"repair"` - TimeoutConfig scyllaclient.TimeoutConfig `yaml:"agent_client"` + HTTP string `yaml:"http"` + HTTPS string `yaml:"https"` + TLSVersion config.TLSVersion `yaml:"tls_version"` + TLSCertFile string `yaml:"tls_cert_file"` + TLSKeyFile string `yaml:"tls_key_file"` + TLSCAFile string `yaml:"tls_ca_file"` + Prometheus string `yaml:"prometheus"` + Debug string `yaml:"debug"` + ClientCacheTimeout time.Duration `yaml:"client_cache_timeout"` + Logger config.LogConfig `yaml:"logger"` + Database DBConfig `yaml:"database"` + SSL SSLConfig `yaml:"ssl"` + Healthcheck healthcheck.Config `yaml:"healthcheck"` + Backup backup.Config `yaml:"backup"` + Restore restore.Config `yaml:"restore"` + Repair repair.Config `yaml:"repair"` + TimeoutConfig scyllaclient.TimeoutConfig `yaml:"agent_client"` } func DefaultConfig() Config { @@ -83,11 +84,12 @@ func DefaultConfig() Config { SSL: SSLConfig{ Validate: true, }, - Healthcheck: healthcheck.DefaultConfig(), - Backup: backup.DefaultConfig(), - Restore: restore.DefaultConfig(), - Repair: repair.DefaultConfig(), - TimeoutConfig: scyllaclient.DefaultTimeoutConfig(), + Healthcheck: healthcheck.DefaultConfig(), + ClientCacheTimeout: 15 * time.Minute, + Backup: backup.DefaultConfig(), + Restore: restore.DefaultConfig(), + Repair: repair.DefaultConfig(), + TimeoutConfig: scyllaclient.DefaultTimeoutConfig(), } } diff --git a/pkg/config/server/server_test.go b/pkg/config/server/server_test.go index fac23f8631..889d27264d 100644 --- a/pkg/config/server/server_test.go +++ b/pkg/config/server/server_test.go @@ -37,14 +37,15 @@ func TestConfigModification(t *testing.T) { } golden := server.Config{ - HTTP: "127.0.0.1:80", - HTTPS: "127.0.0.1:443", - TLSVersion: "TLSv1.3", - TLSCertFile: "tls.cert", - TLSKeyFile: "tls.key", - TLSCAFile: "ca.cert", - Prometheus: "127.0.0.1:9090", - Debug: "127.0.0.1:112", + HTTP: "127.0.0.1:80", + HTTPS: "127.0.0.1:443", + TLSVersion: "TLSv1.3", + TLSCertFile: "tls.cert", + TLSKeyFile: "tls.key", + TLSCAFile: "ca.cert", + Prometheus: "127.0.0.1:9090", + Debug: "127.0.0.1:112", + ClientCacheTimeout: 15 * time.Minute, Logger: config.LogConfig{ Config: log.Config{ Mode: log.StderrMode, diff --git a/pkg/scyllaclient/provider.go b/pkg/scyllaclient/provider.go index 6be7ecb4a7..888ffe5b8c 100644 --- a/pkg/scyllaclient/provider.go +++ b/pkg/scyllaclient/provider.go @@ -15,9 +15,6 @@ import ( // ProviderFunc is a function that returns a Client for a given cluster. type ProviderFunc func(ctx context.Context, clusterID uuid.UUID) (*Client, error) -// hostCheckValidity specifies how often to check if hosts changed. -const hostCheckValidity = 15 * time.Minute - type clientTTL struct { client *Client ttl time.Time @@ -32,10 +29,10 @@ type CachedProvider struct { logger log.Logger } -func NewCachedProvider(f ProviderFunc, logger log.Logger) *CachedProvider { +func NewCachedProvider(f ProviderFunc, cacheInvalidationTimeout time.Duration, logger log.Logger) *CachedProvider { return &CachedProvider{ inner: f, - validity: hostCheckValidity, + validity: cacheInvalidationTimeout, clients: make(map[uuid.UUID]clientTTL), logger: logger.Named("cache-provider"), } @@ -49,16 +46,12 @@ func (p *CachedProvider) Client(ctx context.Context, clusterID uuid.UUID) (*Clie // Cache hit if ok { - if c.ttl.After(timeutc.Now()) { - return c.client, nil - } - // Check if hosts did not change before returning changed, err := c.client.CheckHostsChanged(ctx) if err != nil { p.logger.Error(ctx, "Cannot check if hosts changed", "error", err) } - if !changed && err == nil { + if c.ttl.After(timeutc.Now()) && !changed && err == nil { return c.client, nil } } diff --git a/pkg/service/cluster/service.go b/pkg/service/cluster/service.go index 550b732bfd..4fcc05f5a4 100644 --- a/pkg/service/cluster/service.go +++ b/pkg/service/cluster/service.go @@ -9,6 +9,7 @@ import ( "fmt" "sort" "strconv" + "time" "github.com/gocql/gocql" "github.com/pkg/errors" @@ -56,7 +57,9 @@ type Service struct { onChangeListener func(ctx context.Context, c Change) error } -func NewService(session gocqlx.Session, metrics metrics.ClusterMetrics, secretsStore store.Store, timeoutConfig scyllaclient.TimeoutConfig, l log.Logger) (*Service, error) { +func NewService(session gocqlx.Session, metrics metrics.ClusterMetrics, secretsStore store.Store, timeoutConfig scyllaclient.TimeoutConfig, + cacheInvalidationTimeout time.Duration, l log.Logger, +) (*Service, error) { if session.Session == nil || session.Closed() { return nil, errors.New("invalid session") } @@ -68,7 +71,7 @@ func NewService(session gocqlx.Session, metrics metrics.ClusterMetrics, secretsS logger: l, timeoutConfig: timeoutConfig, } - s.clientCache = scyllaclient.NewCachedProvider(s.createClient) + s.clientCache = scyllaclient.NewCachedProvider(s.createClient, cacheInvalidationTimeout, l) return s, nil } diff --git a/pkg/service/cluster/service_integration_test.go b/pkg/service/cluster/service_integration_test.go index 4517c9069c..5d324f8b48 100644 --- a/pkg/service/cluster/service_integration_test.go +++ b/pkg/service/cluster/service_integration_test.go @@ -16,6 +16,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" "github.com/scylladb/go-log" + "github.com/scylladb/scylla-manager/v3/pkg/config/server" . "github.com/scylladb/scylla-manager/v3/pkg/testutils/testconfig" "github.com/scylladb/scylla-manager/v3/pkg/metrics" @@ -35,7 +36,8 @@ func TestClientIntegration(t *testing.T) { session := CreateScyllaManagerDBSession(t) secretsStore := store.NewTableStore(session, table.Secrets) - s, err := cluster.NewService(session, metrics.NewClusterMetrics(), secretsStore, scyllaclient.DefaultTimeoutConfig(), log.NewDevelopment()) + s, err := cluster.NewService(session, metrics.NewClusterMetrics(), secretsStore, scyllaclient.DefaultTimeoutConfig(), + server.DefaultConfig().ClientCacheTimeout, log.NewDevelopment()) if err != nil { t.Fatal(err) } @@ -103,7 +105,8 @@ func TestServiceStorageIntegration(t *testing.T) { secretsStore := store.NewTableStore(session, table.Secrets) - s, err := cluster.NewService(session, metrics.NewClusterMetrics(), secretsStore, scyllaclient.DefaultTimeoutConfig(), log.NewDevelopment()) + s, err := cluster.NewService(session, metrics.NewClusterMetrics(), secretsStore, scyllaclient.DefaultTimeoutConfig(), + server.DefaultConfig().ClientCacheTimeout, log.NewDevelopment()) if err != nil { t.Fatal(err) } diff --git a/pkg/service/healthcheck/service_integration_test.go b/pkg/service/healthcheck/service_integration_test.go index bc6f200b02..798be33cbd 100644 --- a/pkg/service/healthcheck/service_integration_test.go +++ b/pkg/service/healthcheck/service_integration_test.go @@ -42,7 +42,8 @@ func TestStatusIntegration(t *testing.T) { defer session.Close() s := store.NewTableStore(session, table.Secrets) - clusterSvc, err := cluster.NewService(session, metrics.NewClusterMetrics(), s, scyllaclient.DefaultTimeoutConfig(), log.NewDevelopment()) + clusterSvc, err := cluster.NewService(session, metrics.NewClusterMetrics(), s, scyllaclient.DefaultTimeoutConfig(), + 15*time.Minute, log.NewDevelopment()) if err != nil { t.Fatal(err) } @@ -69,7 +70,8 @@ func TestStatusWithCQLCredentialsIntegration(t *testing.T) { defer session.Close() s := store.NewTableStore(session, table.Secrets) - clusterSvc, err := cluster.NewService(session, metrics.NewClusterMetrics(), s, scyllaclient.DefaultTimeoutConfig(), log.NewDevelopment()) + clusterSvc, err := cluster.NewService(session, metrics.NewClusterMetrics(), s, scyllaclient.DefaultTimeoutConfig(), + 15*time.Minute, log.NewDevelopment()) if err != nil { t.Fatal(err) }