diff --git a/pkg/scyllaclient/client_scylla.go b/pkg/scyllaclient/client_scylla.go index 2eaadaacf7..da2623121b 100644 --- a/pkg/scyllaclient/client_scylla.go +++ b/pkg/scyllaclient/client_scylla.go @@ -1049,6 +1049,15 @@ func (c *Client) ViewBuildStatus(ctx context.Context, keyspace, view string) (Vi return minStatus, nil } +// ControlTabletLoadBalancing enables (enabled=true) or disables (enabled=false) tablet load balancing in the cluster and returns the error, if any, of the underlying StorageServiceTabletsBalancingPost call. +func (c *Client) ControlTabletLoadBalancing(ctx context.Context, enabled bool) error { + _, err := c.scyllaOps.StorageServiceTabletsBalancingPost(&operations.StorageServiceTabletsBalancingPostParams{ + Context: ctx, + Enabled: enabled, + }) + return err +} + // ToCanonicalIP replaces ":0:0" in IPv6 addresses with "::" // ToCanonicalIP("192.168.0.1") -> "192.168.0.1" // ToCanonicalIP("100:200:0:0:0:0:0:1") -> "100:200::1". diff --git a/pkg/service/backup/worker_snapshot.go b/pkg/service/backup/worker_snapshot.go index 875dc41aa3..87b164baf9 100644 --- a/pkg/service/backup/worker_snapshot.go +++ b/pkg/service/backup/worker_snapshot.go @@ -4,12 +4,31 @@ package backup import ( "context" + stdErrors "errors" "github.com/pkg/errors" + "github.com/scylladb/scylla-manager/v3/pkg/scyllaclient" . "github.com/scylladb/scylla-manager/v3/pkg/service/backup/backupspec" ) func (w *worker) Snapshot(ctx context.Context, hosts []hostInfo, limits []DCLimit) (err error) { + snapshotTabletKs := false + ringDescriber := scyllaclient.NewRingDescriber(ctx, w.Client) + for _, u := range w.Units { + snapshotTabletKs = snapshotTabletKs || ringDescriber.IsTabletKeyspace(u.Keyspace) + } + // Disable tablet migration for the snapshot stage. + // Without that, a tablet could "escape" being + // a part of any snapshot by migrating from a not yet snapshotted host to an already snapshotted one.
+ if snapshotTabletKs { + defer func() { + err = stdErrors.Join(err, w.Client.ControlTabletLoadBalancing(context.Background(), true)) + }() + if err := w.Client.ControlTabletLoadBalancing(ctx, false); err != nil { + return errors.Wrapf(err, "disable tablet load balancing") + } + } + f := func(h hostInfo) error { w.Logger.Info(ctx, "Taking snapshots on host", "host", h.IP) err := w.snapshotHost(ctx, h)