From 90cc61b432feb3744cc71aa82bd55785e4646396 Mon Sep 17 00:00:00 2001 From: okJiang <819421878@qq.com> Date: Thu, 21 Nov 2024 16:33:35 +0800 Subject: [PATCH] scheduler: add test for creating evict-leader-scheduler twice (#8757) close tikv/pd#8756 Signed-off-by: okJiang <819421878@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/core/store.go | 2 + pkg/schedule/schedulers/grant_leader.go | 4 +- pkg/schedule/schedulers/init.go | 4 +- plugin/scheduler_example/evict_leader.go | 4 +- tests/integrations/realcluster/cluster.go | 2 +- .../realcluster/scheduler_test.go | 71 +++++++++++++++++++ 6 files changed, 81 insertions(+), 6 deletions(-) diff --git a/pkg/core/store.go b/pkg/core/store.go index 2cd924ad339..5edf59f6dce 100644 --- a/pkg/core/store.go +++ b/pkg/core/store.go @@ -787,6 +787,7 @@ func (s *StoresInfo) ResetStores() { func (s *StoresInfo) PauseLeaderTransfer(storeID uint64, direction constant.Direction) error { s.Lock() defer s.Unlock() + log.Info("pause store leader transfer", zap.Uint64("store-id", storeID), zap.String("direction", direction.String())) store, ok := s.stores[storeID] if !ok { return errs.ErrStoreNotFound.FastGenByArgs(storeID) @@ -810,6 +811,7 @@ func (s *StoresInfo) PauseLeaderTransfer(storeID uint64, direction constant.Dire func (s *StoresInfo) ResumeLeaderTransfer(storeID uint64, direction constant.Direction) { s.Lock() defer s.Unlock() + log.Info("resume store leader transfer", zap.Uint64("store-id", storeID), zap.String("direction", direction.String())) store, ok := s.stores[storeID] if !ok { log.Warn("try to clean a store's pause state, but it is not found. It may be cleanup", diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index f83c2396ace..b18d1fc3397 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -45,7 +45,7 @@ type grantLeaderSchedulerConfig struct { } func (conf *grantLeaderSchedulerConfig) buildWithArgs(args []string) error { - if len(args) != 1 { + if len(args) < 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } @@ -271,6 +271,7 @@ func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R err := handler.config.buildWithArgs(args) if err != nil { + log.Error("fail to build config", errs.ZapError(err)) handler.config.Lock() handler.config.cluster.ResumeLeaderTransfer(id, constant.Out) handler.config.Unlock() @@ -279,6 +280,7 @@ func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R } err = handler.config.persist() if err != nil { + log.Error("fail to persist config", errs.ZapError(err)) _, _ = handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 48b4f1c4239..51d857ae445 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -134,7 +134,7 @@ func schedulersRegister() { // evict leader RegisterSliceDecoderBuilder(types.EvictLeaderScheduler, func(args []string) ConfigDecoder { return func(v any) error { - if len(args) != 1 { + if len(args) < 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } conf, ok := v.(*evictLeaderSchedulerConfig) @@ -268,7 +268,7 @@ func schedulersRegister() { // grant leader RegisterSliceDecoderBuilder(types.GrantLeaderScheduler, func(args []string) ConfigDecoder { return func(v any) error { - if len(args) != 1 { + if len(args) < 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 7164f36b1bb..eb976edf851 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -50,7 +50,7 @@ const ( func init() { schedulers.RegisterSliceDecoderBuilder(userEvictLeaderScheduler, func(args []string) schedulers.ConfigDecoder { return func(v any) error { - if len(args) != 1 { + if len(args) < 1 { return errors.New("should specify the store-id") } conf, ok := v.(*evictLeaderSchedulerConfig) @@ -101,7 +101,7 @@ type evictLeaderSchedulerConfig struct { // BuildWithArgs builds the config with the args. func (conf *evictLeaderSchedulerConfig) BuildWithArgs(args []string) error { - if len(args) != 1 { + if len(args) < 1 { return errors.New("should specify the store-id") } diff --git a/tests/integrations/realcluster/cluster.go b/tests/integrations/realcluster/cluster.go index 4dc80b5d433..5068dc3b73b 100644 --- a/tests/integrations/realcluster/cluster.go +++ b/tests/integrations/realcluster/cluster.go @@ -38,7 +38,7 @@ type clusterSuite struct { } var ( - playgroundLogDir = filepath.Join("tmp", "real_cluster", "playground") + playgroundLogDir = "/tmp/real_cluster/playground" tiupBin = os.Getenv("HOME") + "/.tiup/bin/tiup" ) diff --git a/tests/integrations/realcluster/scheduler_test.go b/tests/integrations/realcluster/scheduler_test.go index 369ca176233..ed0694a985d 100644 --- a/tests/integrations/realcluster/scheduler_test.go +++ b/tests/integrations/realcluster/scheduler_test.go @@ -21,12 +21,14 @@ import ( "testing" "time" + "github.com/pingcap/log" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/tikv/pd/client/http" "github.com/tikv/pd/client/utils/testutil" "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/schedule/types" + "go.uber.org/zap" ) type schedulerSuite struct { @@ -201,3 +203,72 @@ func (s *schedulerSuite) TestRegionLabelDenyScheduler() { return true }, testutil.WithWaitFor(time.Minute)) } + +func (s *schedulerSuite) TestGrantOrEvictLeaderTwice() { + re := require.New(s.T()) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + pdHTTPCli := http.NewClient("pd-real-cluster-test", getPDEndpoints(s.T())) + regions, err := pdHTTPCli.GetRegions(ctx) + re.NoError(err) + re.NotEmpty(regions.Regions) + region1 := regions.Regions[0] + + var i int + evictLeader := func() { + re.NoError(pdHTTPCli.CreateScheduler(ctx, types.EvictLeaderScheduler.String(), uint64(region1.Leader.StoreID))) + // if the second evict leader scheduler cause the pause-leader-filter + // disable, the balance-leader-scheduler need some time to transfer + // leader. See details in https://github.com/tikv/pd/issues/8756. + if i == 1 { + time.Sleep(3 * time.Second) + } + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + if err != nil { + log.Error("get regions failed", zap.Error(err)) + return false + } + for _, region := range regions.Regions { + if region.Leader.StoreID == region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(time.Minute)) + + i++ + } + + evictLeader() + evictLeader() + pdHTTPCli.DeleteScheduler(ctx, types.EvictLeaderScheduler.String()) + + i = 0 + grantLeader := func() { + re.NoError(pdHTTPCli.CreateScheduler(ctx, types.GrantLeaderScheduler.String(), uint64(region1.Leader.StoreID))) + if i == 1 { + time.Sleep(3 * time.Second) + } + testutil.Eventually(re, func() bool { + regions, err := pdHTTPCli.GetRegions(ctx) + if err != nil { + log.Error("get regions failed", zap.Error(err)) + return false + } + for _, region := range regions.Regions { + if region.Leader.StoreID != region1.Leader.StoreID { + return false + } + } + return true + }, testutil.WithWaitFor(2*time.Minute)) + + i++ + } + + grantLeader() + grantLeader() + pdHTTPCli.DeleteScheduler(ctx, types.GrantLeaderScheduler.String()) +}