From d14cc0c4225420bb4eba924b07e5b476263e9300 Mon Sep 17 00:00:00 2001
From: Ryan Leung
Date: Thu, 28 Mar 2024 17:17:47 +0800
Subject: [PATCH 1/2] fix the issue that the down peer cannot be repaired

Signed-off-by: Ryan Leung
---
 pkg/schedule/checker/replica_strategy.go  |  7 ++-
 pkg/schedule/checker/rule_checker_test.go | 60 +++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/pkg/schedule/checker/replica_strategy.go b/pkg/schedule/checker/replica_strategy.go
index fdf05a0c479..ad85e307bbe 100644
--- a/pkg/schedule/checker/replica_strategy.go
+++ b/pkg/schedule/checker/replica_strategy.go
@@ -98,7 +98,12 @@ func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, o
 	}
 	// trick to avoid creating a slice with `old` removed.
 	s.swapStoreToFirst(coLocationStores, old)
-	return s.SelectStoreToAdd(coLocationStores[1:])
+	// If coLocationStores only has one store, there is no need to remove it.
+	// Otherwise, the other stores will be filtered out.
+	if len(coLocationStores) > 1 {
+		coLocationStores = coLocationStores[1:]
+	}
+	return s.SelectStoreToAdd(coLocationStores)
 }
 
 // SelectStoreToImprove returns a store to replace oldStore. The location
diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go
index d4d37de2c3c..012fecda79e 100644
--- a/pkg/schedule/checker/rule_checker_test.go
+++ b/pkg/schedule/checker/rule_checker_test.go
@@ -2061,3 +2061,63 @@ func (suite *ruleCheckerTestSuite) TestRemoveOrphanPeer() {
 	suite.NotNil(op)
 	suite.Equal("remove-orphan-peer", op.Desc())
 }
+
+func (suite *ruleCheckerTestSuite) TestIssue7808() {
+	re := suite.Require()
+	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1", "disk_type": "mix"})
+	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2", "disk_type": "mix"})
+	suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3", "disk_type": "ssd"})
+	suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "disk_type": "ssd"})
+	suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5", "disk_type": "ssd"})
+	suite.cluster.AddLeaderRegionWithRange(1, "", "", 3, 4, 1)
+	err := suite.ruleManager.SetRules([]*placement.Rule{
+		{
+			GroupID: "pd",
+			ID:      "1",
+			Role:    placement.Voter,
+			Count:   2,
+			LabelConstraints: []placement.LabelConstraint{
+				{
+					Key: "disk_type",
+					Values: []string{
+						"ssd",
+					},
+					Op: placement.In,
+				},
+			},
+			LocationLabels: []string{"host"},
+			IsolationLevel: "host",
+		},
+		{
+			GroupID: "pd",
+			ID:      "2",
+			Role:    placement.Follower,
+			Count:   1,
+			LabelConstraints: []placement.LabelConstraint{
+				{
+					Key: "disk_type",
+					Values: []string{
+						"mix",
+					},
+					Op: placement.In,
+				},
+			},
+			LocationLabels: []string{"host"},
+			IsolationLevel: "host",
+		},
+	})
+	re.NoError(err)
+	err = suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID)
+	re.NoError(err)
+	suite.cluster.SetStoreDown(1)
+	region := suite.cluster.GetRegion(1)
+	downPeer := []*pdpb.PeerStats{
+		{Peer: region.GetStorePeer(1), DownSeconds: 6000},
+	}
+	region = region.Clone(core.WithDownPeers(downPeer))
+	suite.cluster.PutRegion(region)
+	op := suite.rc.Check(suite.cluster.GetRegion(1))
+	re.NotNil(op)
+	re.Equal("fast-replace-rule-down-peer", op.Desc())
+	re.Contains(op.Brief(), "mv peer: store [1] to [2]")
+}
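For readers tracing the first hunk, below is a minimal, self-contained sketch of the guard it adds. The function names and the string-based store list are simplifications invented for illustration, not pd's real types or signatures; the point is only that slicing [1:] on a single-element candidate list leaves SelectStoreToAdd nothing to choose from, while the added length check keeps the lone candidate.

package main

import "fmt"

// selectStoreToAdd stands in for ReplicaStrategy.SelectStoreToAdd: it returns
// the first remaining candidate, or "" when the candidate list is empty.
func selectStoreToAdd(candidates []string) string {
	if len(candidates) == 0 {
		return ""
	}
	return candidates[0]
}

// selectStoreToFix mirrors the shape of the patched code path: callers have
// already tried to move the old store to index 0, and the slice trick now
// drops that first entry only when more than one candidate exists.
func selectStoreToFix(coLocationStores []string) string {
	if len(coLocationStores) > 1 {
		coLocationStores = coLocationStores[1:]
	}
	return selectStoreToAdd(coLocationStores)
}

func main() {
	// With a single candidate left, the pre-fix behavior (always slicing off
	// the first element) would return "", i.e. no store to repair the peer.
	fmt.Println(selectStoreToFix([]string{"store-2"}))            // store-2
	fmt.Println(selectStoreToFix([]string{"store-1", "store-2"})) // store-2
}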
From cb3896eb31638e98937edf5c341d79123b711630 Mon Sep 17 00:00:00 2001
From: Ryan Leung
Date: Tue, 21 May 2024 11:08:24 +0800
Subject: [PATCH 2/2] fix

Signed-off-by: Ryan Leung
---
 pkg/schedule/checker/rule_checker_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go
index 012fecda79e..f3ef8ed8d23 100644
--- a/pkg/schedule/checker/rule_checker_test.go
+++ b/pkg/schedule/checker/rule_checker_test.go
@@ -2107,7 +2107,7 @@ func (suite *ruleCheckerTestSuite) TestIssue7808() {
 		},
 	})
 	re.NoError(err)
-	err = suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID)
+	err = suite.ruleManager.DeleteRule("pd", "default")
 	re.NoError(err)
 	suite.cluster.SetStoreDown(1)
 	region := suite.cluster.GetRegion(1)
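Taken together with the fix, the test added above sets up exactly the single-candidate situation: only stores 1 and 2 carry disk_type=mix, and store 1 is the one marked down, so store 2 is the sole store that can still satisfy the follower rule. A toy sketch of that candidate filtering follows, using made-up helper names rather than pd's rule-fit machinery.

package main

import "fmt"

// candidates is an invented helper: it keeps the stores whose disk_type label
// matches the rule and which are not down, mirroring the test's setup only.
func candidates(stores map[uint64]map[string]string, diskType string, down map[uint64]bool) []uint64 {
	var out []uint64
	for id, labels := range stores {
		if labels["disk_type"] == diskType && !down[id] {
			out = append(out, id)
		}
	}
	return out
}

func main() {
	stores := map[uint64]map[string]string{
		1: {"host": "host1", "disk_type": "mix"},
		2: {"host": "host2", "disk_type": "mix"},
		3: {"host": "host3", "disk_type": "ssd"},
		4: {"host": "host4", "disk_type": "ssd"},
		5: {"host": "host5", "disk_type": "ssd"},
	}
	down := map[uint64]bool{1: true} // store 1 is set down in the test

	// Exactly one store can still host the "mix" follower, which is the
	// single-entry candidate list the new length guard preserves; this lines
	// up with the expected operator "mv peer: store [1] to [2]".
	fmt.Println(candidates(stores, "mix", down)) // [2]
}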