Skip to content

Commit

Permalink
This is an automated cherry-pick of tikv#7122
Browse files Browse the repository at this point in the history
close tikv#7121

Signed-off-by: ti-chi-bot <[email protected]>
  • Loading branch information
rleungx authored and ti-chi-bot committed Sep 21, 2023
1 parent e8f95ac commit 804b1fb
Show file tree
Hide file tree
Showing 12 changed files with 618 additions and 19 deletions.
466 changes: 466 additions & 0 deletions pkg/mcs/scheduling/server/cluster.go

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions pkg/mock/mockcluster/mockcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,13 @@ func (mc *Cluster) AllocPeer(storeID uint64) (*metapb.Peer, error) {

func (mc *Cluster) initRuleManager() {
if mc.RuleManager == nil {
<<<<<<< HEAD
mc.RuleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), mc, mc.GetOpts())
mc.RuleManager.Initialize(int(mc.GetReplicationConfig().MaxReplicas), mc.GetReplicationConfig().LocationLabels)
=======
mc.RuleManager = placement.NewRuleManager(mc.GetStorage(), mc, mc.GetSharedConfig())
mc.RuleManager.Initialize(int(mc.GetReplicationConfig().MaxReplicas), mc.GetReplicationConfig().LocationLabels, mc.GetReplicationConfig().IsolationLevel)
>>>>>>> 5b3d0172b (*: fix sync isolation level to default placement rule (#7122))
}
}

Expand Down
4 changes: 3 additions & 1 deletion server/api/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,9 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul
if testCase.placementRuleEnable {
err := suite.svr.GetRaftCluster().GetRuleManager().Initialize(
suite.svr.GetRaftCluster().GetOpts().GetMaxReplicas(),
suite.svr.GetRaftCluster().GetOpts().GetLocationLabels())
suite.svr.GetRaftCluster().GetOpts().GetLocationLabels(),
suite.svr.GetRaftCluster().GetOpts().GetIsolationLevel(),
)
suite.NoError(err)
}
if len(testCase.rules) > 0 {
Expand Down
2 changes: 1 addition & 1 deletion server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ func (c *RaftCluster) Start(s Server) error {

c.ruleManager = placement.NewRuleManager(c.storage, c, c.GetOpts())
if c.opt.IsPlacementRulesEnabled() {
err = c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels())
err = c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels(), c.opt.GetIsolationLevel())
if err != nil {
return err
}
Expand Down
10 changes: 5 additions & 5 deletions server/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ func TestSetOfflineStore(t *testing.T) {
cluster.coordinator = newCoordinator(ctx, cluster, nil)
cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts())
if opt.IsPlacementRulesEnabled() {
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels())
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel())
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -424,7 +424,7 @@ func TestUpStore(t *testing.T) {
cluster.coordinator = newCoordinator(ctx, cluster, nil)
cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts())
if opt.IsPlacementRulesEnabled() {
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels())
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel())
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -527,7 +527,7 @@ func TestDeleteStoreUpdatesClusterVersion(t *testing.T) {
cluster.coordinator = newCoordinator(ctx, cluster, nil)
cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts())
if opt.IsPlacementRulesEnabled() {
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels())
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel())
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -1246,7 +1246,7 @@ func TestOfflineAndMerge(t *testing.T) {
cluster.coordinator = newCoordinator(ctx, cluster, nil)
cluster.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), cluster, cluster.GetOpts())
if opt.IsPlacementRulesEnabled() {
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels())
err := cluster.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel())
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -1992,7 +1992,7 @@ func newTestRaftCluster(
rc.InitCluster(id, opt, s, basicCluster)
rc.ruleManager = placement.NewRuleManager(storage.NewStorageWithMemoryBackend(), rc, opt)
if opt.IsPlacementRulesEnabled() {
err := rc.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels())
err := rc.ruleManager.Initialize(opt.GetMaxReplicas(), opt.GetLocationLabels(), opt.GetIsolationLevel())
if err != nil {
panic(err)
}
Expand Down
7 changes: 7 additions & 0 deletions server/config/persist_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,13 @@ func (o *PersistOptions) SetEnableWitness(enable bool) {
o.SetScheduleConfig(v)
}

// SetMaxStoreDownTime sets the max store down time in the schedule config.
// It is intended to be used in tests only.
func (o *PersistOptions) SetMaxStoreDownTime(d time.Duration) {
	// The parameter is not named "time" so that it does not shadow the time package.
	v := o.GetScheduleConfig().Clone()
	v.MaxStoreDownTime = typeutil.NewDuration(d)
	o.SetScheduleConfig(v)
}

// SetMaxMergeRegionSize sets the max merge region size.
func (o *PersistOptions) SetMaxMergeRegionSize(maxMergeRegionSize uint64) {
v := o.GetScheduleConfig().Clone()
Expand Down
35 changes: 35 additions & 0 deletions server/schedule/checker/rule_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,41 @@ func (suite *ruleCheckerTestSuite) TestAddRulePeerWithIsolationLevel() {
suite.Equal(uint64(4), op.Step(0).(operator.AddLearner).ToStore)
}

// TestReplaceDownPeerWithIsolationLevel checks that the rule checker returns no
// operator for a region whose peer on store 5 is down when both stores of that
// peer's zone (z3) are down and the placement rule isolates replicas by zone.
func (suite *ruleCheckerTestSuite) TestReplaceDownPeerWithIsolationLevel() {
// Use a short max store down time (100ms); the test sleeps 200ms below,
// presumably so the stores marked down are past this threshold by then.
suite.cluster.SetMaxStoreDownTime(100 * time.Millisecond)
// Six stores: two per zone (z1, z2, z3), each on its own host.
suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1", "host": "h2"})
suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z2", "host": "h3"})
suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z2", "host": "h4"})
suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z3", "host": "h5"})
suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z3", "host": "h6"})
// Region 1 presumably gets peers on stores 1, 3 and 5 (one per zone) — confirm
// against AddLeaderRegionWithRange.
suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 3, 5)
// Replace the default rule with one requiring 3 voters and zone-level isolation.
suite.ruleManager.DeleteRule("pd", "default")
suite.ruleManager.SetRule(&placement.Rule{
GroupID: "pd",
ID: "test",
Index: 100,
Override: true,
Role: placement.Voter,
Count: 3,
LocationLabels: []string{"zone", "host"},
IsolationLevel: "zone",
})
// No operator is expected before any peer is reported down.
op := suite.rc.Check(suite.cluster.GetRegion(1))
suite.Nil(op)
// Report the peer on store 5 as down and mark both z3 stores (5 and 6) as down.
region := suite.cluster.GetRegion(1)
downPeer := []*pdpb.PeerStats{
{Peer: region.GetStorePeer(5), DownSeconds: 6000},
}
region = region.Clone(core.WithDownPeers(downPeer))
suite.cluster.PutRegion(region)
suite.cluster.SetStoreDown(5)
suite.cluster.SetStoreDown(6)
// Sleep for longer than the 100ms max store down time configured above.
time.Sleep(200 * time.Millisecond)
// Still no operator is expected; presumably no store in z3 is usable and
// moving the peer into z1/z2 would break zone isolation — confirm against the
// rule checker.
op = suite.rc.Check(suite.cluster.GetRegion(1))
suite.Nil(op)
}

func (suite *ruleCheckerTestSuite) TestFixPeer() {
suite.cluster.AddLeaderStore(1, 1)
suite.cluster.AddLeaderStore(2, 1)
Expand Down
39 changes: 38 additions & 1 deletion server/schedule/placement/rule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func NewRuleManager(storage endpoint.RuleStorage, storeSetInformer core.StoreSet

// Initialize loads rules from storage. If Placement Rules feature is never enabled, it creates default rule that is
// compatible with previous configuration.
func (m *RuleManager) Initialize(maxReplica int, locationLabels []string) error {
func (m *RuleManager) Initialize(maxReplica int, locationLabels []string, isolationLevel string) error {
m.Lock()
defer m.Unlock()
if m.initialized {
Expand All @@ -79,12 +79,49 @@ func (m *RuleManager) Initialize(maxReplica int, locationLabels []string) error
}
if len(m.ruleConfig.rules) == 0 {
// migrate from old config.
<<<<<<< HEAD:server/schedule/placement/rule_manager.go
defaultRule := &Rule{
GroupID: "pd",
ID: "default",
Role: Voter,
Count: maxReplica,
LocationLabels: locationLabels,
=======
var defaultRules []*Rule
if m.conf != nil && m.conf.IsWitnessAllowed() && maxReplica >= 3 {
// Because maxReplica is actually always an odd number, so directly divided by 2
witnessCount := maxReplica / 2
defaultRules = append(defaultRules,
[]*Rule{
{
GroupID: "pd",
ID: "default",
Role: Voter,
Count: maxReplica - witnessCount,
LocationLabels: locationLabels,
IsolationLevel: isolationLevel,
},
{
GroupID: "pd",
ID: "witness",
Role: Voter,
Count: witnessCount,
IsWitness: true,
LocationLabels: locationLabels,
IsolationLevel: isolationLevel,
},
}...,
)
} else {
defaultRules = append(defaultRules, &Rule{
GroupID: "pd",
ID: "default",
Role: Voter,
Count: maxReplica,
LocationLabels: locationLabels,
IsolationLevel: isolationLevel,
})
>>>>>>> 5b3d0172b (*: fix sync isolation level to default placement rule (#7122)):pkg/schedule/placement/rule_manager.go
}
if err := m.storage.SaveRule(defaultRule.StoreKey(), defaultRule); err != nil {
return err
Expand Down
10 changes: 8 additions & 2 deletions server/schedule/placement/rule_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,14 @@ func newTestManager(t *testing.T) (endpoint.RuleStorage, *RuleManager) {
re := require.New(t)
store := storage.NewStorageWithMemoryBackend()
var err error
<<<<<<< HEAD:server/schedule/placement/rule_manager_test.go
manager := NewRuleManager(store, nil, nil)
err = manager.Initialize(3, []string{"zone", "rack", "host"})
=======
manager := NewRuleManager(store, nil, mockconfig.NewTestOptions())
manager.conf.SetEnableWitness(enableWitness)
err = manager.Initialize(3, []string{"zone", "rack", "host"}, "")
>>>>>>> 5b3d0172b (*: fix sync isolation level to default placement rule (#7122)):pkg/schedule/placement/rule_manager_test.go
re.NoError(err)
return store, manager
}
Expand Down Expand Up @@ -121,7 +127,7 @@ func TestSaveLoad(t *testing.T) {
}

m2 := NewRuleManager(store, nil, nil)
err := m2.Initialize(3, []string{"no", "labels"})
err := m2.Initialize(3, []string{"no", "labels"}, "")
re.NoError(err)
re.Len(m2.GetAllRules(), 3)
re.Equal(rules[0].String(), m2.GetRule("pd", "default").String())
Expand All @@ -137,7 +143,7 @@ func TestSetAfterGet(t *testing.T) {
manager.SetRule(rule)

m2 := NewRuleManager(store, nil, nil)
err := m2.Initialize(100, []string{})
err := m2.Initialize(100, []string{}, "")
re.NoError(err)
rule = m2.GetRule("pd", "default")
re.Equal(1, rule.Count)
Expand Down
15 changes: 12 additions & 3 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,7 @@ func (s *Server) SetReplicationConfig(cfg config.ReplicationConfig) error {
}
if cfg.EnablePlacementRules {
// initialize rule manager.
if err := rc.GetRuleManager().Initialize(int(cfg.MaxReplicas), cfg.LocationLabels); err != nil {
if err := rc.GetRuleManager().Initialize(int(cfg.MaxReplicas), cfg.LocationLabels, cfg.IsolationLevel); err != nil {
return err
}
} else {
Expand All @@ -978,19 +978,27 @@ func (s *Server) SetReplicationConfig(cfg config.ReplicationConfig) error {
defaultRule := rc.GetRuleManager().GetRule("pd", "default")

CheckInDefaultRule := func() error {
// replication config won't work when placement rule is enabled and exceeds one default rule
// replication config won't work when placement rule is enabled and exceeds one default rule
if !(defaultRule != nil &&
len(defaultRule.StartKey) == 0 && len(defaultRule.EndKey) == 0) {
return errors.New("cannot update MaxReplicas or LocationLabels when placement rules feature is enabled and not only default rule exists, please update rule instead")
return errors.New("cannot update MaxReplicas, LocationLabels or IsolationLevel when placement rules feature is enabled and not only default rule exists, please update rule instead")
}
<<<<<<< HEAD
if !(defaultRule.Count == int(old.MaxReplicas) && typeutil.StringsEqual(defaultRule.LocationLabels, []string(old.LocationLabels))) {
=======
if !(defaultRule.Count == int(old.MaxReplicas) && typeutil.AreStringSlicesEqual(defaultRule.LocationLabels, []string(old.LocationLabels)) && defaultRule.IsolationLevel == old.IsolationLevel) {
>>>>>>> 5b3d0172b (*: fix sync isolation level to default placement rule (#7122))
return errors.New("cannot to update replication config, the default rules do not consistent with replication config, please update rule instead")
}

return nil
}

<<<<<<< HEAD
if !(cfg.MaxReplicas == old.MaxReplicas && typeutil.StringsEqual(cfg.LocationLabels, old.LocationLabels)) {
=======
if !(cfg.MaxReplicas == old.MaxReplicas && typeutil.AreStringSlicesEqual(cfg.LocationLabels, old.LocationLabels) && cfg.IsolationLevel == old.IsolationLevel) {
>>>>>>> 5b3d0172b (*: fix sync isolation level to default placement rule (#7122))
if err := CheckInDefaultRule(); err != nil {
return err
}
Expand All @@ -1001,6 +1009,7 @@ func (s *Server) SetReplicationConfig(cfg config.ReplicationConfig) error {
if rule != nil {
rule.Count = int(cfg.MaxReplicas)
rule.LocationLabels = cfg.LocationLabels
rule.IsolationLevel = cfg.IsolationLevel
rc := s.GetRaftCluster()
if rc == nil {
return errs.ErrNotBootstrapped.GenWithStackByArgs()
Expand Down
4 changes: 2 additions & 2 deletions server/statistics/region_collection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestRegionStatistics(t *testing.T) {
re := require.New(t)
store := storage.NewStorageWithMemoryBackend()
manager := placement.NewRuleManager(store, nil, nil)
err := manager.Initialize(3, []string{"zone", "rack", "host"})
err := manager.Initialize(3, []string{"zone", "rack", "host"}, "")
re.NoError(err)
opt := config.NewTestOptions()
opt.SetPlacementRuleEnabled(false)
Expand Down Expand Up @@ -136,7 +136,7 @@ func TestRegionStatisticsWithPlacementRule(t *testing.T) {
re := require.New(t)
store := storage.NewStorageWithMemoryBackend()
manager := placement.NewRuleManager(store, nil, nil)
err := manager.Initialize(3, []string{"zone", "rack", "host"})
err := manager.Initialize(3, []string{"zone", "rack", "host"}, "")
re.NoError(err)
opt := config.NewTestOptions()
opt.SetPlacementRuleEnabled(true)
Expand Down
40 changes: 36 additions & 4 deletions tests/pdctl/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) {
re.Equal(expect, replicationCfg.MaxReplicas)
}

checkLocaltionLabels := func(expect int) {
checkLocationLabels := func(expect int) {
args := []string{"-u", pdAddr, "config", "show", "replication"}
output, err := pdctl.ExecuteCommand(cmd, args...)
re.NoError(err)
Expand All @@ -671,6 +671,15 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) {
re.Len(replicationCfg.LocationLabels, expect)
}

// checkIsolationLevel asserts that the isolation level reported by
// `config show replication` equals expect.
checkIsolationLevel := func(expect string) {
	args := []string{"-u", pdAddr, "config", "show", "replication"}
	output, err := pdctl.ExecuteCommand(cmd, args...)
	re.NoError(err)
	replicationCfg := sc.ReplicationConfig{}
	re.NoError(json.Unmarshal(output, &replicationCfg))
	// Equal takes (expected, actual); the swapped order only garbles failure messages.
	re.Equal(expect, replicationCfg.IsolationLevel)
}

checkRuleCount := func(expect int) {
args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"}
output, err := pdctl.ExecuteCommand(cmd, args...)
Expand All @@ -689,6 +698,15 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) {
re.Len(rule.LocationLabels, expect)
}

// checkRuleIsolationLevel asserts that the isolation level of the placement
// rule with group "pd" and id "default", as shown by pdctl, equals expect.
checkRuleIsolationLevel := func(expect string) {
	args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", "pd", "--id", "default"}
	output, err := pdctl.ExecuteCommand(cmd, args...)
	re.NoError(err)
	rule := placement.Rule{}
	re.NoError(json.Unmarshal(output, &rule))
	// Equal takes (expected, actual); the swapped order only garbles failure messages.
	re.Equal(expect, rule.IsolationLevel)
}

// update successfully when placement rules is not enabled.
output, err := pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "max-replicas", "2")
re.NoError(err)
Expand All @@ -697,8 +715,13 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) {
output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "location-labels", "zone,host")
re.NoError(err)
re.Contains(string(output), "Success!")
checkLocaltionLabels(2)
output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "isolation-level", "zone")
re.NoError(err)
re.Contains(string(output), "Success!")
checkLocationLabels(2)
checkRuleLocationLabels(2)
checkIsolationLevel("zone")
checkRuleIsolationLevel("zone")

// update successfully when only one default rule exists.
output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable")
Expand All @@ -711,11 +734,18 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) {
checkMaxReplicas(3)
checkRuleCount(3)

// We need to change isolation first because we will validate
// if the location label contains the isolation level when setting location labels.
output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "isolation-level", "host")
re.NoError(err)
re.Contains(string(output), "Success!")
output, err = pdctl.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "location-labels", "host")
re.NoError(err)
re.Contains(string(output), "Success!")
checkLocaltionLabels(1)
checkLocationLabels(1)
checkRuleLocationLabels(1)
checkIsolationLevel("host")
checkRuleIsolationLevel("host")

// update unsuccessfully when many rule exists.
fname := t.TempDir()
Expand All @@ -739,8 +769,10 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) {
re.NoError(err)
checkMaxReplicas(4)
checkRuleCount(4)
checkLocaltionLabels(1)
checkLocationLabels(1)
checkRuleLocationLabels(1)
checkIsolationLevel("host")
checkRuleIsolationLevel("host")
}

func TestPDServerConfig(t *testing.T) {
Expand Down

0 comments on commit 804b1fb

Please sign in to comment.