-
Notifications
You must be signed in to change notification settings - Fork 726
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
schedulers,test: avoid some test branches not being reached and remove schedulePeerPr #8087
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
6f1103a
tests: avoid some test branches not being reached
lhy1024 a91615b
make test stable
lhy1024 ccbbef1
remove schedulePeerPr
lhy1024 73aab70
remove comments
lhy1024 6e7cb84
Merge branch 'master' of github.com:tikv/pd into fix-test20
lhy1024 f4f4e04
add more tests
lhy1024 f7d8c25
address comments
lhy1024 f434c5c
update
lhy1024 8d6f0cc
Merge branch 'master' into fix-test20
HuSharp File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -47,25 +47,26 @@ | |||
// HotRegionName is balance hot region scheduler name. | ||||
HotRegionName = "balance-hot-region-scheduler" | ||||
// HotRegionType is balance hot region scheduler type. | ||||
HotRegionType = "hot-region" | ||||
splitHotReadBuckets = "split-hot-read-region" | ||||
splitHotWriteBuckets = "split-hot-write-region" | ||||
splitProgressiveRank = int64(-5) | ||||
minHotScheduleInterval = time.Second | ||||
maxHotScheduleInterval = 20 * time.Second | ||||
HotRegionType = "hot-region" | ||||
splitHotReadBuckets = "split-hot-read-region" | ||||
splitHotWriteBuckets = "split-hot-write-region" | ||||
splitProgressiveRank = int64(-5) | ||||
minHotScheduleInterval = time.Second | ||||
maxHotScheduleInterval = 20 * time.Second | ||||
defaultPendingAmpFactor = 2.0 | ||||
defaultStddevThreshold = 0.1 | ||||
defaultTopnPosition = 10 | ||||
) | ||||
|
||||
var ( | ||||
// schedulePeerPr the probability of schedule the hot peer. | ||||
schedulePeerPr = 0.66 | ||||
// pendingAmpFactor will amplify the impact of pending influence, making scheduling slower or even serial when two stores are close together | ||||
pendingAmpFactor = 2.0 | ||||
pendingAmpFactor = defaultPendingAmpFactor | ||||
// If the distribution of a dimension is below the corresponding stddev threshold, then scheduling will no longer be based on this dimension, | ||||
// as it implies that this dimension is sufficiently uniform. | ||||
stddevThreshold = 0.1 | ||||
stddevThreshold = defaultStddevThreshold | ||||
// topnPosition is the position of the topn peer in the hot peer list. | ||||
// We use it to judge whether to schedule the hot peer in some cases. | ||||
topnPosition = 10 | ||||
topnPosition = defaultTopnPosition | ||||
// statisticsInterval is the interval to update statistics information. | ||||
statisticsInterval = time.Second | ||||
) | ||||
|
@@ -120,8 +121,9 @@ | |||
// regionPendings stores regionID -> pendingInfluence, | ||||
// this records regionID which have pending Operator by operation type. During filterHotPeers, the hot peers won't | ||||
// be selected if its owner region is tracked in this attribute. | ||||
regionPendings map[uint64]*pendingInfluence | ||||
types []utils.RWType | ||||
regionPendings map[uint64]*pendingInfluence | ||||
// types is the resource types that the scheduler considers. | ||||
types []resourceType | ||||
r *rand.Rand | ||||
updateReadTime time.Time | ||||
updateWriteTime time.Time | ||||
|
@@ -131,26 +133,26 @@ | |||
base := NewBaseScheduler(opController) | ||||
ret := &baseHotScheduler{ | ||||
BaseScheduler: base, | ||||
types: []utils.RWType{utils.Write, utils.Read}, | ||||
regionPendings: make(map[uint64]*pendingInfluence), | ||||
stHistoryLoads: statistics.NewStoreHistoryLoads(utils.DimLen, sampleDuration, sampleInterval), | ||||
r: rand.New(rand.NewSource(time.Now().UnixNano())), | ||||
} | ||||
for ty := resourceType(0); ty < resourceTypeLen; ty++ { | ||||
ret.types = append(ret.types, ty) | ||||
ret.stLoadInfos[ty] = map[uint64]*statistics.StoreLoadDetail{} | ||||
} | ||||
return ret | ||||
} | ||||
|
||||
// prepareForBalance calculate the summary of pending Influence for each store and prepare the load detail for | ||||
// each store, only update read or write load detail | ||||
func (h *baseHotScheduler) prepareForBalance(rw utils.RWType, cluster sche.SchedulerCluster) { | ||||
func (h *baseHotScheduler) prepareForBalance(typ resourceType, cluster sche.SchedulerCluster) { | ||||
storeInfos := statistics.SummaryStoreInfos(cluster.GetStores()) | ||||
h.summaryPendingInfluence(storeInfos) | ||||
storesLoads := cluster.GetStoresLoads() | ||||
isTraceRegionFlow := cluster.GetSchedulerConfig().IsTraceRegionFlow() | ||||
|
||||
prepare := func(regionStats map[uint64][]*statistics.HotPeerStat, resource constant.ResourceKind) { | ||||
prepare := func(regionStats map[uint64][]*statistics.HotPeerStat, rw utils.RWType, resource constant.ResourceKind) { | ||||
ty := buildResourceType(rw, resource) | ||||
h.stLoadInfos[ty] = statistics.SummaryStoresLoad( | ||||
storeInfos, | ||||
|
@@ -160,23 +162,25 @@ | |||
isTraceRegionFlow, | ||||
rw, resource) | ||||
} | ||||
switch rw { | ||||
case utils.Read: | ||||
switch typ { | ||||
case readLeader, readPeer: | ||||
// update read statistics | ||||
if time.Since(h.updateReadTime) >= statisticsInterval { | ||||
regionRead := cluster.RegionReadStats() | ||||
prepare(regionRead, constant.LeaderKind) | ||||
prepare(regionRead, constant.RegionKind) | ||||
prepare(regionRead, utils.Read, constant.LeaderKind) | ||||
prepare(regionRead, utils.Read, constant.RegionKind) | ||||
h.updateReadTime = time.Now() | ||||
} | ||||
case utils.Write: | ||||
case writeLeader, writePeer: | ||||
// update write statistics | ||||
if time.Since(h.updateWriteTime) >= statisticsInterval { | ||||
regionWrite := cluster.RegionWriteStats() | ||||
prepare(regionWrite, constant.LeaderKind) | ||||
prepare(regionWrite, constant.RegionKind) | ||||
prepare(regionWrite, utils.Write, constant.LeaderKind) | ||||
prepare(regionWrite, utils.Write, constant.RegionKind) | ||||
h.updateWriteTime = time.Now() | ||||
} | ||||
default: | ||||
log.Error("invalid resource type", zap.String("type", typ.String())) | ||||
} | ||||
} | ||||
|
||||
|
@@ -223,7 +227,7 @@ | |||
HotPendingSum.WithLabelValues(storeLabel, rwTy, dim).Set(load) | ||||
} | ||||
|
||||
func (h *baseHotScheduler) randomRWType() utils.RWType { | ||||
func (h *baseHotScheduler) randomType() resourceType { | ||||
return h.types[h.r.Int()%len(h.types)] | ||||
} | ||||
|
||||
|
@@ -324,24 +328,32 @@ | |||
|
||||
func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { | ||||
hotSchedulerCounter.Inc() | ||||
rw := h.randomRWType() | ||||
return h.dispatch(rw, cluster), nil | ||||
typ := h.randomType() | ||||
return h.dispatch(typ, cluster), nil | ||||
} | ||||
|
||||
func (h *hotScheduler) dispatch(typ utils.RWType, cluster sche.SchedulerCluster) []*operator.Operator { | ||||
func (h *hotScheduler) dispatch(typ resourceType, cluster sche.SchedulerCluster) []*operator.Operator { | ||||
h.Lock() | ||||
defer h.Unlock() | ||||
h.updateHistoryLoadConfig(h.conf.GetHistorySampleDuration(), h.conf.GetHistorySampleInterval()) | ||||
h.prepareForBalance(typ, cluster) | ||||
// it can not move earlier to support to use api and metrics. | ||||
if h.conf.IsForbidRWType(typ) { | ||||
return nil | ||||
} | ||||
// IsForbidRWType can not be move earlier to support to use api and metrics. | ||||
switch typ { | ||||
case utils.Read: | ||||
case readLeader, readPeer: | ||||
if h.conf.IsForbidRWType(utils.Read) { | ||||
return nil | ||||
} | ||||
return h.balanceHotReadRegions(cluster) | ||||
case utils.Write: | ||||
return h.balanceHotWriteRegions(cluster) | ||||
case writePeer: | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Why do we need to distinguish write?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
pd/pkg/statistics/collector.go, line 68 in e7c9d15
|
||||
if h.conf.IsForbidRWType(utils.Write) { | ||||
return nil | ||||
} | ||||
return h.balanceHotWritePeers(cluster) | ||||
case writeLeader: | ||||
if h.conf.IsForbidRWType(utils.Write) { | ||||
return nil | ||||
} | ||||
return h.balanceHotWriteLeaders(cluster) | ||||
} | ||||
return nil | ||||
} | ||||
|
@@ -406,19 +418,16 @@ | |||
return nil | ||||
} | ||||
|
||||
func (h *hotScheduler) balanceHotWriteRegions(cluster sche.SchedulerCluster) []*operator.Operator { | ||||
// prefer to balance by peer | ||||
s := h.r.Intn(100) | ||||
switch { | ||||
case s < int(schedulePeerPr*100): | ||||
peerSolver := newBalanceSolver(h, cluster, utils.Write, movePeer) | ||||
ops := peerSolver.solve() | ||||
if len(ops) > 0 && peerSolver.tryAddPendingInfluence() { | ||||
return ops | ||||
} | ||||
default: | ||||
func (h *hotScheduler) balanceHotWritePeers(cluster sche.SchedulerCluster) []*operator.Operator { | ||||
peerSolver := newBalanceSolver(h, cluster, utils.Write, movePeer) | ||||
ops := peerSolver.solve() | ||||
if len(ops) > 0 && peerSolver.tryAddPendingInfluence() { | ||||
return ops | ||||
} | ||||
return nil | ||||
} | ||||
|
||||
func (h *hotScheduler) balanceHotWriteLeaders(cluster sche.SchedulerCluster) []*operator.Operator { | ||||
leaderSolver := newBalanceSolver(h, cluster, utils.Write, transferLeader) | ||||
ops := leaderSolver.solve() | ||||
if len(ops) > 0 && leaderSolver.tryAddPendingInfluence() { | ||||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about using a clearer name instead of resourceType?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have any good ideas about this name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we can merge `rwTy` and `resourceType` in another PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1