From f3c16af7d013d1b42fdb454a519919c2f6b55536 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 23 May 2024 11:14:01 +0800 Subject: [PATCH] Optimize the logic of randomly selecting regions on a single range Signed-off-by: JmPotato --- pkg/core/region_tree.go | 69 ++++++++++++++++++++++-------------- pkg/core/region_tree_test.go | 6 ++++ 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/pkg/core/region_tree.go b/pkg/core/region_tree.go index e6d05d443a10..48c73fe24f2b 100644 --- a/pkg/core/region_tree.go +++ b/pkg/core/region_tree.go @@ -339,24 +339,32 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { } // Pre-allocate the variables to reduce the temporary memory allocations. var ( - startKey, endKey []byte - startIndex, endIndex, randIndex int - startItem *regionItem - pivotItem = ®ionItem{&RegionInfo{meta: &metapb.Region{}}} - region *RegionInfo - regions = make([]*RegionInfo, 0, n) - rangeLen, curLen = len(ranges), len(regions) + startKey, endKey []byte + // By default, we set the `startIndex` and `endIndex` to the whole tree range. + startIndex, endIndex = 0, treeLen + randIndex int + startItem *regionItem + pivotItem = ®ionItem{&RegionInfo{meta: &metapb.Region{}}} + region *RegionInfo + regions = make([]*RegionInfo, 0, n) + rangeLen, curLen = len(ranges), len(regions) // setStartEndIndices is a helper function to set `startIndex` and `endIndex` - // according to the `startKey` and `endKey`. + // according to the `startKey` and `endKey` and check if the range is invalid + // to skip the iteration. // TODO: maybe we could cache the `startIndex` and `endIndex` for each range. - setStartEndIndices = func() { - pivotItem.meta.StartKey = startKey - startItem, startIndex = t.tree.GetWithIndex(pivotItem) - if len(endKey) != 0 { + setAndCheckStartEndIndices = func() bool { + startKeyLen, endKeyLen := len(startKey), len(endKey) + if startKeyLen == 0 && endKeyLen == 0 { + startIndex, endIndex = 0, treeLen + return false + } + if startKeyLen != 0 { + pivotItem.meta.StartKey = startKey + startItem, startIndex = t.tree.GetWithIndex(pivotItem) + } + if endKeyLen != 0 { pivotItem.meta.StartKey = endKey _, endIndex = t.tree.GetWithIndex(pivotItem) - } else { - endIndex = treeLen } // Consider that the item in the tree may not be continuous, // we need to check if the previous item contains the key. @@ -366,13 +374,27 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { startIndex-- } } + // Check whether the `startIndex` and `endIndex` are valid. + if endIndex <= startIndex { + if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 { + log.Error("wrong range keys", + logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))), + logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))), + errs.ZapError(errs.ErrWrongRangeKeys)) + } + return true + } + return false } ) - // If no ranges specified, select randomly from the whole tree. - // This is a fast path to reduce the unnecessary iterations. - if rangeLen == 0 { - startKey, endKey = []byte(""), []byte("") - setStartEndIndices() + // This is a fast path to reduce the unnecessary iterations when we only have one range. + if rangeLen <= 1 { + if rangeLen == 1 { + startKey, endKey = ranges[0].StartKey, ranges[0].EndKey + if setAndCheckStartEndIndices() { + return regions + } + } for curLen < n { randIndex = rand.Intn(endIndex-startIndex) + startIndex region = t.tree.GetAt(randIndex).RegionInfo @@ -393,14 +415,7 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { // Shuffle the ranges to increase the randomness. for _, i := range rand.Perm(rangeLen) { startKey, endKey = ranges[i].StartKey, ranges[i].EndKey - setStartEndIndices() - if endIndex <= startIndex { - if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 { - log.Error("wrong range keys", - logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))), - logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))), - errs.ZapError(errs.ErrWrongRangeKeys)) - } + if setAndCheckStartEndIndices() { continue } diff --git a/pkg/core/region_tree_test.go b/pkg/core/region_tree_test.go index a86f2f52c473..5886103191c7 100644 --- a/pkg/core/region_tree_test.go +++ b/pkg/core/region_tree_test.go @@ -281,8 +281,12 @@ func TestRandomRegion(t *testing.T) { updateNewItem(tree, regionA) ra := tree.randomRegion([]KeyRange{NewKeyRange("", "")}) re.Equal(regionA, ra) + ra = tree.randomRegion(nil) + re.Equal(regionA, ra) ra2 := tree.RandomRegions(2, []KeyRange{NewKeyRange("", "")}) re.Equal([]*RegionInfo{regionA, regionA}, ra2) + ra2 = tree.RandomRegions(2, nil) + re.Equal([]*RegionInfo{regionA, regionA}, ra2) regionB := NewTestRegionInfo(2, 2, []byte("g"), []byte("n")) regionC := NewTestRegionInfo(3, 3, []byte("n"), []byte("t")) @@ -307,6 +311,7 @@ func TestRandomRegion(t *testing.T) { rf = tree.randomRegion([]KeyRange{NewKeyRange("z", "")}) re.Nil(rf) + checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")}) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB}, []KeyRange{NewKeyRange("", "n")}) checkRandomRegion(re, tree, []*RegionInfo{regionC, regionD}, []KeyRange{NewKeyRange("n", "")}) @@ -356,6 +361,7 @@ func TestRandomRegionDiscontinuous(t *testing.T) { rd := tree.randomRegion([]KeyRange{NewKeyRange("", "b")}) re.Equal(regionD, rd) + checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")}) }