Skip to content

Commit

Permalink
Merge branch 'master' of github.com:tikv/pd into patrol-concurrency
Browse files Browse the repository at this point in the history
Signed-off-by: lhy1024 <[email protected]>
  • Loading branch information
lhy1024 committed Jun 12, 2024
2 parents 0d02d8b + c015f14 commit 9896228
Show file tree
Hide file tree
Showing 49 changed files with 524 additions and 176 deletions.
6 changes: 6 additions & 0 deletions OWNERS_ALIASES
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Sort the member alphabetically.
aliases:
sig-critical-approvers-config:
- easonn7
- kevin-xianliu
- niubell
11 changes: 0 additions & 11 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1431,17 +1431,6 @@ func (c *client) scatterRegionsWithOptions(ctx context.Context, regionsID []uint
return resp, nil
}

// IsLeaderChange will determine whether there is a leader change.
func IsLeaderChange(err error) bool {
if err == errs.ErrClientTSOStreamClosed {
return true
}
errMsg := err.Error()
return strings.Contains(errMsg, errs.NotLeaderErr) ||
strings.Contains(errMsg, errs.MismatchLeaderErr) ||
strings.Contains(errMsg, errs.NotServedErr)
}

const (
httpSchemePrefix = "http://"
httpsSchemePrefix = "https://"
Expand Down
13 changes: 6 additions & 7 deletions client/errs/errno.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,20 @@ import (
"github.com/pingcap/errors"
)

// Note: keep the same as the ones defined on the server side to ensure the client can use them correctly.
const (
// NoLeaderErr indicates there is no leader in the cluster currently.
NoLeaderErr = "no leader"
// NotLeaderErr indicates the non-leader member received the requests which should be received by leader.
// Note: keep the same as the ones defined on the server side, because the client side checks if an error message
// contains this string to judge whether the leader is changed.
NotLeaderErr = "is not leader"
NotLeaderErr = "not leader"
// MismatchLeaderErr indicates the non-leader member received the requests which should be received by leader.
// Note: keep the same as the ones defined on the server side, because the client side checks if an error message
// contains this string to judge whether the leader is changed.
MismatchLeaderErr = "mismatch leader id"
// NotServedErr indicates an tso node/pod received the requests for the keyspace groups which are not served by it.
// Note: keep the same as the ones defined on the server side, because the client side checks if an error message
// contains this string to judge whether the leader is changed.
NotServedErr = "is not served"
// RetryTimeoutErr indicates the server is busy.
RetryTimeoutErr = "retry timeout"
// NotPrimaryErr indicates the non-primary member received the requests which should be received by primary.
NotPrimaryErr = "not primary"
)

// client errors
Expand Down
18 changes: 18 additions & 0 deletions client/errs/errs.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,29 @@
package errs

import (
"strings"

"github.com/pingcap/errors"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)

// IsLeaderChange will determine whether there is a leader/primary change.
func IsLeaderChange(err error) bool {
if err == nil {
return false
}
if err == ErrClientTSOStreamClosed {
return true
}
errMsg := err.Error()
return strings.Contains(errMsg, NoLeaderErr) ||
strings.Contains(errMsg, NotLeaderErr) ||
strings.Contains(errMsg, MismatchLeaderErr) ||
strings.Contains(errMsg, NotServedErr) ||
strings.Contains(errMsg, NotPrimaryErr)
}

// ZapError is used to make the log output easier.
func ZapError(err error, causeError ...error) zap.Field {
if err == nil {
Expand Down
49 changes: 33 additions & 16 deletions client/http/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,28 +120,47 @@ func (ci *clientInner) requestWithRetry(
headerOpts ...HeaderOption,
) error {
var (
serverURL string
isLeader bool
statusCode int
err error
logFields = append(reqInfo.logFields(), zap.String("source", ci.source))
)
execFunc := func() error {
defer func() {
// If the status code is 503, it indicates that there may be PD leader/follower changes.
// If the error message contains the leader/primary change information, it indicates that there may be PD leader/primary change.
if statusCode == http.StatusServiceUnavailable || errs.IsLeaderChange(err) {
ci.sd.ScheduleCheckMemberChanged()
}
log.Debug("[pd] http request finished", append(logFields,
zap.String("server-url", serverURL),
zap.Bool("is-leader", isLeader),
zap.Int("status-code", statusCode),
zap.Error(err))...)
}()
// It will try to send the request to the PD leader first and then try to send the request to the other PD followers.
clients := ci.sd.GetAllServiceClients()
if len(clients) == 0 {
return errs.ErrClientNoAvailableMember
}
skipNum := 0
for _, cli := range clients {
url := cli.GetURL()
if reqInfo.targetURL != "" && reqInfo.targetURL != url {
serverURL = cli.GetURL()
isLeader = cli.IsConnectedToLeader()
if len(reqInfo.targetURL) > 0 && reqInfo.targetURL != serverURL {
skipNum++
continue
}
statusCode, err = ci.doRequest(ctx, url, reqInfo, headerOpts...)
statusCode, err = ci.doRequest(ctx, serverURL, reqInfo, headerOpts...)
if err == nil || noNeedRetry(statusCode) {
return err
}
log.Debug("[pd] request url failed",
zap.String("source", ci.source), zap.Bool("is-leader", cli.IsConnectedToLeader()), zap.String("url", url), zap.Error(err))
log.Debug("[pd] http request url failed", append(logFields,
zap.String("server-url", serverURL),
zap.Bool("is-leader", isLeader),
zap.Int("status-code", statusCode),
zap.Error(err))...)
}
if skipNum == len(clients) {
return errs.ErrClientNoTargetMember
Expand Down Expand Up @@ -169,26 +188,21 @@ func noNeedRetry(statusCode int) bool {

func (ci *clientInner) doRequest(
ctx context.Context,
url string, reqInfo *requestInfo,
serverURL string, reqInfo *requestInfo,
headerOpts ...HeaderOption,
) (int, error) {
var (
source = ci.source
callerID = reqInfo.callerID
name = reqInfo.name
method = reqInfo.method
body = reqInfo.body
res = reqInfo.res
respHandler = reqInfo.respHandler
url = reqInfo.getURL(serverURL)
logFields = append(reqInfo.logFields(),
zap.String("source", ci.source),
zap.String("url", url))
)
url = reqInfo.getURL(url)
logFields := []zap.Field{
zap.String("source", source),
zap.String("name", name),
zap.String("url", url),
zap.String("method", method),
zap.String("caller-id", callerID),
}
log.Debug("[pd] request the http url", logFields...)
req, err := http.NewRequestWithContext(ctx, method, url, bytes.NewBuffer(body))
if err != nil {
Expand Down Expand Up @@ -229,11 +243,14 @@ func (ci *clientInner) doRequest(
if readErr != nil {
logFields = append(logFields, zap.NamedError("read-body-error", err))
} else {
// API server will return a JSON body containing the detailed error message
// when the status code is not `http.StatusOK` 200.
bs = bytes.TrimSpace(bs)
logFields = append(logFields, zap.ByteString("body", bs))
}

log.Error("[pd] request failed with a non-200 status", logFields...)
return resp.StatusCode, errors.Errorf("request pd http api failed with status: '%s'", resp.Status)
return resp.StatusCode, errors.Errorf("request pd http api failed with status: '%s', body: '%s'", resp.Status, bs)
}

if res == nil {
Expand Down
9 changes: 9 additions & 0 deletions client/http/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type Client interface {
GetRegionStatusByKeyRange(context.Context, *KeyRange, bool) (*RegionStats, error)
GetStores(context.Context) (*StoresInfo, error)
GetStore(context.Context, uint64) (*StoreInfo, error)
DeleteStore(context.Context, uint64) error
SetStoreLabels(context.Context, int64, map[string]string) error
GetHealthStatus(context.Context) ([]Health, error)
/* Config-related interfaces */
Expand Down Expand Up @@ -440,6 +441,14 @@ func (c *client) GetStore(ctx context.Context, storeID uint64) (*StoreInfo, erro
return &store, nil
}

// DeleteStore deletes the store by ID.
func (c *client) DeleteStore(ctx context.Context, storeID uint64) error {
return c.request(ctx, newRequestInfo().
WithName(deleteStoreName).
WithURI(StoreByID(storeID)).
WithMethod(http.MethodDelete))
}

// GetClusterVersion gets the cluster version.
func (c *client) GetClusterVersion(ctx context.Context) (string, error) {
var version string
Expand Down
12 changes: 12 additions & 0 deletions client/http/request_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"fmt"

"github.com/tikv/pd/client/retry"
"go.uber.org/zap"
)

// The following constants are the names of the requests.
Expand All @@ -38,6 +39,7 @@ const (
getRegionStatusByKeyRangeName = "GetRegionStatusByKeyRange"
getStoresName = "GetStores"
getStoreName = "GetStore"
deleteStoreName = "DeleteStore"
setStoreLabelsName = "SetStoreLabels"
getHealthStatusName = "GetHealthStatus"
getConfigName = "GetConfig"
Expand Down Expand Up @@ -157,3 +159,13 @@ func (ri *requestInfo) WithTargetURL(targetURL string) *requestInfo {
func (ri *requestInfo) getURL(addr string) string {
return fmt.Sprintf("%s%s", addr, ri.uri)
}

func (ri *requestInfo) logFields() []zap.Field {
return []zap.Field{
zap.String("caller-id", ri.callerID),
zap.String("name", ri.name),
zap.String("uri", ri.uri),
zap.String("method", ri.method),
zap.String("target-url", ri.targetURL),
}
}
3 changes: 2 additions & 1 deletion client/pd_service_discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/tikv/pd/client/errs"
"github.com/tikv/pd/client/grpcutil"
"github.com/tikv/pd/client/testutil"
"google.golang.org/grpc"
Expand Down Expand Up @@ -205,7 +206,7 @@ func (suite *serviceClientTestSuite) TestServiceClient() {
re.NotNil(leaderConn)

_, err := pb.NewGreeterClient(followerConn).SayHello(suite.ctx, &pb.HelloRequest{Name: "pd"})
re.ErrorContains(err, "not leader")
re.ErrorContains(err, errs.NotLeaderErr)
resp, err := pb.NewGreeterClient(leaderConn).SayHello(suite.ctx, &pb.HelloRequest{Name: "pd"})
re.NoError(err)
re.Equal("Hello pd", resp.GetMessage())
Expand Down
7 changes: 7 additions & 0 deletions client/resource_group/controller/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# See the OWNERS docs at https://go.k8s.io/owners
options:
no_parent_owners: true
filters:
"(OWNERS|config\\.go)$":
approvers:
- sig-critical-approvers-config
7 changes: 1 addition & 6 deletions client/resource_manager_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package pd

import (
"context"
"strings"
"time"

"github.com/gogo/protobuf/proto"
Expand All @@ -35,10 +34,6 @@ const (
modify actionType = 1
groupSettingsPathPrefix = "resource_group/settings"
controllerConfigPathPrefix = "resource_group/controller"
// errNotPrimary is returned when the requested server is not primary.
errNotPrimary = "not primary"
// errNotLeader is returned when the requested server is not pd leader.
errNotLeader = "not leader"
)

// GroupSettingsPathPrefixBytes is used to watch or get resource groups.
Expand Down Expand Up @@ -83,7 +78,7 @@ func (c *client) resourceManagerClient() (rmpb.ResourceManagerClient, error) {

// gRPCErrorHandler is used to handle the gRPC error returned by the resource manager service.
func (c *client) gRPCErrorHandler(err error) {
if strings.Contains(err.Error(), errNotPrimary) || strings.Contains(err.Error(), errNotLeader) {
if errs.IsLeaderChange(err) {
c.pdSvcDiscovery.ScheduleCheckMemberChanged()
}
}
Expand Down
7 changes: 7 additions & 0 deletions client/tlsutil/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# See the OWNERS docs at https://go.k8s.io/owners
options:
no_parent_owners: true
filters:
"(OWNERS|tlsconfig\\.go)$":
approvers:
- sig-critical-approvers-config
2 changes: 1 addition & 1 deletion client/tso_dispatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ tsoBatchLoop:
cancel()
stream = nil
// Because ScheduleCheckMemberChanged is asynchronous, if the leader changes, we better call `updateMember` ASAP.
if IsLeaderChange(err) {
if errs.IsLeaderChange(err) {
if err := bo.Exec(ctx, svcDiscovery.CheckMemberChanged); err != nil {
select {
case <-ctx.Done():
Expand Down
7 changes: 7 additions & 0 deletions conf/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# See the OWNERS docs at https://go.k8s.io/owners
options:
no_parent_owners: true
filters:
"(OWNERS|config\\.toml)$":
approvers:
- sig-critical-approvers-config
10 changes: 10 additions & 0 deletions errors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,21 @@ error = '''
redirect failed
'''

["PD:apiutil:ErrRedirectNoLeader"]
error = '''
redirect finds no leader
'''

["PD:apiutil:ErrRedirectToNotLeader"]
error = '''
redirect to not leader
'''

["PD:apiutil:ErrRedirectToNotPrimary"]
error = '''
redirect to not primary
'''

["PD:autoscaling:ErrEmptyMetricsResponse"]
error = '''
metrics response from Prometheus is empty
Expand Down
2 changes: 1 addition & 1 deletion metrics/grafana/pd.json
Original file line number Diff line number Diff line change
Expand Up @@ -2096,7 +2096,7 @@
{
"format": "dtdurations",
"label": null,
"logBase": 1,
"logBase": 2,
"max": null,
"min": "0",
"show": true
Expand Down
22 changes: 11 additions & 11 deletions pkg/core/storelimit/store_limit.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,15 @@ func (l *StoreRateLimit) Reset(rate float64, typ Type) {

// limit the operators of a store
type limit struct {
syncutil.RWMutex
limiter *ratelimit.RateLimiter
ratePerSec float64
limiter *ratelimit.RateLimiter
ratePerSecMutex syncutil.RWMutex
ratePerSec float64
}

// Reset resets the rate limit.
func (l *limit) Reset(ratePerSec float64) {
l.Lock()
defer l.Unlock()
l.ratePerSecMutex.Lock()
defer l.ratePerSecMutex.Unlock()
if l.ratePerSec == ratePerSec {
return
}
Expand All @@ -159,8 +159,8 @@ func (l *limit) Reset(ratePerSec float64) {
// Available returns the number of available tokens
// It returns true if the rate per second is zero.
func (l *limit) Available(n int64) bool {
l.RLock()
defer l.RUnlock()
l.ratePerSecMutex.RLock()
defer l.ratePerSecMutex.RUnlock()
if l.ratePerSec == 0 {
return true
}
Expand All @@ -170,16 +170,16 @@ func (l *limit) Available(n int64) bool {

// Take takes count tokens from the bucket without blocking.
func (l *limit) Take(count int64) bool {
l.RLock()
defer l.RUnlock()
l.ratePerSecMutex.RLock()
defer l.ratePerSecMutex.RUnlock()
if l.ratePerSec == 0 {
return true
}
return l.limiter.AllowN(int(count))
}

func (l *limit) GetRatePerSec() float64 {
l.RLock()
defer l.RUnlock()
l.ratePerSecMutex.RLock()
defer l.ratePerSecMutex.RUnlock()
return l.ratePerSec
}
Loading

0 comments on commit 9896228

Please sign in to comment.