From 51ad007fbe82bce94edb7cd8d2c259b5fcc8c4ff Mon Sep 17 00:00:00 2001 From: JmPotato Date: Wed, 20 Mar 2024 14:11:16 +0800 Subject: [PATCH] Fix the bug that collected TSO requests could never be finished Signed-off-by: JmPotato --- client/client.go | 43 +++++++++++++++------ client/tso_batch_controller.go | 16 +++++++- client/tso_client.go | 2 +- client/tso_dispatcher.go | 30 ++++++++------- client/tso_stream.go | 8 ++-- tests/integrations/tso/client_test.go | 55 +++++++++++++++++++++++++-- 6 files changed, 119 insertions(+), 35 deletions(-) diff --git a/client/client.go b/client/client.go index 31481b918e6..8838c184d92 100644 --- a/client/client.go +++ b/client/client.go @@ -606,12 +606,22 @@ func (c *client) setServiceMode(newMode pdpb.ServiceMode) { log.Info("[pd] changing service mode", zap.String("old-mode", c.serviceMode.String()), zap.String("new-mode", newMode.String())) + c.resetTSOClientLocked(newMode) + oldMode := c.serviceMode + c.serviceMode = newMode + log.Info("[pd] service mode changed", + zap.String("old-mode", oldMode.String()), + zap.String("new-mode", newMode.String())) +} + +// Reset a new TSO client. +func (c *client) resetTSOClientLocked(mode pdpb.ServiceMode) { // Re-create a new TSO client. var ( newTSOCli *tsoClient newTSOSvcDiscovery ServiceDiscovery ) - switch newMode { + switch mode { case pdpb.ServiceMode_PD_SVC_MODE: newTSOCli = newTSOClient(c.ctx, c.option, c.pdSvcDiscovery, &pdTSOStreamBuilderFactory{}) @@ -649,11 +659,6 @@ func (c *client) setServiceMode(newMode pdpb.ServiceMode) { // We are switching from API service mode to PD service mode, so delete the old tso microservice discovery. oldTSOSvcDiscovery.Close() } - oldMode := c.serviceMode - c.serviceMode = newMode - log.Info("[pd] service mode changed", - zap.String("old-mode", oldMode.String()), - zap.String("new-mode", newMode.String())) } func (c *client) getTSOClient() *tsoClient { @@ -662,6 +667,13 @@ func (c *client) getTSOClient() *tsoClient { return c.tsoClient } +// ResetTSOClient resets the TSO client, only for test. +func (c *client) ResetTSOClient() { + c.Lock() + defer c.Unlock() + c.resetTSOClientLocked(c.serviceMode) +} + func (c *client) getServiceMode() pdpb.ServiceMode { c.RLock() defer c.RUnlock() @@ -779,18 +791,25 @@ func (c *client) GetLocalTSAsync(ctx context.Context, dcLocation string) TSFutur defer span.Finish() } - req := tsoReqPool.Get().(*tsoRequest) - req.requestCtx = ctx - req.clientCtx = c.ctx - req.start = time.Now() - req.dcLocation = dcLocation - + req := c.getTSORequest(ctx, dcLocation) if err := c.dispatchTSORequestWithRetry(req); err != nil { req.done <- err } return req } +func (c *client) getTSORequest(ctx context.Context, dcLocation string) *tsoRequest { + req := tsoReqPool.Get().(*tsoRequest) + // Set needed fields in the request before using it. 
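+	// Note: the request is recycled from tsoReqPool (a sync.Pool), so physical and
+	// logical are cleared below to avoid carrying stale values from a previous use.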
+ req.start = time.Now() + req.clientCtx = c.ctx + req.requestCtx = ctx + req.physical = 0 + req.logical = 0 + req.dcLocation = dcLocation + return req +} + const ( dispatchRetryDelay = 50 * time.Millisecond dispatchRetryCount = 2 diff --git a/client/tso_batch_controller.go b/client/tso_batch_controller.go index 842c772abd9..bd7a440fb08 100644 --- a/client/tso_batch_controller.go +++ b/client/tso_batch_controller.go @@ -16,7 +16,10 @@ package pd import ( "context" + "runtime/trace" "time" + + "github.com/tikv/pd/client/tsoutil" ) type tsoBatchController struct { @@ -130,7 +133,18 @@ func (tbc *tsoBatchController) adjustBestBatchSize() { } } -func (tbc *tsoBatchController) revokePendingRequest(err error) { +func (tbc *tsoBatchController) finishCollectedRequests(physical, firstLogical int64, suffixBits uint32, err error) { + for i := 0; i < tbc.collectedRequestCount; i++ { + tsoReq := tbc.collectedRequests[i] + tsoReq.physical, tsoReq.logical = physical, tsoutil.AddLogical(firstLogical, int64(i), suffixBits) + defer trace.StartRegion(tsoReq.requestCtx, "pdclient.tsoReqDequeue").End() + tsoReq.done <- err + } + // Prevent the finished requests from being processed again. + tbc.collectedRequestCount = 0 +} + +func (tbc *tsoBatchController) revokePendingRequests(err error) { for i := 0; i < len(tbc.tsoRequestCh); i++ { req := <-tbc.tsoRequestCh req.done <- err diff --git a/client/tso_client.go b/client/tso_client.go index eeeaf202ce6..c563df0efdb 100644 --- a/client/tso_client.go +++ b/client/tso_client.go @@ -141,7 +141,7 @@ func (c *tsoClient) Close() { if dispatcherInterface != nil { dispatcher := dispatcherInterface.(*tsoDispatcher) tsoErr := errors.WithStack(errClosing) - dispatcher.tsoBatchController.revokePendingRequest(tsoErr) + dispatcher.tsoBatchController.revokePendingRequests(tsoErr) dispatcher.dispatcherCancel() } return true diff --git a/client/tso_dispatcher.go b/client/tso_dispatcher.go index 3a6f109bfd4..a625f8dbbe1 100644 --- a/client/tso_dispatcher.go +++ b/client/tso_dispatcher.go @@ -350,7 +350,8 @@ func (c *tsoClient) createTSODispatcher(dcLocation string) { func (c *tsoClient) handleDispatcher( dispatcherCtx context.Context, dc string, - tbc *tsoBatchController) { + tbc *tsoBatchController, +) { var ( err error streamURL string @@ -428,7 +429,11 @@ tsoBatchLoop: } // Start to collect the TSO requests. maxBatchWaitInterval := c.option.getMaxTSOBatchWaitInterval() + // Once the TSO requests are collected, must make sure they could be finished or revoked eventually, + // otherwise the upper caller may get blocked on waiting for the results. if err = tbc.fetchPendingRequests(dispatcherCtx, maxBatchWaitInterval); err != nil { + // Finish the collected requests if the fetch failed. + tbc.finishCollectedRequests(0, 0, 0, errors.WithStack(err)) if err == context.Canceled { log.Info("[tso] stop fetching the pending tso requests due to context canceled", zap.String("dc-location", dc)) @@ -468,13 +473,16 @@ tsoBatchLoop: timer := time.NewTimer(retryInterval) select { case <-dispatcherCtx.Done(): + // Finish the collected requests if the context is canceled. 
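+				// Otherwise, callers blocked on waiting for these requests' results would never be unblocked.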
+ tbc.finishCollectedRequests(0, 0, 0, errors.WithStack(dispatcherCtx.Err())) timer.Stop() return case <-streamLoopTimer.C: err = errs.ErrClientCreateTSOStream.FastGenByArgs(errs.RetryTimeoutErr) log.Error("[tso] create tso stream error", zap.String("dc-location", dc), errs.ZapError(err)) c.svcDiscovery.ScheduleCheckMemberChanged() - c.finishRequest(tbc.getCollectedRequests(), 0, 0, 0, errors.WithStack(err)) + // Finish the collected requests if the stream is failed to be created. + tbc.finishCollectedRequests(0, 0, 0, errors.WithStack(err)) timer.Stop() continue tsoBatchLoop case <-timer.C: @@ -504,9 +512,12 @@ tsoBatchLoop: } select { case <-dispatcherCtx.Done(): + // Finish the collected requests if the context is canceled. + tbc.finishCollectedRequests(0, 0, 0, errors.WithStack(dispatcherCtx.Err())) return case tsDeadlineCh.(chan *deadline) <- dl: } + // processRequests guarantees that the collected requests could be finished properly. err = c.processRequests(stream, dc, tbc) close(done) // If error happens during tso stream handling, reset stream and run the next trial. @@ -776,13 +787,14 @@ func (c *tsoClient) processRequests( defer span.Finish() } } + count := int64(len(requests)) reqKeyspaceGroupID := c.svcDiscovery.GetKeyspaceGroupID() respKeyspaceGroupID, physical, logical, suffixBits, err := stream.processRequests( c.svcDiscovery.GetClusterID(), c.svcDiscovery.GetKeyspaceID(), reqKeyspaceGroupID, - dcLocation, requests, tbc.batchStartTime) + dcLocation, count, tbc.batchStartTime) if err != nil { - c.finishRequest(requests, 0, 0, 0, err) + tbc.finishCollectedRequests(0, 0, 0, err) return err } // `logical` is the largest ts's logical part here, we need to do the subtracting before we finish each TSO request. @@ -796,7 +808,7 @@ func (c *tsoClient) processRequests( logical: tsoutil.AddLogical(firstLogical, count-1, suffixBits), } c.compareAndSwapTS(dcLocation, curTSOInfo, physical, firstLogical) - c.finishRequest(requests, physical, firstLogical, suffixBits, nil) + tbc.finishCollectedRequests(physical, firstLogical, suffixBits, nil) return nil } @@ -843,11 +855,3 @@ func (c *tsoClient) compareAndSwapTS( lastTSOInfo.physical = curTSOInfo.physical lastTSOInfo.logical = curTSOInfo.logical } - -func (c *tsoClient) finishRequest(requests []*tsoRequest, physical, firstLogical int64, suffixBits uint32, err error) { - for i := 0; i < len(requests); i++ { - requests[i].physical, requests[i].logical = physical, tsoutil.AddLogical(firstLogical, int64(i), suffixBits) - defer trace.StartRegion(requests[i].requestCtx, "pdclient.tsoReqDequeue").End() - requests[i].done <- err - } -} diff --git a/client/tso_stream.go b/client/tso_stream.go index acefa19d21c..83c0f08d4e0 100644 --- a/client/tso_stream.go +++ b/client/tso_stream.go @@ -106,7 +106,7 @@ type tsoStream interface { // processRequests processes TSO requests in streaming mode to get timestamps processRequests( clusterID uint64, keyspaceID, keyspaceGroupID uint32, dcLocation string, - requests []*tsoRequest, batchStartTime time.Time, + count int64, batchStartTime time.Time, ) (respKeyspaceGroupID uint32, physical, logical int64, suffixBits uint32, err error) } @@ -120,10 +120,9 @@ func (s *pdTSOStream) getServerURL() string { } func (s *pdTSOStream) processRequests( - clusterID uint64, _, _ uint32, dcLocation string, requests []*tsoRequest, batchStartTime time.Time, + clusterID uint64, _, _ uint32, dcLocation string, count int64, batchStartTime time.Time, ) (respKeyspaceGroupID uint32, physical, logical int64, suffixBits uint32, err 
error) { start := time.Now() - count := int64(len(requests)) req := &pdpb.TsoRequest{ Header: &pdpb.RequestHeader{ ClusterId: clusterID, @@ -175,10 +174,9 @@ func (s *tsoTSOStream) getServerURL() string { func (s *tsoTSOStream) processRequests( clusterID uint64, keyspaceID, keyspaceGroupID uint32, dcLocation string, - requests []*tsoRequest, batchStartTime time.Time, + count int64, batchStartTime time.Time, ) (respKeyspaceGroupID uint32, physical, logical int64, suffixBits uint32, err error) { start := time.Now() - count := int64(len(requests)) req := &tsopb.TsoRequest{ Header: &tsopb.RequestHeader{ ClusterId: clusterID, diff --git a/tests/integrations/tso/client_test.go b/tests/integrations/tso/client_test.go index 3d7b099f342..b0bd6f1d4e5 100644 --- a/tests/integrations/tso/client_test.go +++ b/tests/integrations/tso/client_test.go @@ -21,6 +21,7 @@ import ( "math/rand" "strings" "sync" + "sync/atomic" "testing" "time" @@ -66,6 +67,10 @@ type tsoClientTestSuite struct { clients []pd.Client } +func (suite *tsoClientTestSuite) getBackendEndpoints() []string { + return strings.Split(suite.backendEndpoints, ",") +} + func TestLegacyTSOClient(t *testing.T) { suite.Run(t, &tsoClientTestSuite{ legacy: true, @@ -98,7 +103,7 @@ func (suite *tsoClientTestSuite) SetupSuite() { suite.keyspaceIDs = make([]uint32, 0) if suite.legacy { - client, err := pd.NewClientWithContext(suite.ctx, strings.Split(suite.backendEndpoints, ","), pd.SecurityOption{}, pd.WithForwardingOption(true)) + client, err := pd.NewClientWithContext(suite.ctx, suite.getBackendEndpoints(), pd.SecurityOption{}, pd.WithForwardingOption(true)) re.NoError(err) innerClient, ok := client.(interface{ GetServiceDiscovery() pd.ServiceDiscovery }) re.True(ok) @@ -173,7 +178,7 @@ func (suite *tsoClientTestSuite) waitForAllKeyspaceGroupsInServing(re *require.A // Create clients and make sure they all have discovered the tso service. suite.clients = mcs.WaitForMultiKeyspacesTSOAvailable( - suite.ctx, re, suite.keyspaceIDs, strings.Split(suite.backendEndpoints, ",")) + suite.ctx, re, suite.keyspaceIDs, suite.getBackendEndpoints()) re.Equal(len(suite.keyspaceIDs), len(suite.clients)) } @@ -254,7 +259,7 @@ func (suite *tsoClientTestSuite) TestDiscoverTSOServiceWithLegacyPath() { ctx, cancel := context.WithCancel(suite.ctx) defer cancel() client := mcs.SetupClientWithKeyspaceID( - ctx, re, keyspaceID, strings.Split(suite.backendEndpoints, ",")) + ctx, re, keyspaceID, suite.getBackendEndpoints()) defer client.Close() var lastTS uint64 for j := 0; j < tsoRequestRound; j++ { @@ -420,6 +425,50 @@ func (suite *tsoClientTestSuite) TestRandomShutdown() { re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/tso/fastUpdatePhysicalInterval")) } +func (suite *tsoClientTestSuite) TestGetTSWhileRestingTSOClient() { + re := suite.Require() + var ( + clients []pd.Client + stopSignal atomic.Bool + wg sync.WaitGroup + ) + // Create independent clients to prevent interfering with other tests. 
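+	// Their TSO clients will be reset concurrently below, which could disturb clients shared with other tests.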
+ if suite.legacy { + client, err := pd.NewClientWithContext(suite.ctx, suite.getBackendEndpoints(), pd.SecurityOption{}, pd.WithForwardingOption(true)) + re.NoError(err) + clients = []pd.Client{client} + } else { + clients = mcs.WaitForMultiKeyspacesTSOAvailable(suite.ctx, re, suite.keyspaceIDs, suite.getBackendEndpoints()) + } + wg.Add(tsoRequestConcurrencyNumber * len(clients)) + for i := 0; i < tsoRequestConcurrencyNumber; i++ { + for _, client := range clients { + go func(client pd.Client) { + defer wg.Done() + var lastTS uint64 + for !stopSignal.Load() { + physical, logical, err := client.GetTS(suite.ctx) + if err != nil { + re.ErrorContains(err, context.Canceled.Error()) + } else { + ts := tsoutil.ComposeTS(physical, logical) + re.Less(lastTS, ts) + lastTS = ts + } + } + }(client) + } + } + // Reset the TSO clients while requesting TSO concurrently. + for i := 0; i < tsoRequestConcurrencyNumber; i++ { + for _, client := range clients { + client.(interface{ ResetTSOClient() }).ResetTSOClient() + } + } + stopSignal.Store(true) + wg.Wait() +} + // When we upgrade the PD cluster, there may be a period of time that the old and new PDs are running at the same time. func TestMixedTSODeployment(t *testing.T) { re := require.New(t)