From 982fa22ffad01ebe640e5c23f3c6d0c67bde6742 Mon Sep 17 00:00:00 2001
From: Ryan Leung
Date: Fri, 11 Oct 2024 12:16:14 +0800
Subject: [PATCH] *: fix the race of `TestRaftClusterMultipleRestart` (#8686)

close tikv/pd#8543

Signed-off-by: Ryan Leung
Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
---
 pkg/replication/replication_mode.go  | 2 ++
 server/cluster/cluster.go            | 5 +++++
 tests/server/cluster/cluster_test.go | 2 ++
 3 files changed, 9 insertions(+)

diff --git a/pkg/replication/replication_mode.go b/pkg/replication/replication_mode.go
index fd55874ce30..cba83cf1ebb 100644
--- a/pkg/replication/replication_mode.go
+++ b/pkg/replication/replication_mode.go
@@ -345,6 +345,7 @@ func (m *ModeManager) Run(ctx context.Context) {
 	select {
 	case <-timer.C:
 	case <-ctx.Done():
+		log.Info("replication mode manager is stopped")
 		return
 	}
 
@@ -383,6 +384,7 @@ func (m *ModeManager) Run(ctx context.Context) {
 	}()
 
 	wg.Wait()
+	log.Info("replication mode manager is stopped")
 }
 
 func minimalUpVoters(rule *placement.Rule, upStores, downStores []*core.StoreInfo) int {
diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go
index f1630d433d1..4cce39fa093 100644
--- a/server/cluster/cluster.go
+++ b/server/cluster/cluster.go
@@ -766,6 +766,11 @@ func (c *RaftCluster) Stop() {
 	log.Info("raft cluster is stopped")
 }
 
+// Wait blocks until the cluster is stopped. Only for test purpose.
+func (c *RaftCluster) Wait() {
+	c.wg.Wait()
+}
+
 // IsRunning return if the cluster is running.
 func (c *RaftCluster) IsRunning() bool {
 	c.RLock()
diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go
index a1a7f68646e..a9be92d19e9 100644
--- a/tests/server/cluster/cluster_test.go
+++ b/tests/server/cluster/cluster_test.go
@@ -615,6 +615,8 @@ func TestRaftClusterMultipleRestart(t *testing.T) {
 	// let the job run at small interval
 	re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`))
 	for i := 0; i < 100; i++ {
+		// See https://github.com/tikv/pd/issues/8543
+		rc.Wait()
 		err = rc.Start(leaderServer.GetServer())
 		re.NoError(err)
 		time.Sleep(time.Millisecond)