Skip to content

Commit

Permalink
Cherry-pick 9181115 with conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
vitess-bot[bot] authored and vitess-bot committed Dec 9, 2024
1 parent f0d9386 commit 4afc5b3
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 18 deletions.
13 changes: 7 additions & 6 deletions go/vt/vtorc/logic/keyspace_shard_discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,16 @@ import (
)

// RefreshAllKeyspacesAndShards reloads the keyspace and shard information for the keyspaces that vtorc is concerned with.
func RefreshAllKeyspacesAndShards() {
func RefreshAllKeyspacesAndShards(ctx context.Context) error {
var keyspaces []string
if len(clustersToWatch) == 0 { // all known keyspaces
ctx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
defer cancel()
var err error
// Get all the keyspaces
keyspaces, err = ts.GetKeyspaces(ctx)
if err != nil {
log.Error(err)
return
return err
}
} else {
// Parse input and build list of keyspaces
Expand All @@ -55,14 +54,14 @@ func RefreshAllKeyspacesAndShards() {
}
if len(keyspaces) == 0 {
log.Errorf("Found no keyspaces for input: %+v", clustersToWatch)
return
return nil
}
}

// Sort the list of keyspaces.
// The list can have duplicates because the input to clusters to watch may have multiple shards of the same keyspace
sort.Strings(keyspaces)
refreshCtx, refreshCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
refreshCtx, refreshCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
defer refreshCancel()
var wg sync.WaitGroup
for idx, keyspace := range keyspaces {
Expand All @@ -83,6 +82,8 @@ func RefreshAllKeyspacesAndShards() {
}(keyspace)
}
wg.Wait()

return nil
}

// RefreshKeyspaceAndShard refreshes the keyspace record and shard record for the given keyspace and shard.
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vtorc/logic/keyspace_shard_discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func TestRefreshAllKeyspaces(t *testing.T) {
// Set clusters to watch to only watch ks1 and ks3
onlyKs1and3 := []string{"ks1/-80", "ks3/-80", "ks3/80-"}
clustersToWatch = onlyKs1and3
RefreshAllKeyspacesAndShards()
require.NoError(t, RefreshAllKeyspacesAndShards(context.Background()))

// Verify that we only have ks1 and ks3 in vtorc's db.
verifyKeyspaceInfo(t, "ks1", keyspaceDurabilityNone, "")
Expand All @@ -108,7 +108,7 @@ func TestRefreshAllKeyspaces(t *testing.T) {
clustersToWatch = nil
// Change the durability policy of ks1
reparenttestutil.SetKeyspaceDurability(ctx, t, ts, "ks1", "semi_sync")
RefreshAllKeyspacesAndShards()
require.NoError(t, RefreshAllKeyspacesAndShards(context.Background()))

// Verify that all the keyspaces are correctly reloaded
verifyKeyspaceInfo(t, "ks1", keyspaceDurabilitySemiSync, "")
Expand Down
34 changes: 24 additions & 10 deletions go/vt/vtorc/logic/tablet_discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"time"

"github.com/spf13/pflag"

"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/proto"

Expand Down Expand Up @@ -71,30 +70,44 @@ func OpenTabletDiscovery() <-chan time.Time {
if _, err := db.ExecVTOrc("delete from vitess_tablet"); err != nil {
log.Error(err)
}
<<<<<<< HEAD
return time.Tick(time.Second * time.Duration(config.Config.TopoInformationRefreshSeconds)) //nolint SA1015: using time.Tick leaks the underlying ticker
=======
// We refresh all information from the topo once before we start the ticks to do
// it on a timer.
ctx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
defer cancel()
if err := refreshAllInformation(ctx); err != nil {
log.Errorf("failed to initialize topo information: %+v", err)
}
return time.Tick(config.GetTopoInformationRefreshDuration()) //nolint SA1015: using time.Tick leaks the underlying ticker
>>>>>>> 91811154ac (`vtorc`: require topo for `Healthy: true` in `/debug/health` (#17129))
}

// refreshAllTablets reloads the tablets from topo and discovers the ones which haven't been refreshed in a while
func refreshAllTablets() {
refreshTabletsUsing(func(tabletAlias string) {
func refreshAllTablets(ctx context.Context) error {
return refreshTabletsUsing(ctx, func(tabletAlias string) {
DiscoverInstance(tabletAlias, false /* forceDiscovery */)
}, false /* forceRefresh */)
}

<<<<<<< HEAD
func refreshTabletsUsing(loader func(tabletAlias string), forceRefresh bool) {
if !IsLeaderOrActive() {
return
}
=======
func refreshTabletsUsing(ctx context.Context, loader func(tabletAlias string), forceRefresh bool) error {
>>>>>>> 91811154ac (`vtorc`: require topo for `Healthy: true` in `/debug/health` (#17129))
if len(clustersToWatch) == 0 { // all known clusters
ctx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
defer cancel()
cells, err := ts.GetKnownCells(ctx)
if err != nil {
log.Error(err)
return
return err
}

refreshCtx, refreshCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
refreshCtx, refreshCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
defer refreshCancel()
var wg sync.WaitGroup
for _, cell := range cells {
Expand All @@ -115,7 +128,7 @@ func refreshTabletsUsing(loader func(tabletAlias string), forceRefresh bool) {
keyspaceShards = append(keyspaceShards, &topo.KeyspaceShard{Keyspace: input[0], Shard: input[1]})
} else {
// Assume this is a keyspace and find all shards in keyspace
ctx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
defer cancel()
shards, err := ts.GetShardNames(ctx, ks)
if err != nil {
Expand All @@ -134,9 +147,9 @@ func refreshTabletsUsing(loader func(tabletAlias string), forceRefresh bool) {
}
if len(keyspaceShards) == 0 {
log.Errorf("Found no keyspaceShards for input: %+v", clustersToWatch)
return
return nil
}
refreshCtx, refreshCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
refreshCtx, refreshCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
defer refreshCancel()
var wg sync.WaitGroup
for _, ks := range keyspaceShards {
Expand All @@ -148,6 +161,7 @@ func refreshTabletsUsing(loader func(tabletAlias string), forceRefresh bool) {
}
wg.Wait()
}
return nil
}

func refreshTabletsInCell(ctx context.Context, cell string, loader func(tabletAlias string), forceRefresh bool) {
Expand Down
39 changes: 39 additions & 0 deletions go/vt/vtorc/logic/vtorc.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@
package logic

import (
<<<<<<< HEAD

Check failure on line 20 in go/vt/vtorc/logic/vtorc.go

View workflow job for this annotation

GitHub Actions / Code Coverage

missing import path

Check failure on line 20 in go/vt/vtorc/logic/vtorc.go

View workflow job for this annotation

GitHub Actions / Code Coverage

missing import path
"os"
"os/signal"
=======
"context"
>>>>>>> 91811154ac (`vtorc`: require topo for `Healthy: true` in `/debug/health` (#17129))
"sync"
"sync/atomic"
"syscall"
Expand All @@ -27,6 +31,7 @@ import (
"github.com/patrickmn/go-cache"
"github.com/rcrowley/go-metrics"
"github.com/sjmudd/stopwatch"
"golang.org/x/sync/errgroup"

"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/servenv"
Expand Down Expand Up @@ -415,6 +420,7 @@ func ContinuousDiscovery() {
}
}()
case <-tabletTopoTick:
<<<<<<< HEAD
// Create a wait group
var wg sync.WaitGroup

Expand All @@ -439,3 +445,36 @@ func ContinuousDiscovery() {
}
}
}
=======
ctx, cancel := context.WithTimeout(context.Background(), config.GetTopoInformationRefreshDuration())
if err := refreshAllInformation(ctx); err != nil {
log.Errorf("failed to refresh topo information: %+v", err)
}
cancel()
}
}
}

// refreshAllInformation refreshes both shard and tablet information. This is meant to be run on tablet topo ticks.
func refreshAllInformation(ctx context.Context) error {
// Create an errgroup
eg, ctx := errgroup.WithContext(ctx)

// Refresh all keyspace information.
eg.Go(func() error {
return RefreshAllKeyspacesAndShards(ctx)
})

// Refresh all tablets.
eg.Go(func() error {
return refreshAllTablets(ctx)
})

// Wait for both the refreshes to complete
err := eg.Wait()
if err == nil {
process.FirstDiscoveryCycleComplete.Store(true)
}
return err
}
>>>>>>> 91811154ac (`vtorc`: require topo for `Healthy: true` in `/debug/health` (#17129))
44 changes: 44 additions & 0 deletions go/vt/vtorc/logic/vtorc_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
package logic

import (
"context"
"sync/atomic"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/vt/topo/memorytopo"
"vitess.io/vitess/go/vt/vtorc/db"
"vitess.io/vitess/go/vt/vtorc/process"
)

func TestWaitForLocksRelease(t *testing.T) {
Expand Down Expand Up @@ -54,3 +60,41 @@ func waitForLocksReleaseAndGetTimeWaitedFor() time.Duration {
waitForLocksRelease()
return time.Since(start)
}

func TestRefreshAllInformation(t *testing.T) {
// Store the old flags and restore on test completion
oldTs := ts
defer func() {
ts = oldTs
}()

// Clear the database after the test. The easiest way to do that is to run all the initialization commands again.
defer func() {
db.ClearVTOrcDatabase()
}()

// Verify in the beginning, we have the first DiscoveredOnce field false.
_, discoveredOnce := process.HealthTest()
require.False(t, discoveredOnce)

// Create a memory topo-server and create the keyspace and shard records
ts = memorytopo.NewServer(context.Background(), cell1)
_, err := ts.GetOrCreateShard(context.Background(), keyspace, shard)
require.NoError(t, err)

// Test error
ctx, cancel := context.WithCancel(context.Background())
cancel() // cancel context to simulate timeout
require.Error(t, refreshAllInformation(ctx))
require.False(t, process.FirstDiscoveryCycleComplete.Load())
_, discoveredOnce = process.HealthTest()
require.False(t, discoveredOnce)

// Test success
ctx2, cancel2 := context.WithCancel(context.Background())
defer cancel2()
require.NoError(t, refreshAllInformation(ctx2))
require.True(t, process.FirstDiscoveryCycleComplete.Load())
_, discoveredOnce = process.HealthTest()
require.True(t, discoveredOnce)
}
7 changes: 7 additions & 0 deletions go/vt/vtorc/process/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,18 @@ func RegisterNode(nodeHealth *NodeHealth) (healthy bool, err error) {
}

// HealthTest attempts to write to the backend database and get a result
<<<<<<< HEAD
func HealthTest() (health *HealthStatus, err error) {
cacheKey := util.ProcessToken.Hash
if healthStatus, found := lastHealthCheckCache.Get(cacheKey); found {
return healthStatus.(*HealthStatus), nil
}
=======
func HealthTest() (health *NodeHealth, discoveredOnce bool) {
ThisNodeHealth.LastReported = time.Now()
discoveredOnce = FirstDiscoveryCycleComplete.Load()
ThisNodeHealth.Healthy = discoveredOnce && writeHealthToDatabase()
>>>>>>> 91811154ac (`vtorc`: require topo for `Healthy: true` in `/debug/health` (#17129))

health = &HealthStatus{Healthy: false, Hostname: ThisHostname, Token: util.ProcessToken.Hash}
defer lastHealthCheckCache.Set(cacheKey, health, cache.DefaultExpiration)
Expand Down
46 changes: 46 additions & 0 deletions go/vt/vtorc/process/health_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
Copyright 2024 The Vitess Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package process

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestHealthTest(t *testing.T) {
defer func() {
FirstDiscoveryCycleComplete.Store(false)
ThisNodeHealth = &NodeHealth{}
}()

require.Zero(t, ThisNodeHealth.LastReported)
require.False(t, ThisNodeHealth.Healthy)

ThisNodeHealth = &NodeHealth{}
health, discoveredOnce := HealthTest()
require.False(t, health.Healthy)
require.False(t, discoveredOnce)
require.NotZero(t, ThisNodeHealth.LastReported)

ThisNodeHealth = &NodeHealth{}
FirstDiscoveryCycleComplete.Store(true)
health, discoveredOnce = HealthTest()
require.True(t, health.Healthy)
require.True(t, discoveredOnce)
require.NotZero(t, ThisNodeHealth.LastReported)
}

0 comments on commit 4afc5b3

Please sign in to comment.