Skip to content

Commit

Permalink
Move parse logic and printing of cluster messages into the same place
Browse files Browse the repository at this point in the history
  • Loading branch information
johscheuer committed Sep 18, 2023
1 parent 4b5aecb commit 20246c7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 29 deletions.
10 changes: 1 addition & 9 deletions controllers/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func (r *FoundationDBClusterReconciler) Reconcile(ctx context.Context, request c
clusterLog.Info("Fetch machine-readable status for reconcilitation loop", "cacheStatus", cacheStatus)
status, err = r.getStatusFromClusterOrDummyStatus(clusterLog, cluster)
if err != nil {
return ctrl.Result{Requeue: true}, err
return ctrl.Result{Requeue: true, RequeueAfter: 10 * time.Second}, err
}
}

Expand Down Expand Up @@ -481,14 +481,6 @@ func (r *FoundationDBClusterReconciler) getStatusFromClusterOrDummyStatus(logger

status, err := adminClient.GetStatus()
if err == nil {
if len(status.Client.Messages) > 0 {
logger.Info("found client message(s) in the machine-readable status", "messages", status.Client.Messages)
}

if len(status.Cluster.Messages) > 0 {
logger.Info("found cluster message(s) in the machine-readable status", "messages", status.Cluster.Messages)
}

return status, nil
}

Expand Down
14 changes: 2 additions & 12 deletions fdbclient/admin_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,17 +285,7 @@ func (client *cliAdminClient) getStatusFromCli() (*fdbv1beta2.FoundationDBStatus
return nil, err
}

status := &fdbv1beta2.FoundationDBStatus{}
err = json.Unmarshal(contents, status)
if err != nil {
return nil, err
}

// TODO (johscheuer): Build a smarter retry mechanism here for timeouts that are not timeouts on the transaction
// level but rather on the get status itself, e.g. we should be checking for `status_incomplete_timeout`. We could specify
// a retry of X or we just force the operator to reconcile again.

return status, nil
return parseMachineReadableStatus(client.log, contents)
}

// getStatus uses fdbcli to connect to the FDB cluster, if the cluster is upgraded and the initial version returns no processes
Expand Down Expand Up @@ -327,7 +317,7 @@ func (client *cliAdminClient) GetStatus() (*fdbv1beta2.FoundationDBStatus, error
defer adminClientMutex.Unlock()

// This calls the database directly and fetches the status information from the system key space.
status, err := getStatusFromDB(client.fdbLibClient, MaxCliTimeout)
status, err := getStatusFromDB(client.fdbLibClient, client.log, MaxCliTimeout)
// There is a limitation in the multi version client if the cluster is only partially upgraded e.g. because not
// all fdbserver processes are restarted, then the multi version client sometimes picks the wrong version
// to connect to the cluster. This will result in an empty status only reporting the unreachable coordinators.
Expand Down
36 changes: 28 additions & 8 deletions fdbclient/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ package fdbclient
import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"path"
Expand All @@ -45,6 +46,31 @@ const (
defaultTransactionTimeout = 5 * time.Second
)

// parseMachineReadableStatus decodes the machine-readable status JSON in contents into a
// FoundationDBStatus. Client and cluster messages found in the status are logged with the
// provided logger. If one of the cluster messages is named "status_incomplete_timeout", a
// TimeoutError is returned instead of the status; JSON decoding errors are returned as-is.
func parseMachineReadableStatus(logger logr.Logger, contents []byte) (*fdbv1beta2.FoundationDBStatus, error) {
	parsed := &fdbv1beta2.FoundationDBStatus{}
	if err := json.Unmarshal(contents, parsed); err != nil {
		return nil, err
	}

	if len(parsed.Client.Messages) != 0 {
		logger.Info("found client message(s) in the machine-readable status", "messages", parsed.Client.Messages)
	}

	// Without cluster messages there is nothing more to report or inspect.
	if len(parsed.Cluster.Messages) == 0 {
		return parsed, nil
	}

	logger.Info("found cluster message(s) in the machine-readable status", "messages", parsed.Cluster.Messages)

	// A status that is incomplete because of a timeout is surfaced as an error rather than
	// handed back to the caller. This will force a new reconciliation.
	for _, clusterMessage := range parsed.Cluster.Messages {
		if clusterMessage.Name != "status_incomplete_timeout" {
			continue
		}

		return nil, fdbv1beta2.TimeoutError{Err: fmt.Errorf("found \"status_incomplete_timeout\" in cluster messages")}
	}

	return parsed, nil
}

// getFDBDatabase opens an FDB database.
func getFDBDatabase(cluster *fdbv1beta2.FoundationDBCluster) (fdb.Database, error) {
clusterFile, err := createClusterFile(cluster)
Expand Down Expand Up @@ -97,19 +123,13 @@ func getConnectionStringFromDB(libClient fdbLibClient, timeout time.Duration) ([
}

// getStatusFromDB gets the database's status directly from the system key
func getStatusFromDB(libClient fdbLibClient, timeout time.Duration) (*fdbv1beta2.FoundationDBStatus, error) {
func getStatusFromDB(libClient fdbLibClient, logger logr.Logger, timeout time.Duration) (*fdbv1beta2.FoundationDBStatus, error) {
contents, err := libClient.getValueFromDBUsingKey("\xff\xff/status/json", timeout)
if err != nil {
return nil, err
}

status := &fdbv1beta2.FoundationDBStatus{}
err = json.Unmarshal(contents, status)
if err != nil {
return nil, err
}

return status, nil
return parseMachineReadableStatus(logger, contents)
}

type realDatabaseClientProvider struct {
Expand Down

0 comments on commit 20246c7

Please sign in to comment.