Skip to content

Commit

Permalink
Sync from server repo (086005a29e1)
Browse files Browse the repository at this point in the history
  • Loading branch information
ctran committed Jun 10, 2024
1 parent b57ab8c commit bdbde51
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 10 deletions.
11 changes: 11 additions & 0 deletions vclusterops/cluster_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ type clusterOp interface {
setupBasicInfo()
loadCertsIfNeeded(certs *httpsCerts, findCertsInOptions bool) error
isSkipExecute() bool
filterUnreachableHosts(execContext *opEngineExecContext)
}

/* Cluster ops basic fields and functions
Expand Down Expand Up @@ -430,6 +431,16 @@ func (op *opBase) checkResponseStatusCode(resp httpsResponseStatus, host string)
return nil
}

// filterUnreachableHosts drops every host listed in
// execContext.unreachableHosts from op.hosts. The op's host list is left
// untouched when no unreachable hosts have been recorded.
func (op *opBase) filterUnreachableHosts(execContext *opEngineExecContext) {
	if unreachable := execContext.unreachableHosts; len(unreachable) > 0 {
		op.hosts = util.SliceDiff(op.hosts, unreachable)
	}
}

/* Sensitive fields in request body
*/
type sensitiveFields struct {
Expand Down
2 changes: 2 additions & 0 deletions vclusterops/cluster_op_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ func (opEngine *VClusterOpEngine) runInstruction(
op.setupSpinner()
defer op.cleanupSpinner()

op.filterUnreachableHosts(execContext)

op.logPrepare()
err := op.prepare(execContext)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions vclusterops/cluster_op_engine_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ type opEngineExecContext struct {
systemTableList systemTableListInfo // used for staging system tables
// hosts on which the wrong authentication occurred
hostsWithWrongAuth []string

// hosts that are not reachable through NMA
unreachableHosts []string
}

func makeOpEngineExecContext(logger vlog.Printer) opEngineExecContext {
Expand Down
8 changes: 7 additions & 1 deletion vclusterops/fetch_node_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ func (vcc VClusterCommands) VFetchNodeState(options *VFetchNodeStateOptions) ([]
}
}

// display warning if any unreachable hosts detected
if len(clusterOpEngine.execContext.unreachableHosts) > 0 {
vcc.DisplayWarning("hosts %v are unreachable, please check the NMA connectivity in the hosts",
clusterOpEngine.execContext.unreachableHosts)
}

return nodeStates, nil
}

Expand Down Expand Up @@ -190,7 +196,7 @@ func (vcc VClusterCommands) produceListAllNodesInstructions(
}
}

nmaHealthOp := makeNMAHealthOp(options.Hosts)
nmaHealthOp := makeNMAHealthOpSkipUnreachable(options.Hosts)
nmaReadVerticaVersionOp := makeNMAReadVerticaVersionOp(vdb)

// Trim host list
Expand Down
22 changes: 15 additions & 7 deletions vclusterops/https_stop_db_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,22 @@ type httpsStopDBOp struct {
sandbox string
mainCluster bool
RequestParams map[string]string
isEon bool
}

func makeHTTPSStopDBOp(useHTTPPassword bool, userName string,
httpsPassword *string, timeout *int, sandbox string, mainCluster bool) (httpsStopDBOp, error) {
httpsPassword *string, timeout *int, sandbox string, mainCluster, isEon bool) (httpsStopDBOp, error) {
op := httpsStopDBOp{}
op.name = "HTTPSStopDBOp"
op.description = "Stop database"
op.useHTTPPassword = useHTTPPassword
op.sandbox = sandbox
op.mainCluster = mainCluster
op.isEon = isEon

// set the query params, "timeout" is optional
op.RequestParams = make(map[string]string)
if timeout != nil {
if timeout != nil && *timeout != 0 {
op.RequestParams["timeout"] = strconv.Itoa(*timeout)
}

Expand Down Expand Up @@ -139,7 +141,8 @@ func (op *httpsStopDBOp) processResult(_ *opEngineExecContext) error {
// decode the json-format response
// The successful response object will be a dictionary:
// 1. shutdown without drain
// {"detail": "Shutdown: moveout complete"}
// 1.1 enterprise DB {"detail": "Shutdown: moveout complete"}
// 1.2 eon DB {"detail": "Shutdown: sync complete"}
// 2. shutdown with drain
// {"detail": "Set subcluster (default_subcluster) to draining state\n
// Waited for 1 nodes to drain\n
Expand All @@ -151,21 +154,26 @@ func (op *httpsStopDBOp) processResult(_ *opEngineExecContext) error {
allErrs = errors.Join(allErrs, err)
continue
}

if _, ok := op.RequestParams["timeout"]; ok {
if re.MatchString(response["details"]) {
err = fmt.Errorf(`[%s] response detail should like 'Set subcluster to draining state ...' but got '%s'`,
op.name, response["detail"])
allErrs = errors.Join(allErrs, err)
}
} else {
if response["detail"] != "Shutdown: moveout complete" {
err = fmt.Errorf(`[%s] response detail should be 'Shutdown: moveout complete' but got '%s'`, op.name, response["detail"])
// Without a "timeout" request parameter (drain seconds unset or 0), the shutdown does not drain.
// In that case an Eon database should respond with "Shutdown: sync complete",
// while an enterprise database should respond with "Shutdown: moveout complete".
expectedDetail := "Shutdown: moveout complete"
if op.isEon {
expectedDetail = "Shutdown: sync complete"
}
if response["detail"] != expectedDetail {
err = fmt.Errorf(`[%s] response detail should be '%s' but got '%s'`, op.name, expectedDetail, response["detail"])
allErrs = errors.Join(allErrs, err)
}
}
}

return allErrs
}

Expand Down
26 changes: 25 additions & 1 deletion vclusterops/nma_health_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,15 @@ import (
"errors"
)

// nmaHealthCheckTimeout is the timeout, in seconds, assigned to every NMA
// health check request. 30 seconds is considered enough to test the NMA
// connection.
const nmaHealthCheckTimeout = 30

// nmaHealthOp is the cluster op that queries the NMA "health" endpoint on a
// set of hosts.
type nmaHealthOp struct {
opBase
// skipUnreachableHost controls how unreachable hosts are treated.
// When true, processResult records the unreachable hosts in the exec
// context and returns nil instead of the collected errors.
// e.g., list_all_nodes may need this when the host(s) are not connectable
skipUnreachableHost bool
}

func makeNMAHealthOp(hosts []string) nmaHealthOp {
Expand All @@ -31,12 +38,19 @@ func makeNMAHealthOp(hosts []string) nmaHealthOp {
return op
}

// makeNMAHealthOpSkipUnreachable builds an NMA health op through
// makeNMAHealthOp and turns on skipUnreachableHost on the result.
func makeNMAHealthOpSkipUnreachable(hosts []string) nmaHealthOp {
	healthOp := makeNMAHealthOp(hosts)
	healthOp.skipUnreachableHost = true

	return healthOp
}

// setupClusterHTTPRequest works as the module setup in Admintools
func (op *nmaHealthOp) setupClusterHTTPRequest(hosts []string) error {
for _, host := range hosts {
httpRequest := hostHTTPRequest{}
httpRequest.Method = GetMethod
httpRequest.buildNMAEndpoint("health")
httpRequest.Timeout = nmaHealthCheckTimeout
op.clusterHTTPRequest.RequestCollection[host] = httpRequest
}

Expand All @@ -61,8 +75,9 @@ func (op *nmaHealthOp) finalize(_ *opEngineExecContext) error {
return nil
}

func (op *nmaHealthOp) processResult(_ *opEngineExecContext) error {
func (op *nmaHealthOp) processResult(execContext *opEngineExecContext) error {
var allErrs error
var unreachableHosts []string
for host, result := range op.clusterHTTPRequest.ResultCollection {
op.logResponse(host, result)

Expand All @@ -72,9 +87,18 @@ func (op *nmaHealthOp) processResult(_ *opEngineExecContext) error {
return errors.Join(allErrs, err)
}
} else {
unreachableHosts = append(unreachableHosts, host)
allErrs = errors.Join(allErrs, result.err)
}
}

if op.skipUnreachableHost {
execContext.unreachableHosts = unreachableHosts
if len(unreachableHosts) > 0 {
op.stopFailSpinnerWithMessage("warning! hosts %v are unreachable", unreachableHosts)
}
return nil
}

return allErrs
}
2 changes: 1 addition & 1 deletion vclusterops/stop_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ func (vcc *VClusterCommands) produceStopDBInstructions(options *VStopDatabaseOpt
}

httpsStopDBOp, err := makeHTTPSStopDBOp(usePassword, options.UserName, options.Password, options.DrainSeconds,
options.SandboxName, options.MainCluster)
options.SandboxName, options.MainCluster, options.IsEon)
if err != nil {
return instructions, err
}
Expand Down

0 comments on commit bdbde51

Please sign in to comment.