From bdbde51b8c0b68064e69d69a6fc01783d8b9b507 Mon Sep 17 00:00:00 2001 From: ctran Date: Mon, 10 Jun 2024 12:13:28 -0400 Subject: [PATCH] Sync from server repo (086005a29e1) --- vclusterops/cluster_op.go | 11 ++++++++++ vclusterops/cluster_op_engine.go | 2 ++ vclusterops/cluster_op_engine_context.go | 3 +++ vclusterops/fetch_node_state.go | 8 +++++++- vclusterops/https_stop_db_op.go | 22 +++++++++++++------- vclusterops/nma_health_op.go | 26 +++++++++++++++++++++++- vclusterops/stop_db.go | 2 +- 7 files changed, 64 insertions(+), 10 deletions(-) diff --git a/vclusterops/cluster_op.go b/vclusterops/cluster_op.go index d71a51d..5de3837 100644 --- a/vclusterops/cluster_op.go +++ b/vclusterops/cluster_op.go @@ -196,6 +196,7 @@ type clusterOp interface { setupBasicInfo() loadCertsIfNeeded(certs *httpsCerts, findCertsInOptions bool) error isSkipExecute() bool + filterUnreachableHosts(execContext *opEngineExecContext) } /* Cluster ops basic fields and functions @@ -430,6 +431,16 @@ func (op *opBase) checkResponseStatusCode(resp httpsResponseStatus, host string) return nil } +// filterUnreachableHosts filters out the unreachable hosts from the op +// if the unreachableHosts list size > 0 +func (op *opBase) filterUnreachableHosts(execContext *opEngineExecContext) { + if len(execContext.unreachableHosts) == 0 { + return + } + + op.hosts = util.SliceDiff(op.hosts, execContext.unreachableHosts) +} + /* Sensitive fields in request body */ type sensitiveFields struct { diff --git a/vclusterops/cluster_op_engine.go b/vclusterops/cluster_op_engine.go index 0bcf1a1..4fa26e5 100644 --- a/vclusterops/cluster_op_engine.go +++ b/vclusterops/cluster_op_engine.go @@ -66,6 +66,8 @@ func (opEngine *VClusterOpEngine) runInstruction( op.setupSpinner() defer op.cleanupSpinner() + op.filterUnreachableHosts(execContext) + op.logPrepare() err := op.prepare(execContext) if err != nil { diff --git a/vclusterops/cluster_op_engine_context.go b/vclusterops/cluster_op_engine_context.go index 
1bd4024..3c41025 100644 --- a/vclusterops/cluster_op_engine_context.go +++ b/vclusterops/cluster_op_engine_context.go @@ -38,6 +38,9 @@ type opEngineExecContext struct { systemTableList systemTableListInfo // used for staging system tables // hosts on which the wrong authentication occurred hostsWithWrongAuth []string + + // hosts that are not reachable through NMA + unreachableHosts []string } func makeOpEngineExecContext(logger vlog.Printer) opEngineExecContext { diff --git a/vclusterops/fetch_node_state.go b/vclusterops/fetch_node_state.go index c25c79d..d8a5649 100644 --- a/vclusterops/fetch_node_state.go +++ b/vclusterops/fetch_node_state.go @@ -112,6 +112,12 @@ func (vcc VClusterCommands) VFetchNodeState(options *VFetchNodeStateOptions) ([] } } + // display warning if any unreachable hosts detected + if len(clusterOpEngine.execContext.unreachableHosts) > 0 { + vcc.DisplayWarning("hosts %v are unreachable, please check the NMA connectivity in the hosts", + clusterOpEngine.execContext.unreachableHosts) + } + return nodeStates, nil } @@ -190,7 +196,7 @@ func (vcc VClusterCommands) produceListAllNodesInstructions( } } - nmaHealthOp := makeNMAHealthOp(options.Hosts) + nmaHealthOp := makeNMAHealthOpSkipUnreachable(options.Hosts) nmaReadVerticaVersionOp := makeNMAReadVerticaVersionOp(vdb) // Trim host list diff --git a/vclusterops/https_stop_db_op.go b/vclusterops/https_stop_db_op.go index d40a7d9..37e102b 100644 --- a/vclusterops/https_stop_db_op.go +++ b/vclusterops/https_stop_db_op.go @@ -30,20 +30,22 @@ type httpsStopDBOp struct { sandbox string mainCluster bool RequestParams map[string]string + isEon bool } func makeHTTPSStopDBOp(useHTTPPassword bool, userName string, - httpsPassword *string, timeout *int, sandbox string, mainCluster bool) (httpsStopDBOp, error) { + httpsPassword *string, timeout *int, sandbox string, mainCluster, isEon bool) (httpsStopDBOp, error) { op := httpsStopDBOp{} op.name = "HTTPSStopDBOp" op.description = "Stop database"
op.useHTTPPassword = useHTTPPassword op.sandbox = sandbox op.mainCluster = mainCluster + op.isEon = isEon // set the query params, "timeout" is optional op.RequestParams = make(map[string]string) - if timeout != nil { + if timeout != nil && *timeout != 0 { op.RequestParams["timeout"] = strconv.Itoa(*timeout) } @@ -139,7 +141,8 @@ func (op *httpsStopDBOp) processResult(_ *opEngineExecContext) error { // decode the json-format response // The successful response object will be a dictionary: // 1. shutdown without drain - // {"detail": "Shutdown: moveout complete"} + // 1.1 enterprise DB {"detail": "Shutdown: moveout complete"} + // 1.2 eon DB {"detail": "Shutdown: sync complete"} // 2. shutdown with drain // {"detail": "Set subcluster (default_subcluster) to draining state\n // Waited for 1 nodes to drain\n @@ -151,7 +154,6 @@ func (op *httpsStopDBOp) processResult(_ *opEngineExecContext) error { allErrs = errors.Join(allErrs, err) continue } - if _, ok := op.RequestParams["timeout"]; ok { if re.MatchString(response["details"]) { err = fmt.Errorf(`[%s] response detail should like 'Set subcluster to draining state ...' but got '%s'`, @@ -159,13 +161,19 @@ func (op *httpsStopDBOp) processResult(_ *opEngineExecContext) error { allErrs = errors.Join(allErrs, err) } } else { - if response["detail"] != "Shutdown: moveout complete" { - err = fmt.Errorf(`[%s] response detail should be 'Shutdown: moveout complete' but got '%s'`, op.name, response["detail"]) + // If the timeout is unset or 0, we will not use a draining shutdown. + // In that case an Eon DB should respond with "Shutdown: sync complete". + // Otherwise (enterprise DB), the response should be "Shutdown: moveout complete".
+ expectedDetail := "Shutdown: moveout complete" + if op.isEon { + expectedDetail = "Shutdown: sync complete" + } + if response["detail"] != expectedDetail { + err = fmt.Errorf(`[%s] response detail should be '%s' but got '%s'`, op.name, expectedDetail, response["detail"]) allErrs = errors.Join(allErrs, err) } } } - return allErrs } diff --git a/vclusterops/nma_health_op.go b/vclusterops/nma_health_op.go index 51966ca..c5dadce 100644 --- a/vclusterops/nma_health_op.go +++ b/vclusterops/nma_health_op.go @@ -19,8 +19,15 @@ import ( "errors" ) +// we limit the health check timeout to 30 seconds +// we believe that this is enough to test the NMA connection +const nmaHealthCheckTimeout = 30 + type nmaHealthOp struct { opBase + // sometimes, we need to skip unreachable hosts + // e.g., list_all_nodes may need this when the host(s) are not connectable + skipUnreachableHost bool } func makeNMAHealthOp(hosts []string) nmaHealthOp { @@ -31,12 +38,19 @@ func makeNMAHealthOp(hosts []string) nmaHealthOp { return op } +func makeNMAHealthOpSkipUnreachable(hosts []string) nmaHealthOp { + op := makeNMAHealthOp(hosts) + op.skipUnreachableHost = true + return op +} + // setupClusterHTTPRequest works as the module setup in Admintools func (op *nmaHealthOp) setupClusterHTTPRequest(hosts []string) error { for _, host := range hosts { httpRequest := hostHTTPRequest{} httpRequest.Method = GetMethod httpRequest.buildNMAEndpoint("health") + httpRequest.Timeout = nmaHealthCheckTimeout op.clusterHTTPRequest.RequestCollection[host] = httpRequest } @@ -61,8 +75,9 @@ func (op *nmaHealthOp) finalize(_ *opEngineExecContext) error { return nil } -func (op *nmaHealthOp) processResult(_ *opEngineExecContext) error { +func (op *nmaHealthOp) processResult(execContext *opEngineExecContext) error { var allErrs error + var unreachableHosts []string for host, result := range op.clusterHTTPRequest.ResultCollection { op.logResponse(host, result) @@ -72,9 +87,18 @@ func (op *nmaHealthOp) processResult(_ 
*opEngineExecContext) error { return errors.Join(allErrs, err) } } else { + unreachableHosts = append(unreachableHosts, host) allErrs = errors.Join(allErrs, result.err) } } + if op.skipUnreachableHost { + execContext.unreachableHosts = unreachableHosts + if len(unreachableHosts) > 0 { + op.stopFailSpinnerWithMessage("warning! hosts %v are unreachable", unreachableHosts) + } + return nil + } + return allErrs } diff --git a/vclusterops/stop_db.go b/vclusterops/stop_db.go index c3f5763..c283506 100644 --- a/vclusterops/stop_db.go +++ b/vclusterops/stop_db.go @@ -209,7 +209,7 @@ func (vcc *VClusterCommands) produceStopDBInstructions(options *VStopDatabaseOpt } httpsStopDBOp, err := makeHTTPSStopDBOp(usePassword, options.UserName, options.Password, options.DrainSeconds, - options.SandboxName, options.MainCluster) + options.SandboxName, options.MainCluster, options.IsEon) if err != nil { return instructions, err }