diff --git a/vclusterops/https_check_subcluster_op.go b/vclusterops/https_check_subcluster_op.go index cb24ead..0240604 100644 --- a/vclusterops/https_check_subcluster_op.go +++ b/vclusterops/https_check_subcluster_op.go @@ -27,18 +27,19 @@ type httpsCheckSubclusterOp struct { scName string isSecondary bool ctlSetSize int + cmdType CmdType } -func makeHTTPSCheckSubclusterOp(useHTTPPassword bool, userName string, httpsPassword *string, - scName string, isPrimary bool, ctlSetSize int) (httpsCheckSubclusterOp, error) { +func makeHTTPSGetSubclusterInfoOp(useHTTPPassword bool, userName string, httpsPassword *string, + scName string, cmdType CmdType) (httpsCheckSubclusterOp, error) { op := httpsCheckSubclusterOp{} op.name = "HTTPSCheckSubclusterOp" op.description = "Collect information for the specified subcluster" op.scName = scName - op.isSecondary = !isPrimary - op.ctlSetSize = ctlSetSize + op.cmdType = cmdType op.useHTTPPassword = useHTTPPassword + if useHTTPPassword { err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName) if err != nil { @@ -49,6 +50,16 @@ func makeHTTPSCheckSubclusterOp(useHTTPPassword bool, userName string, httpsPass } return op, nil } +func makeHTTPSCheckSubclusterOp(useHTTPPassword bool, userName string, httpsPassword *string, + scName string, isPrimary bool, ctlSetSize int) (httpsCheckSubclusterOp, error) { + op, err := makeHTTPSGetSubclusterInfoOp(useHTTPPassword, userName, httpsPassword, scName, AddSubclusterCmd) + if err != nil { + return op, err + } + op.isSecondary = !isPrimary + op.ctlSetSize = ctlSetSize + return op, nil +} func (op *httpsCheckSubclusterOp) setupClusterHTTPRequest(hosts []string) error { for _, host := range hosts { @@ -87,10 +98,19 @@ type scInfo struct { SCName string `json:"subcluster_name"` IsSecondary bool `json:"is_secondary"` CtlSetSize int `json:"control_set_size"` + Sandbox string `json:"sandbox"` + IsCritical bool `json:"is_critical"` +} + +// Return true if all the results need to be scanned to figure out +// correct subcluster details +func completeScanRequired(cmdType CmdType) bool { + return cmdType == StopSubclusterCmd } func (op *httpsCheckSubclusterOp) processResult(_ *opEngineExecContext) error { var err error + isSubclusterCritical := false for host, result := range op.clusterHTTPRequest.ResultCollection { op.logResponse(host, result) @@ -107,40 +127,64 @@ func (op *httpsCheckSubclusterOp) processResult(_ *opEngineExecContext) error { // decode the json-format response // A successful response object will be like below: + /* - { - "subcluster_name": "sc1", - "control_set_size": 2, - "is_secondary": true, - "is_default": false, - "sandbox": "" - } + { + "subcluster_name": "sc1", + "control_set_size": 2, + "is_secondary": true, + "is_default": false, + "sandbox": "", + "is_critical": false + } */ subclusterInfo := scInfo{} err = op.parseAndCheckResponse(host, result.content, &subclusterInfo) if err != nil { return fmt.Errorf(`[%s] fail to parse result on host %s, details: %w`, op.name, host, err) } - - if subclusterInfo.SCName != op.scName { - return fmt.Errorf(`[%s] new subcluster name should be '%s' but got '%s'`, op.name, op.scName, subclusterInfo.SCName) - } - if subclusterInfo.IsSecondary != op.isSecondary { - if op.isSecondary { - return fmt.Errorf(`[%s] new subcluster should be a secondary subcluster but got a primary subcluster`, op.name) + if op.cmdType == AddSubclusterCmd { + err = op.verifySubclusterDetails(&subclusterInfo) + if err != nil { + return fmt.Errorf(`[%s] fail to verify subcluster info on host %s, details: %w`, op.name, host, err) } - return fmt.Errorf(`[%s] new subcluster should be a primary subcluster but got a secondary subcluster`, op.name) } - if subclusterInfo.CtlSetSize != op.ctlSetSize { - return fmt.Errorf(`[%s] new subcluster should have control set size as %d but got %d`, op.name, op.ctlSetSize, subclusterInfo.CtlSetSize) + + // cache subcluster critical info for stop subcluster command + if subclusterInfo.IsCritical { + isSubclusterCritical = true } - return nil + // early return if the command only needs response from one host + if !completeScanRequired(op.cmdType) { + return nil + } + } + if op.cmdType == StopSubclusterCmd { + if isSubclusterCritical { + return fmt.Errorf(`[%s] subcluster %s is critical, shutting the subcluster down will cause the whole database/sandbox shutdown`, + op.name, op.scName) + } } return err } +func (op *httpsCheckSubclusterOp) verifySubclusterDetails(subclusterInfo *scInfo) error { + if subclusterInfo.SCName != op.scName { + return fmt.Errorf(`[%s] new subcluster name should be '%s' but got '%s'`, op.name, op.scName, subclusterInfo.SCName) + } + if subclusterInfo.IsSecondary != op.isSecondary { + if op.isSecondary { + return fmt.Errorf(`[%s] new subcluster should be a secondary subcluster but got a primary subcluster`, op.name) + } + return fmt.Errorf(`[%s] new subcluster should be a primary subcluster but got a secondary subcluster`, op.name) + } + if subclusterInfo.CtlSetSize != op.ctlSetSize { + return fmt.Errorf(`[%s] new subcluster should have control set size as %d but got %d`, op.name, op.ctlSetSize, subclusterInfo.CtlSetSize) + } + return nil +} func (op *httpsCheckSubclusterOp) finalize(_ *opEngineExecContext) error { return nil } diff --git a/vclusterops/https_get_up_nodes_op.go b/vclusterops/https_get_up_nodes_op.go index 18e14e1..0da6eba 100644 --- a/vclusterops/https_get_up_nodes_op.go +++ b/vclusterops/https_get_up_nodes_op.go @@ -298,25 +298,6 @@ func (op *httpsGetUpNodesOp) validateHosts(nodesStates nodesStateInfo) error { return nil } -// Confirm shutting down the subcluster doesn't crash the database -func (op *httpsGetUpNodesOp) isSubclusterCritical(nodesStates nodesStateInfo, upScNodes mapset.Set[NodeInfo]) error { - allUpPrimaries := mapset.NewSet[string]() - upScHosts := mapset.NewSet[string]() - for _, n := range upScNodes.ToSlice() { - upScHosts.Add(n.Address) - } - for _, node := range nodesStates.NodeList { - if node.Sandbox == op.sandbox && node.State == util.NodeUpState && node.IsPrimary { - allUpPrimaries.Add(node.Address) - } - } - remainingPrimaries := allUpPrimaries.Difference(upScHosts) - if remainingPrimaries.Cardinality() == 0 { - return fmt.Errorf("subcluster %s is critical, shutting the subcluster down will cause the whole database shutdown", op.scName) - } - return nil -} - // Check if host is eligible to add to the UP hostlist func (op *httpsGetUpNodesOp) checkUpHostEligible(node *nodeStateInfo) bool { // Add subcluster needs to get an UP node from main cluster as initiator @@ -362,10 +343,6 @@ func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host str if !foundSC { return fmt.Errorf(`[%s] cannot find subcluster %s in database %s`, op.name, op.scName, op.DBName) } - if op.isScPrimary { - err = op.isSubclusterCritical(nodesStates, upScNodes) - return err - } } return nil } diff --git a/vclusterops/stop_subcluster.go b/vclusterops/stop_subcluster.go index 65b7ec8..128db65 100644 --- a/vclusterops/stop_subcluster.go +++ b/vclusterops/stop_subcluster.go @@ -170,6 +170,12 @@ func (vcc *VClusterCommands) produceStopSCInstructions(options *VStopSubclusterO } } + httpsGetSubclusterInfoOp, err := makeHTTPSGetSubclusterInfoOp(usePassword, options.UserName, options.Password, + options.SCName, StopSubclusterCmd) + if err != nil { + return instructions, err + } + httpsGetUpNodesOp, err := makeHTTPSGetUpScNodesOp(options.DBName, options.Hosts, usePassword, options.UserName, options.Password, StopSubclusterCmd, options.SCName) if err != nil { @@ -194,6 +200,7 @@ func (vcc *VClusterCommands) produceStopSCInstructions(options *VStopSubclusterO instructions = append(instructions, &httpsGetUpNodesOp, + &httpsGetSubclusterInfoOp, &httpsSyncCatalogOp, &httpsStopSCOp, &httpsCheckDBRunningOp,