Skip to content

Commit

Permalink
Sync from server repo (3e8a92bfbc6)
Browse files Browse the repository at this point in the history
  • Loading branch information
releng committed Oct 17, 2024
1 parent caac7f6 commit 8dffa37
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 45 deletions.
88 changes: 66 additions & 22 deletions vclusterops/https_check_subcluster_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,19 @@ type httpsCheckSubclusterOp struct {
scName string
isSecondary bool
ctlSetSize int
cmdType CmdType
}

func makeHTTPSCheckSubclusterOp(useHTTPPassword bool, userName string, httpsPassword *string,
scName string, isPrimary bool, ctlSetSize int) (httpsCheckSubclusterOp, error) {
func makeHTTPSGetSubclusterInfoOp(useHTTPPassword bool, userName string, httpsPassword *string,
scName string, cmdType CmdType) (httpsCheckSubclusterOp, error) {
op := httpsCheckSubclusterOp{}
op.name = "HTTPSCheckSubclusterOp"
op.description = "Collect information for the specified subcluster"
op.scName = scName
op.isSecondary = !isPrimary
op.ctlSetSize = ctlSetSize
op.cmdType = cmdType

op.useHTTPPassword = useHTTPPassword

if useHTTPPassword {
err := util.ValidateUsernameAndPassword(op.name, useHTTPPassword, userName)
if err != nil {
Expand All @@ -49,6 +50,16 @@ func makeHTTPSCheckSubclusterOp(useHTTPPassword bool, userName string, httpsPass
}
return op, nil
}
// makeHTTPSCheckSubclusterOp builds an httpsCheckSubclusterOp for the add
// subcluster command. It delegates the common setup to
// makeHTTPSGetSubclusterInfoOp (passing AddSubclusterCmd) and, when that
// succeeds, records the values the new subcluster is expected to have:
// isSecondary is stored as the negation of isPrimary, and ctlSetSize is stored
// as given. If the delegate fails, its op and error are returned unchanged.
func makeHTTPSCheckSubclusterOp(useHTTPPassword bool, userName string, httpsPassword *string,
	scName string, isPrimary bool, ctlSetSize int) (httpsCheckSubclusterOp, error) {
	checkOp, buildErr := makeHTTPSGetSubclusterInfoOp(useHTTPPassword, userName, httpsPassword, scName, AddSubclusterCmd)
	if buildErr == nil {
		// expectations are only filled in for a successfully built op
		checkOp.isSecondary = !isPrimary
		checkOp.ctlSetSize = ctlSetSize
	}
	return checkOp, buildErr
}

func (op *httpsCheckSubclusterOp) setupClusterHTTPRequest(hosts []string) error {
for _, host := range hosts {
Expand Down Expand Up @@ -87,10 +98,19 @@ type scInfo struct {
SCName string `json:"subcluster_name"`
IsSecondary bool `json:"is_secondary"`
CtlSetSize int `json:"control_set_size"`
Sandbox string `json:"sandbox"`
IsCritical bool `json:"is_critical"`
}

// completeScanRequired reports whether the results from all hosts have to be
// scanned to work out the correct subcluster details. It is true only for
// StopSubclusterCmd; every other command type yields false.
func completeScanRequired(cmdType CmdType) bool {
	switch cmdType {
	case StopSubclusterCmd:
		return true
	default:
		return false
	}
}

func (op *httpsCheckSubclusterOp) processResult(_ *opEngineExecContext) error {
var err error
isSubclusterCritical := false

for host, result := range op.clusterHTTPRequest.ResultCollection {
op.logResponse(host, result)
Expand All @@ -107,40 +127,64 @@ func (op *httpsCheckSubclusterOp) processResult(_ *opEngineExecContext) error {

// decode the json-format response
// A successful response object will be like below:

/*
{
"subcluster_name": "sc1",
"control_set_size": 2,
"is_secondary": true,
"is_default": false,
"sandbox": ""
}
{
"subcluster_name": "sc1",
"control_set_size": 2,
"is_secondary": true,
"is_default": false,
"sandbox": "",
"is_critical": false
}
*/
subclusterInfo := scInfo{}
err = op.parseAndCheckResponse(host, result.content, &subclusterInfo)
if err != nil {
return fmt.Errorf(`[%s] fail to parse result on host %s, details: %w`, op.name, host, err)
}

if subclusterInfo.SCName != op.scName {
return fmt.Errorf(`[%s] new subcluster name should be '%s' but got '%s'`, op.name, op.scName, subclusterInfo.SCName)
}
if subclusterInfo.IsSecondary != op.isSecondary {
if op.isSecondary {
return fmt.Errorf(`[%s] new subcluster should be a secondary subcluster but got a primary subcluster`, op.name)
if op.cmdType == AddSubclusterCmd {
err = op.verifySubclusterDetails(&subclusterInfo)
if err != nil {
return fmt.Errorf(`[%s] fail to verify subcluster info on host %s, details: %w`, op.name, host, err)
}
return fmt.Errorf(`[%s] new subcluster should be a primary subcluster but got a secondary subcluster`, op.name)
}
if subclusterInfo.CtlSetSize != op.ctlSetSize {
return fmt.Errorf(`[%s] new subcluster should have control set size as %d but got %d`, op.name, op.ctlSetSize, subclusterInfo.CtlSetSize)

// cache subcluster critical info for stop subcluster command
if subclusterInfo.IsCritical {
isSubclusterCritical = true
}

return nil
// early return if the command only needs response from one host
if !completeScanRequired(op.cmdType) {
return nil
}
}
if op.cmdType == StopSubclusterCmd {
if isSubclusterCritical {
return fmt.Errorf(`[%s] subcluster %s is critical, shutting the subcluster down will cause the whole database/sandbox shutdown`,
op.name, op.scName)
}
}

return err
}

// verifySubclusterDetails compares the subcluster info reported by a host with
// the values stored on the op. The checks run in a fixed order (name, then
// primary/secondary type, then control set size) and the first mismatch is
// returned as an error. It returns nil when all three match.
func (op *httpsCheckSubclusterOp) verifySubclusterDetails(subclusterInfo *scInfo) error {
	switch {
	case subclusterInfo.SCName != op.scName:
		return fmt.Errorf(`[%s] new subcluster name should be '%s' but got '%s'`, op.name, op.scName, subclusterInfo.SCName)
	case op.isSecondary && !subclusterInfo.IsSecondary:
		// expected secondary, host reported primary
		return fmt.Errorf(`[%s] new subcluster should be a secondary subcluster but got a primary subcluster`, op.name)
	case !op.isSecondary && subclusterInfo.IsSecondary:
		// expected primary, host reported secondary
		return fmt.Errorf(`[%s] new subcluster should be a primary subcluster but got a secondary subcluster`, op.name)
	case subclusterInfo.CtlSetSize != op.ctlSetSize:
		return fmt.Errorf(`[%s] new subcluster should have control set size as %d but got %d`, op.name, op.ctlSetSize, subclusterInfo.CtlSetSize)
	}
	return nil
}
// finalize is a no-op for this op: it ignores the execution context and
// always returns nil.
func (op *httpsCheckSubclusterOp) finalize(_ *opEngineExecContext) error {
return nil
}
23 changes: 0 additions & 23 deletions vclusterops/https_get_up_nodes_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,25 +298,6 @@ func (op *httpsGetUpNodesOp) validateHosts(nodesStates nodesStateInfo) error {
return nil
}

// isSubclusterCritical confirms that shutting down the subcluster does not
// take the whole database down with it. It collects the addresses of the
// nodes in upScNodes and the addresses of every primary node in the op's
// sandbox whose state is util.NodeUpState, then removes the former from the
// latter. If nothing is left, an error naming the subcluster is returned;
// otherwise the result is nil.
func (op *httpsGetUpNodesOp) isSubclusterCritical(nodesStates nodesStateInfo, upScNodes mapset.Set[NodeInfo]) error {
	// addresses of every UP primary node that belongs to this op's sandbox
	upPrimaryAddrs := mapset.NewSet[string]()
	for _, candidate := range nodesStates.NodeList {
		if candidate.Sandbox != op.sandbox || candidate.State != util.NodeUpState || !candidate.IsPrimary {
			continue
		}
		upPrimaryAddrs.Add(candidate.Address)
	}

	// addresses of the UP nodes inside the subcluster being checked
	scAddrs := mapset.NewSet[string]()
	for _, scNode := range upScNodes.ToSlice() {
		scAddrs.Add(scNode.Address)
	}

	// at least one UP primary outside the subcluster: not critical
	if upPrimaryAddrs.Difference(scAddrs).Cardinality() != 0 {
		return nil
	}
	return fmt.Errorf("subcluster %s is critical, shutting the subcluster down will cause the whole database shutdown", op.scName)
}

// Check if host is eligible to add to the UP hostlist
func (op *httpsGetUpNodesOp) checkUpHostEligible(node *nodeStateInfo) bool {
// Add subcluster needs to get an UP node from main cluster as initiator
Expand Down Expand Up @@ -362,10 +343,6 @@ func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host str
if !foundSC {
return fmt.Errorf(`[%s] cannot find subcluster %s in database %s`, op.name, op.scName, op.DBName)
}
if op.isScPrimary {
err = op.isSubclusterCritical(nodesStates, upScNodes)
return err
}
}
return nil
}
Expand Down
7 changes: 7 additions & 0 deletions vclusterops/stop_subcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ func (vcc *VClusterCommands) produceStopSCInstructions(options *VStopSubclusterO
}
}

httpsGetSubclusterInfoOp, err := makeHTTPSGetSubclusterInfoOp(usePassword, options.UserName, options.Password,
options.SCName, StopSubclusterCmd)
if err != nil {
return instructions, err
}

httpsGetUpNodesOp, err := makeHTTPSGetUpScNodesOp(options.DBName, options.Hosts,
usePassword, options.UserName, options.Password, StopSubclusterCmd, options.SCName)
if err != nil {
Expand All @@ -194,6 +200,7 @@ func (vcc *VClusterCommands) produceStopSCInstructions(options *VStopSubclusterO

instructions = append(instructions,
&httpsGetUpNodesOp,
&httpsGetSubclusterInfoOp,
&httpsSyncCatalogOp,
&httpsStopSCOp,
&httpsCheckDBRunningOp,
Expand Down

0 comments on commit 8dffa37

Please sign in to comment.