diff --git a/vclusterops/helpers.go b/vclusterops/helpers.go index e259c1c..3c42cf5 100644 --- a/vclusterops/helpers.go +++ b/vclusterops/helpers.go @@ -359,3 +359,96 @@ func validateHostMaps(hosts []string, maps ...map[string]string) error { } return allErrors } + +// reIP will do re-IP before sandboxing/unsandboxing if we find the catalog has stale node IPs. +// reIP will be called in three cases: +// 1. when sandboxing a subcluster, we will do re-ip in target sandbox since the node IPs in +// the main cluster could be changed. For example, a pod in main cluster gets restarted in k8s +// will cause inconsistent IPs between the sandbox and the main cluster. The target sandbox will +// have a stale node IP so adding that pod to the sandbox will fail. +// 2. when unsandboxing a subcluster, we will do re-ip in the main cluster since the node IPs +// in the sandbox could be changed. For example, a pod in a sandbox gets restarted in k8s will +// cause inconsistent IPs between the sandbox and the main cluster. The main cluster will +// have a stale node IP so moving that pod back to the main cluster will fail. +// 3. when removing a subcluster, we will do re-ip in the main cluster since the node IPs in +// the subcluster could be changed. This is a special case in k8s online upgrade, when a pod in +// a transient subcluster gets killed, we will not restart the pods in the subcluster. Instead, +// we will remove the subcluster. At this time, the nodes inside the subcluster have different IPs +// than the ones in the catalog, so removing subcluster will fail when deleting the catalog directories. +// We cannot find the correct nodes to do the deletion. +func (vcc *VClusterCommands) reIP(options *DatabaseOptions, scName, primaryUpHost string, + nodeNameAddressMap map[string]string, reloadSpread bool) error { + reIPList := []ReIPInfo{} + reIPHosts := []string{} + vdb := makeVCoordinationDatabase() + + backupHosts := options.Hosts + // only use one up node in the sandbox/main-cluster to retrieve nodes' info, + // then we can get the latest node IPs in the sandbox/main-cluster. + // When the operation is sandbox, the initiator will be a primary up node + // from the target sandbox. + // When the operation is unsandbox, the initiator will be a primary up node + // from the main cluster. + // When the operation is remove_subcluster, the initiator will be a primary + // up node from the main cluster. + initiator := []string{primaryUpHost} + options.Hosts = initiator + err := vcc.getVDBFromRunningDBIncludeSandbox(&vdb, options, AnySandbox) + if err != nil { + return fmt.Errorf("host %q in database is not available: %w", primaryUpHost, err) + } + // restore the options.Hosts for later creating sandbox/unsandbox instructions + options.Hosts = backupHosts + + // if the current node IPs doesn't match the expected ones, we need to do re-ip + for _, vnode := range vdb.HostNodeMap { + address, ok := nodeNameAddressMap[vnode.Name] + if ok && address != vnode.Address { + reIPList = append(reIPList, ReIPInfo{NodeName: vnode.Name, TargetAddress: address}) + reIPHosts = append(reIPHosts, address) + } + } + if len(reIPList) > 0 { + return vcc.doReIP(options, scName, initiator, reIPHosts, reIPList, reloadSpread) + } + return nil +} + +// doReIP will call NMA and HTTPs endpoints to fix the IPs in the catalog. +// It will execute below steps: +// 1. collect network profile for the nodes that need to re-ip +// 2. execute re-ip on a primary up host +// 3. reload spread on a primary up host if needed +func (vcc *VClusterCommands) doReIP(options *DatabaseOptions, scName string, + initiator, reIPHosts []string, reIPList []ReIPInfo, reloadSpread bool) error { + var instructions []clusterOp + nmaNetworkProfileOp := makeNMANetworkProfileOp(reIPHosts) + err := options.setUsePassword(vcc.Log) + if err != nil { + return err + } + instructions = append(instructions, &nmaNetworkProfileOp) + for _, reIPNode := range reIPList { + httpsReIPOp, e := makeHTTPSReIPOpWithHosts(initiator, []string{reIPNode.NodeName}, + []string{reIPNode.TargetAddress}, options.usePassword, options.UserName, options.Password) + if e != nil { + return e + } + instructions = append(instructions, &httpsReIPOp) + } + if reloadSpread { + httpsReloadSpreadOp, e := makeHTTPSReloadSpreadOpWithInitiator(initiator, options.usePassword, options.UserName, options.Password) + if e != nil { + return err + } + instructions = append(instructions, &httpsReloadSpreadOp) + } + certs := httpsCerts{key: options.Key, cert: options.Cert, caCert: options.CaCert} + clusterOpEngine := makeClusterOpEngine(instructions, &certs) + err = clusterOpEngine.run(vcc.Log) + if err != nil { + return fmt.Errorf("failed to re-ip nodes of subcluster %q: %w", scName, err) + } + + return nil +} diff --git a/vclusterops/nma_vertica_version_op.go b/vclusterops/nma_vertica_version_op.go index b19bb09..36a821e 100644 --- a/vclusterops/nma_vertica_version_op.go +++ b/vclusterops/nma_vertica_version_op.go @@ -338,25 +338,9 @@ func (op *nmaVerticaVersionOp) prepareHostNodeMap(execContext *opEngineExecConte hostSCMap[host] = vnode.Subcluster.Name scHostsMap[vnode.Subcluster.Name] = append(scHostsMap[vnode.Subcluster.Name], host) } - // find subclusters that hold the target hosts - targetSCs := []string{} - for _, host := range op.targetNodeIPs { - sc, ok := hostSCMap[host] - if ok { - targetSCs = append(targetSCs, sc) - } else { - return hostNodeMap, fmt.Errorf("[%s] host %s does not exist in the database", op.name, host) - } - } - // find all hosts that in target subclusters - allHostsInTargetSCs := []string{} - for _, sc := range targetSCs { - hosts, ok := scHostsMap[sc] - if ok { - allHostsInTargetSCs = append(allHostsInTargetSCs, hosts...) - } else { - return hostNodeMap, fmt.Errorf("[%s] internal error: subcluster %s was lost when preparing the hosts", op.name, sc) - } + allHostsInTargetSCs, err := op.findHostsInTargetSubclusters(hostSCMap, scHostsMap) + if err != nil { + return hostNodeMap, err } // get host-node map for all hosts in target subclusters hostNodeMap = util.FilterMapByKey(execContext.nmaVDatabase.HostNodeMap, allHostsInTargetSCs) @@ -364,25 +348,56 @@ func (op *nmaVerticaVersionOp) prepareHostNodeMap(execContext *opEngineExecConte return hostNodeMap, nil } -// prepareHostNodeMapWithVDB is a helper to make a host-node map for nodes in the main cluster -// or in a sandbox +// prepareHostNodeMapWithVDB is a helper to make a host-node map for all nodes in the +// subclusters of target nodes func (op *nmaVerticaVersionOp) prepareHostNodeMapWithVDB() (vHostNodeMap, error) { if len(op.targetNodeIPs) == 0 { return op.vdb.HostNodeMap, nil } hostNodeMap := makeVHostNodeMap() - // we pass in the first host because we expect all of the - // target hosts to belong to the same cluster - sbName, err := op.getSandboxName(op.targetNodeIPs[0]) + hostSCMap := make(map[string]string) + scHostsMap := make(map[string][]string) + for host, vnode := range op.vdb.HostNodeMap { + hostSCMap[host] = vnode.Subcluster + scHostsMap[vnode.Subcluster] = append(scHostsMap[vnode.Subcluster], host) + } + allHostsInTargetSCs, err := op.findHostsInTargetSubclusters(hostSCMap, scHostsMap) if err != nil { return hostNodeMap, err } - for host, vnode := range op.vdb.HostNodeMap { - if vnode.Sandbox == sbName { - hostNodeMap[host] = vnode + // get host-node map for all hosts in target subclusters + hostNodeMap = util.FilterMapByKey(op.vdb.HostNodeMap, allHostsInTargetSCs) + + return hostNodeMap, nil +} + +// findHostsInTargetSubclusters is a helper function to get all hosts in the subclusters of +// target nodes. The parameters of this function are two maps: +// 1. host-subcluster map for the entire database +// 2. subcluster-hosts map for the entire database +func (op *nmaVerticaVersionOp) findHostsInTargetSubclusters(hostSCMap map[string]string, + scHostsMap map[string][]string) ([]string, error) { + allHostsInTargetSCs := []string{} + // find subclusters that hold the target hosts + targetSCs := []string{} + for _, host := range op.targetNodeIPs { + sc, ok := hostSCMap[host] + if ok { + targetSCs = append(targetSCs, sc) + } else { + return allHostsInTargetSCs, fmt.Errorf("[%s] host %s does not exist in the database", op.name, host) } } - return hostNodeMap, nil + // find all hosts that in target subclusters + for _, sc := range targetSCs { + hosts, ok := scHostsMap[sc] + if ok { + allHostsInTargetSCs = append(allHostsInTargetSCs, hosts...) + } else { + return allHostsInTargetSCs, fmt.Errorf("[%s] internal error: subcluster %s was lost when preparing the hosts", op.name, sc) + } + } + return allHostsInTargetSCs, nil } func (op *nmaVerticaVersionOp) buildHostVersionMapDefault() { @@ -443,11 +458,3 @@ func (op *nmaVerticaVersionOp) buildHostVersionMapWithVDB(execContext *opEngineE } return nil } - -func (op *nmaVerticaVersionOp) getSandboxName(host string) (string, error) { - vnode, ok := op.vdb.HostNodeMap[host] - if !ok { - return "", fmt.Errorf("[%s] host %s does not exist in the database", op.name, host) - } - return vnode.Sandbox, nil -} diff --git a/vclusterops/remove_subcluster.go b/vclusterops/remove_subcluster.go index 2cb51cd..8fc1886 100644 --- a/vclusterops/remove_subcluster.go +++ b/vclusterops/remove_subcluster.go @@ -30,6 +30,13 @@ type VRemoveScOptions struct { DatabaseOptions SCName string // subcluster to remove from database ForceDelete bool // whether force delete directories + // The expected node names with their IPs in the subcluster, the user of vclusterOps needs + // to make sure the provided values are correct. This option will be used to do re-ip in + // the cluster that contains the subcluster. + NodeNameAddressMap map[string]string + // A primary up host in another subcluster that belongs to same cluster as the target subcluster. + // This option will be used to do re-ip in the cluster. + PrimaryUpHost string } func VRemoveScOptionsFactory() VRemoveScOptions { @@ -141,6 +148,21 @@ func (vcc VClusterCommands) VRemoveSubcluster(removeScOpt *VRemoveScOptions) (VC return vdb, err } + // If the users provide extra node information, we will check and do re-ip for the nodes in + // the subcluster if necessary. This is to address the case where catalog has stale IPs of the + // nodes in the subcluster, which would cause a node removal failure at delete-directory step. + if removeScOpt.PrimaryUpHost != "" && len(removeScOpt.NodeNameAddressMap) > 0 { + e := vcc.reIP(&removeScOpt.DatabaseOptions, + removeScOpt.SCName, + removeScOpt.PrimaryUpHost, + removeScOpt.NodeNameAddressMap, + // we will do reload spread in remove_node so we don't need to do reload spread here + false /*reload spread*/) + if e != nil { + return vdb, e + } + } + // pre-check: should not remove the default subcluster vcc.PrintInfo("Performing remove_subcluster pre-checks") hostsToRemove, err := vcc.removeScPreCheck(&vdb, removeScOpt) diff --git a/vclusterops/sandbox.go b/vclusterops/sandbox.go index c6d4d34..d331c38 100644 --- a/vclusterops/sandbox.go +++ b/vclusterops/sandbox.go @@ -206,7 +206,7 @@ func (options *VSandboxOptions) runCommand(vcc VClusterCommands) error { // to provide some node information if options.SandboxPrimaryUpHost != "" && len(options.NodeNameAddressMap) > 0 { err := vcc.reIP(&options.DatabaseOptions, options.SCName, options.SandboxPrimaryUpHost, - options.NodeNameAddressMap) + options.NodeNameAddressMap, true /*reload spread*/) if err != nil { return err } @@ -242,87 +242,3 @@ func runSandboxCmd(vcc VClusterCommands, i sandboxInterface) error { return i.runCommand(vcc) } - -// reIP will do re-IP before sandboxing/unsandboxing if we find the catalog has stale node IPs. -// reIP will be called in two cases: -// 1. when sandboxing a subcluster, we will do re-ip in target sandbox since the node IPs in -// the main cluster could be changed. For example, a pod in main cluster gets restarted in k8s -// will cause inconsistent IPs between the sandbox and the main cluster. The target sandbox will -// have a stale node IP so adding that pod to the sandbox will fail. -// 2. when unsandboxing a subcluster, we will do re-ip in the main cluster since the node IPs -// in the sandbox could be changed. For example, a pod in a sandbox gets restarted in k8s will -// cause inconsistent IPs between the sandbox and the main cluster. The main cluster will -// have a stale node IP so moving that pod back to the main cluster will fail. -func (vcc *VClusterCommands) reIP(options *DatabaseOptions, scName, primaryUpHost string, - nodeNameAddressMap map[string]string) error { - reIPList := []ReIPInfo{} - reIPHosts := []string{} - vdb := makeVCoordinationDatabase() - - backupHosts := options.Hosts - // only use one up node in the sandbox/main-cluster to retrieve nodes' info, - // then we can get the latest node IPs in the sandbox/main-cluster. - // When the operation is sandbox, the initiator will be a primary up node - // from the target sandbox. When the operation is unsandbox, the initiator - // will be a primary up node from the main cluster. - initiator := []string{primaryUpHost} - options.Hosts = initiator - err := vcc.getVDBFromRunningDBIncludeSandbox(&vdb, options, AnySandbox) - if err != nil { - return fmt.Errorf("host %q in database is not available: %w", primaryUpHost, err) - } - // restore the options.Hosts for later creating sandbox/unsandbox instructions - options.Hosts = backupHosts - - // if the current node IPs doesn't match the expected ones, we need to do re-ip - for _, vnode := range vdb.HostNodeMap { - address, ok := nodeNameAddressMap[vnode.Name] - if ok && address != vnode.Address { - reIPList = append(reIPList, ReIPInfo{NodeName: vnode.Name, TargetAddress: address}) - reIPHosts = append(reIPHosts, address) - } - } - if len(reIPList) > 0 { - return vcc.doReIP(options, scName, initiator, reIPHosts, reIPList) - } - return nil -} - -// doReIP will call NMA and HTTPs endpoints to fix the IPs in the catalog. -// It will execute below steps: -// 1. collect network profile for the nodes that need to re-ip -// 2. execute re-ip on a primary up host -// 3. reload spread on a primary up host -func (vcc *VClusterCommands) doReIP(options *DatabaseOptions, scName string, - initiator, reIPHosts []string, reIPList []ReIPInfo) error { - var instructions []clusterOp - nmaNetworkProfileOp := makeNMANetworkProfileOp(reIPHosts) - err := options.setUsePassword(vcc.Log) - if err != nil { - return err - } - instructions = append(instructions, &nmaNetworkProfileOp) - for _, reIPNode := range reIPList { - httpsReIPOp, e := makeHTTPSReIPOpWithHosts(initiator, []string{reIPNode.NodeName}, - []string{reIPNode.TargetAddress}, options.usePassword, options.UserName, options.Password) - if e != nil { - return e - } - instructions = append(instructions, &httpsReIPOp) - } - // host is set to nil value in the reload spread step - // we use information from node information to find the up host later - httpsReloadSpreadOp, err := makeHTTPSReloadSpreadOpWithInitiator(initiator, options.usePassword, options.UserName, options.Password) - if err != nil { - return err - } - instructions = append(instructions, &httpsReloadSpreadOp) - certs := httpsCerts{key: options.Key, cert: options.Cert, caCert: options.CaCert} - clusterOpEngine := makeClusterOpEngine(instructions, &certs) - err = clusterOpEngine.run(vcc.Log) - if err != nil { - return fmt.Errorf("failed to re-ip nodes of subcluster %q: %w", scName, err) - } - - return nil -} diff --git a/vclusterops/start_node.go b/vclusterops/start_node.go index 5621632..998f721 100644 --- a/vclusterops/start_node.go +++ b/vclusterops/start_node.go @@ -205,6 +205,12 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error { // - that don't need to re-ip hostsNoNeedToReIP := options.separateHostsBasedOnReIPNeed(hostNodeNameMap, restartNodeInfo, &vdb, vcc.Log) + // check primary node count is more than nodes to re-ip, specially for sandboxes + err = options.checkQuorum(&vdb, restartNodeInfo) + if err != nil { + return err + } + // for the hosts that don't need to re-ip, // if none of them is down and no other nodes to re-ip, // we will early stop as there is no need to start them @@ -246,6 +252,24 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error { return nil } +// primary up node details can vary in case of sandboxes. This check is to ensure quorum is maintained +// even when a sandbox node is reip'ed +func (options *VStartNodesOptions) checkQuorum(vdb *VCoordinationDatabase, restartNodeInfo *VStartNodesInfo) error { + sandboxPrimaryUpNodes := []string{} + for _, vnode := range vdb.HostNodeMap { + if vnode.IsPrimary && vnode.State == util.NodeUpState && vnode.Sandbox == restartNodeInfo.Sandbox { + sandboxPrimaryUpNodes = append(sandboxPrimaryUpNodes, vnode.Address) + } + } + if len(sandboxPrimaryUpNodes) <= len(restartNodeInfo.ReIPList) { + return &ReIPNoClusterQuorumError{ + Detail: fmt.Sprintf("Quorum check failed: %d up node(s) is/are not enough to re-ip %d node(s)", + len(sandboxPrimaryUpNodes), len(restartNodeInfo.ReIPList)), + } + } + return nil +} + // produceStartNodesInstructions will build a list of instructions to execute for // the restart_node command. // diff --git a/vclusterops/unsandbox.go b/vclusterops/unsandbox.go index 4bcac55..7fd8e35 100644 --- a/vclusterops/unsandbox.go +++ b/vclusterops/unsandbox.go @@ -302,7 +302,7 @@ func (options *VUnsandboxOptions) runCommand(vcc VClusterCommands) error { // to provide some node information if options.PrimaryUpHost != "" && len(options.NodeNameAddressMap) > 0 { err := vcc.reIP(&options.DatabaseOptions, options.SCName, options.PrimaryUpHost, - options.NodeNameAddressMap) + options.NodeNameAddressMap, true /*reload spread*/) if err != nil { return err }