Skip to content

Commit

Permalink
Sync from server repo (97ac3bf50a9)
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Spilchen committed Jan 4, 2024
1 parent 09a503f commit a05178d
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 59 deletions.
2 changes: 1 addition & 1 deletion vclusterops/add_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (vcc *VClusterCommands) VAddNode(options *VAddNodeOptions) (VCoordinationDa
// to add already exists in db.
func checkAddNodeRequirements(vdb *VCoordinationDatabase, hostsToAdd []string) error {
// we don't want any of the new host to be part of the db.
if nodes := vdb.containNodes(hostsToAdd); len(nodes) != 0 {
if nodes, _ := vdb.containNodes(hostsToAdd); len(nodes) != 0 {
return fmt.Errorf("%s already exist in the database", strings.Join(nodes, ","))
}

Expand Down
22 changes: 12 additions & 10 deletions vclusterops/coordinator_database.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"path/filepath"
"strings"

mapset "github.com/deckarep/golang-set/v2"
"github.com/vertica/vcluster/vclusterops/util"
"github.com/vertica/vcluster/vclusterops/vlog"
"golang.org/x/exp/maps"
Expand Down Expand Up @@ -264,21 +265,22 @@ func (vdb *VCoordinationDatabase) getSCNames() []string {
return scNames
}

// containNodes returns the number of input nodes contained in the vdb.
func (vdb *VCoordinationDatabase) containNodes(nodes []string) []string {
hostSet := make(map[string]struct{})
for _, n := range nodes {
hostSet[n] = struct{}{}
}
dupHosts := []string{}
// containNodes determines which nodes are in the vdb and which ones are not.
// The node is determined by looking up the host address.
func (vdb *VCoordinationDatabase) containNodes(nodes []string) (nodesInDB, nodesNotInDB []string) {
hostSet := mapset.NewSet(nodes...)
nodesInDB = []string{}
for _, vnode := range vdb.HostNodeMap {
address := vnode.Address
if _, exist := hostSet[address]; exist {
dupHosts = append(dupHosts, address)
if exist := hostSet.Contains(address); exist {
nodesInDB = append(nodesInDB, address)
}
}

return dupHosts
if len(nodesInDB) == len(nodes) {
return nodesInDB, nil
}
return nodesInDB, util.SliceDiff(nodes, nodesInDB)
}

// hasAtLeastOneDownNode returns true if the current VCoordinationDatabase instance
Expand Down
11 changes: 5 additions & 6 deletions vclusterops/https_drop_node_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package vclusterops

import (
"errors"
"strconv"

"github.com/vertica/vcluster/vclusterops/util"
"github.com/vertica/vcluster/vclusterops/vlog"
Expand All @@ -29,12 +30,14 @@ type httpsDropNodeOp struct {
RequestParams map[string]string
}

// makeHTTPSDropNodeOp is a constructor for httpsDropNodeOp. The cascade option
// should be true if an EON deployment and the node we are dropping is down.
func makeHTTPSDropNodeOp(logger vlog.Printer, vnode string,
initiatorHost []string,
useHTTPPassword bool,
userName string,
httpsPassword *string,
isEon bool) (httpsDropNodeOp, error) {
cascade bool) (httpsDropNodeOp, error) {
op := httpsDropNodeOp{}
op.name = "HTTPSDropNodeOp"
op.logger = logger.WithName(op.name)
Expand All @@ -48,11 +51,7 @@ func makeHTTPSDropNodeOp(logger vlog.Printer, vnode string,
op.userName = userName
op.httpsPassword = httpsPassword
op.RequestParams = make(map[string]string)
if isEon {
op.RequestParams["cascade"] = "true"
return op, nil
}
op.RequestParams["cascade"] = "false"
op.RequestParams["cascade"] = strconv.FormatBool(cascade)
return op, nil
}

Expand Down
103 changes: 85 additions & 18 deletions vclusterops/remove_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package vclusterops
import (
"errors"
"fmt"
"strings"

"github.com/vertica/vcluster/vclusterops/util"
"github.com/vertica/vcluster/vclusterops/vlog"
Expand Down Expand Up @@ -135,11 +134,15 @@ func (vcc *VClusterCommands) VRemoveNode(options *VRemoveNodeOptions) (VCoordina

options.DBName = &dbName
options.Hosts = hosts
// get depot and data prefix from config file or options
// get depot, data and catalog prefix from config file or options
*options.DepotPrefix, *options.DataPrefix, err = options.getDepotAndDataPrefix(options.Config)
if err != nil {
return vdb, err
}
options.CatalogPrefix, err = options.getCatalogPrefix(options.Config)
if err != nil {
return vdb, err
}

err = vcc.getVDBFromRunningDB(&vdb, &options.DatabaseOptions)
if err != nil {
Expand All @@ -151,20 +154,46 @@ func (vcc *VClusterCommands) VRemoveNode(options *VRemoveNodeOptions) (VCoordina
return vdb, err
}

// remove_node is aborted if requirements are not met
err = checkRemoveNodeRequirements(&vdb, options.HostsToRemove)
// remove_node is aborted if requirements are not met.
err = checkRemoveNodeRequirements(&vdb)
if err != nil {
return vdb, err
}
// Figure out if the nodes to remove exist in the catalog. We follow
// *normal* remove node logic if it still exists in the catalog. We tolerate
// requests for nodes that aren't in the catalog because the caller may not
// know (e.g. previous attempt to remove node didn't come back successful).
// We have a simplified remove process for those requests to remove state
// that the caller may be checking.
var hostsNotInCatalog []string
options.HostsToRemove, hostsNotInCatalog = vdb.containNodes(options.HostsToRemove)

vdb, err = vcc.removeNodesInCatalog(options, &vdb)
if err != nil || len(hostsNotInCatalog) == 0 {
return vdb, err
}

return vcc.handleRemoveNodeForHostsNotInCatalog(&vdb, options, hostsNotInCatalog)
}

err = options.setInitiator(vdb.PrimaryUpNodes)
// removeNodesInCatalog will perform the steps to remove nodes. The node list in
// options.HostsToRemove has already been verified that each node is in the
// catalog.
func (vcc *VClusterCommands) removeNodesInCatalog(options *VRemoveNodeOptions, vdb *VCoordinationDatabase) (VCoordinationDatabase, error) {
if len(options.HostsToRemove) == 0 {
vcc.Log.Info("Exit early because there are no hosts to remove")
return *vdb, nil
}
vcc.Log.V(1).Info("validated input hosts", "HostsToRemove", options.HostsToRemove)

err := options.setInitiator(vdb.PrimaryUpNodes)
if err != nil {
return vdb, err
return *vdb, err
}

instructions, err := vcc.produceRemoveNodeInstructions(&vdb, options)
instructions, err := vcc.produceRemoveNodeInstructions(vdb, options)
if err != nil {
return vdb, fmt.Errorf("fail to produce remove node instructions, %w", err)
return *vdb, fmt.Errorf("fail to produce remove node instructions, %w", err)
}

remainingHosts := util.SliceDiff(vdb.HostList, options.HostsToRemove)
Expand All @@ -177,7 +206,7 @@ func (vcc *VClusterCommands) VRemoveNode(options *VRemoveNodeOptions) (VCoordina
// Here we check whether the to-be-removed nodes are still in the catalog.
// If they have been removed from catalog, we let remove_node succeed.
if vcc.findRemovedNodesInCatalog(options, remainingHosts) {
return vdb, fmt.Errorf("fail to complete remove node operation, %w", runError)
return *vdb, fmt.Errorf("fail to complete remove node operation, %w", runError)
}
// If the target nodes have already been removed from catalog,
// show a warning about the run error for users to trouble shoot their machines
Expand All @@ -189,20 +218,57 @@ func (vcc *VClusterCommands) VRemoveNode(options *VRemoveNodeOptions) (VCoordina
return vdb.copy(remainingHosts), nil
}

// checkRemoveNodeRequirements validates the following remove_node requirements:
// - Check the existence of the nodes to remove
// - Check if all nodes are up or standby (enterprise only)
func checkRemoveNodeRequirements(vdb *VCoordinationDatabase, hostsToRemove []string) error {
if nodes := vdb.containNodes(hostsToRemove); len(nodes) != len(hostsToRemove) {
notFoundHosts := util.SliceDiff(hostsToRemove, nodes)
return fmt.Errorf("%s do not exist in the database", strings.Join(notFoundHosts, ","))
// handleRemoveNodeForHostsNotInCatalog cleans up after hosts that were
// requested for removal but are no longer present in the catalog. It runs two
// separate op engines in order: one that gathers node info for the missing
// hosts, and one that deletes their directories through the NMA.
//
// Parameters:
//   - vdb: the coordination database; it is handed to the node-info op and
//     is used as the source for the per-host copy below.
//   - options: remove-node options supplying the db name, catalog prefix,
//     certs and the ForceDelete flag.
//   - missingHosts: hosts requested for removal that are not in the catalog.
//
// On success it returns a copy of vdb restricted to the hosts in vdb.HostList
// that are not in missingHosts. On any failure it returns *vdb together with
// the error.
func (vcc *VClusterCommands) handleRemoveNodeForHostsNotInCatalog(vdb *VCoordinationDatabase, options *VRemoveNodeOptions,
	missingHosts []string) (VCoordinationDatabase, error) {
	vcc.Log.Info("Doing cleanup of hosts missing from database", "hostsNotInCatalog", missingHosts)

	// First pass: look up the paths for the hosts being removed.
	getNodesInfoOp := makeNMAGetNodesInfoOp(vcc.Log, missingHosts, *options.DBName, *options.CatalogPrefix,
		false /* report all errors */, vdb)
	certs := httpsCerts{key: options.Key, cert: options.Cert, caCert: options.CaCert}
	nodeInfoEngine := makeClusterOpEngine([]clusterOp{&getNodesInfoOp}, &certs)
	if runErr := nodeInfoEngine.run(vcc.Log); runErr != nil {
		return *vdb, fmt.Errorf("failed to get node info for missing hosts: %w", runErr)
	}

	// The delete-directories op takes its host list from the vdb it is given,
	// so hand it a vdb that contains only the missing hosts.
	missingHostsVDB := vdb.copy(missingHosts)
	if settingErr := options.completeVDBSetting(&missingHostsVDB); settingErr != nil {
		return *vdb, settingErr
	}

	// Second pass: with the paths known, have the NMA remove the directories.
	deleteDirsOp, makeErr := makeNMADeleteDirectoriesOp(vcc.Log, &missingHostsVDB, *options.ForceDelete)
	if makeErr != nil {
		return *vdb, makeErr
	}
	deleteEngine := makeClusterOpEngine([]clusterOp{&deleteDirsOp}, &certs)
	if runErr := deleteEngine.run(vcc.Log); runErr != nil {
		return *vdb, fmt.Errorf("failed to delete directories for missing hosts: %w", runErr)
	}

	return vdb.copy(util.SliceDiff(vdb.HostList, missingHosts)), nil
}

// checkRemoveNodeRequirements verifies the preconditions for remove_node and
// returns an error when one is not satisfied. The only requirement checked
// here applies to non-Eon databases: no node may be down.
func checkRemoveNodeRequirements(vdb *VCoordinationDatabase) error {
	// Eon databases have no requirement to check here.
	if vdb.IsEon {
		return nil
	}
	if vdb.hasAtLeastOneDownNode() {
		return errors.New("all nodes must be up or standby")
	}
	return nil
}

Expand Down Expand Up @@ -379,7 +445,8 @@ func (vcc *VClusterCommands) produceDropNodeOps(instructions *[]clusterOp, targe
hostNodeMap vHostNodeMap, isEon bool) error {
for _, host := range targetHosts {
httpsDropNodeOp, err := makeHTTPSDropNodeOp(vcc.Log, hostNodeMap[host].Name, hosts,
useHTTPPassword, userName, httpsPassword, isEon)
useHTTPPassword, userName, httpsPassword,
isEon && hostNodeMap[host].State == util.NodeDownState)
if err != nil {
return err
}
Expand Down
55 changes: 36 additions & 19 deletions vclusterops/start_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func (vcc *VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) erro

func (vcc *VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase) error {
// pre-instruction to perform basic checks and get basic information
preInstructions, err := vcc.produceStartDBPreCheck(options, vdb)
preInstructions, err := vcc.produceStartDBPreCheck(options, vdb, *options.TrimHostList)
if err != nil {
return fmt.Errorf("fail to production instructions: %w", err)
}
Expand All @@ -202,28 +202,35 @@ func (vcc *VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions,
return fmt.Errorf("fail to start database pre-checks: %w", runError)
}

// if TrimHostList is true,
// update the host list as some provided hosts may not exist in the catalog
// If requested, remove any provided hosts that are not in the catalog. Use
// the vdb that we just fetched by the catalog editor. It will be from the
// latest catalog.
if *options.TrimHostList {
var trimmedHostList []string
var extraHosts []string

for _, h := range options.Hosts {
if _, exist := vdb.HostNodeMap[h]; exist {
trimmedHostList = append(trimmedHostList, h)
} else {
extraHosts = append(extraHosts, h)
}
}
options.Hosts = vcc.removeHostsNotInCatalog(&clusterOpEngine.execContext.nmaVDatabase, options.Hosts)
}

return nil
}

if len(extraHosts) > 0 {
vcc.Log.PrintInfo("The following hosts will be trimmed as they are not found in catalog: %+v",
extraHosts)
options.Hosts = trimmedHostList
// removeHostsNotInCatalog filters the given hosts down to those that have an
// entry in vdb.HostNodeMap.
//
// Parameters:
//   - vdb: the database view whose HostNodeMap is used for the lookup.
//   - hosts: the candidate host list supplied by the caller.
//
// It returns the hosts found in the map, in their original order. The result
// is nil when none of the hosts are found. Hosts that are dropped are reported
// through the logger.
func (vcc *VClusterCommands) removeHostsNotInCatalog(vdb *nmaVDatabase, hosts []string) []string {
	var trimmedHostList []string
	var extraHosts []string

	vcc.Log.Info("checking if any input hosts can be removed",
		"hosts", hosts, "hostNodeMap", vdb.HostNodeMap)
	for _, h := range hosts {
		if _, exist := vdb.HostNodeMap[h]; exist {
			trimmedHostList = append(trimmedHostList, h)
		} else {
			extraHosts = append(extraHosts, h)
		}
	}

	// Only mention trimming when something was actually dropped.
	if len(extraHosts) > 0 {
		vcc.Log.PrintInfo("The following hosts will be trimmed as they are not found in catalog: %+v",
			extraHosts)
	}
	return trimmedHostList
}

// produceStartDBPreCheck will build a list of pre-check instructions to execute for
Expand All @@ -234,7 +241,9 @@ func (vcc *VClusterCommands) runStartDBPrecheck(options *VStartDatabaseOptions,
// - Check NMA connectivity
// - Check to see if any dbs run
// - Get nodes' information by calling the NMA /nodes endpoint
func (vcc *VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase) ([]clusterOp, error) {
// - Find latest catalog to use for removal of nodes not in the catalog
func (vcc *VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOptions, vdb *VCoordinationDatabase,
findLatestCatalog bool) ([]clusterOp, error) {
var instructions []clusterOp

nmaHealthOp := makeNMAHealthOp(vcc.Log, options.Hosts)
Expand All @@ -261,6 +270,14 @@ func (vcc *VClusterCommands) produceStartDBPreCheck(options *VStartDatabaseOptio
instructions = append(instructions, &nmaGetNodesInfoOp)
}

if findLatestCatalog {
nmaReadCatalogEditorOp, err := makeNMAReadCatalogEditorOp(vcc.Log, vdb)
if err != nil {
return instructions, err
}
instructions = append(instructions, &nmaReadCatalogEditorOp)
}

return instructions, nil
}

Expand Down
22 changes: 17 additions & 5 deletions vclusterops/start_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,10 @@ func (options *VStartNodesOptions) validateAnalyzeOptions(logger vlog.Printer) e
}

// VStartNodes starts the given nodes for a cluster that has not yet lost
// cluster quorum and returns any error encountered.
// If necessary, it updates the node's IP in the Vertica catalog.
// If cluster quorum is already lost, use VStartDatabase.
// cluster quorum. Returns any error encountered. If necessary, it updates the
// node's IP in the Vertica catalog. If cluster quorum is already lost, use
// VStartDatabase. It will skip any nodes given that no longer exist in the
// catalog.
func (vcc *VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
/*
* - Produce Instructions
Expand Down Expand Up @@ -163,8 +164,12 @@ func (vcc *VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
for nodename, newIP := range options.Nodes {
oldIP, ok := hostNodeNameMap[nodename]
if !ok {
vcc.Log.PrintError("fail to provide a non-existent node name %s", nodename)
return fmt.Errorf("the node with the provided name %s does not exist", nodename)
// We can get here if the caller requests a node that we were in the
// middle of removing. Log an informational message and continue
// without starting that node.
vcc.Log.Info("skipping start of node that doesn't exist in the catalog",
"nodename", nodename, "newIP", newIP)
continue
}
// if the IP that is given is different than the IP in the catalog, a re-ip is necessary
if oldIP != newIP {
Expand All @@ -181,6 +186,13 @@ func (vcc *VClusterCommands) VStartNodes(options *VStartNodesOptions) error {
restartNodeInfo.HostsToStart = append(restartNodeInfo.HostsToStart, restartNodeInfo.ReIPList...)
restartNodeInfo.HostsToStart = append(restartNodeInfo.HostsToStart, hostsNoNeedToReIP...)

// If no nodes were found to start, we can simply exit here. This can happen
// if given a list of nodes that aren't in the catalog any longer.
if len(restartNodeInfo.HostsToStart) == 0 {
vcc.Log.Info("None of the nodes provided are in the catalog. There is nothing to start.")
return nil
}

// produce restart_node instructions
instructions, err := vcc.produceStartNodesInstructions(restartNodeInfo, options, &vdb)
if err != nil {
Expand Down

0 comments on commit a05178d

Please sign in to comment.