Skip to content

Commit

Permalink
Sync from server repo (8576fa6f8d0)
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Spilchen committed Sep 6, 2023
1 parent 45e5fed commit b466b39
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 68 deletions.
22 changes: 22 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "vcluster",
"type": "go",
"request": "launch",
"mode": "test",
"program": "${workspaceFolder}/vclusterops"
},
{
"name": "commands",
"type": "go",
"request": "launch",
"mode": "test",
"program": "${workspaceFolder}/commands"
}
]
}
4 changes: 4 additions & 0 deletions commands/cmd_restart_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package commands
import (
"flag"
"fmt"
"strconv"

"github.com/go-logr/logr"
"github.com/vertica/vcluster/vclusterops"
Expand Down Expand Up @@ -45,6 +46,9 @@ func makeCmdRestartNodes() *CmdRestartNodes {
util.GetOptionalFlagMsg("Forcefully use the user input instead of reading the options from "+vclusterops.ConfigFileName))
restartNodesOptions.ConfigDirectory = newCmd.parser.String("config-directory", "",
util.GetOptionalFlagMsg("Directory where "+vclusterops.ConfigFileName+" is located"))
restartNodesOptions.StatePollingTimeout = *newCmd.parser.Int("timeout", util.DefaultTimeoutSeconds,
util.GetOptionalFlagMsg("Set a timeout (in seconds) for polling node state operation, default timeout is "+
strconv.Itoa(util.DefaultTimeoutSeconds)+"seconds"))

newCmd.restartNodesOptions = &restartNodesOptions
newCmd.parser.Usage = func() {
Expand Down
11 changes: 4 additions & 7 deletions commands/cmd_start_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package commands
import (
"flag"
"fmt"
"strconv"

"github.com/go-logr/logr"
"github.com/vertica/vcluster/vclusterops"
Expand All @@ -23,7 +24,6 @@ type CmdStartDB struct {
IgnoreClusterLease *bool // ignore the cluster lease in communal storage
Unsafe *bool // Start database unsafely, skipping recovery.
Fast *bool // Attempt fast startup database
Timeout *int // Timeout for starting the database
}

func makeCmdStartDB() *CmdStartDB {
Expand All @@ -49,7 +49,9 @@ func makeCmdStartDB() *CmdStartDB {
util.GetOptionalFlagMsg("Forcefully use the user's input instead of reading the options from "+vclusterops.ConfigFileName))
startDBOptions.ConfigDirectory = newCmd.parser.String("config-directory", "",
util.GetOptionalFlagMsg("Directory where "+vclusterops.ConfigFileName+" is located"))

startDBOptions.StatePollingTimeout = *newCmd.parser.Int("timeout", util.DefaultTimeoutSeconds,
util.GetOptionalFlagMsg("Set a timeout (in seconds) for polling node state operation, default timeout is "+
strconv.Itoa(util.DefaultTimeoutSeconds)+"seconds"))
// eon flags
newCmd.isEon = newCmd.parser.Bool("eon-mode", false, util.GetEonFlagMsg("Indicate if the database is an Eon database."+
" Use it when you do not trust "+vclusterops.ConfigFileName))
Expand All @@ -61,7 +63,6 @@ func makeCmdStartDB() *CmdStartDB {
newCmd.AllowFallbackKeygen = newCmd.parser.Bool("allow_fallback_keygen", false, util.SuppressHelp)
newCmd.IgnoreClusterLease = newCmd.parser.Bool("ignore_cluster_lease", false, util.SuppressHelp)
newCmd.Fast = newCmd.parser.Bool("fast", false, util.SuppressHelp)
newCmd.Timeout = newCmd.parser.Int("timeout", util.DefaultDrainSeconds, util.SuppressHelp)

newCmd.startDBOptions = &startDBOptions
newCmd.parser.Usage = func() {
Expand Down Expand Up @@ -96,10 +97,6 @@ func (c *CmdStartDB) Parse(inputArgv []string) error {
c.CmdBase.ipv6 = nil
}

if util.IsOptionSet(c.parser, "password") {
c.startDBOptions.UsePassword = true
}

if !util.IsOptionSet(c.parser, "config-directory") {
c.startDBOptions.ConfigDirectory = nil
}
Expand Down
8 changes: 5 additions & 3 deletions vclusterops/https_check_db_running.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,10 @@ func (op *HTTPCheckRunningDBOp) processResult(_ *OpEngineExecContext) error {
switch op.opType {
case CreateDB:
vlog.LogPrintInfoln("Aborting database creation")
case StopDB, StartDB:
case StopDB:
vlog.LogPrintInfoln("The database has not been down yet")
case StartDB:
vlog.LogPrintInfoln("Aborting database start")
case ReviveDB:
vlog.LogPrintInfoln("Aborting database revival")
}
Expand All @@ -248,9 +250,9 @@ func (op *HTTPCheckRunningDBOp) processResult(_ *OpEngineExecContext) error {
func (op *HTTPCheckRunningDBOp) execute(execContext *OpEngineExecContext) error {
vlog.LogInfo("[%s] Execute() for operation %s", op.name, op.opType)
switch op.opType {
case CreateDB, ReviveDB:
case CreateDB, StartDB, ReviveDB:
return op.checkDBConnection(execContext)
case StopDB, StartDB:
case StopDB:
return op.pollForDBDown(execContext)
}

Expand Down
35 changes: 23 additions & 12 deletions vclusterops/https_poll_node_state_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,23 @@ type HTTPSPollNodeStateOp struct {
allHosts map[string]interface{}
upHosts map[string]interface{}
notUpHosts []string
timeout int
}

func makeHTTPSPollNodeStateOp(hosts []string,
func makeHTTPSPollNodeStateOpWithTimeout(hosts []string,
useHTTPPassword bool, userName string, httpsPassword *string,
) (HTTPSPollNodeStateOp, error) {
timeout int) (HTTPSPollNodeStateOp, error) {
op, err := makeHTTPSPollNodeStateOp(hosts, useHTTPPassword, userName, httpsPassword)
if err != nil {
return op, err
}
op.timeout = timeout
return op, nil
}

func makeHTTPSPollNodeStateOp(hosts []string,
useHTTPPassword bool, userName string,
httpsPassword *string) (HTTPSPollNodeStateOp, error) {
httpsPollNodeStateOp := HTTPSPollNodeStateOp{}
httpsPollNodeStateOp.name = "HTTPSPollNodeStateOp"
httpsPollNodeStateOp.hosts = hosts
Expand All @@ -54,7 +66,12 @@ func makeHTTPSPollNodeStateOp(hosts []string,
for _, h := range hosts {
httpsPollNodeStateOp.allHosts[h] = struct{}{}
}

// Resolve the polling timeout (in seconds) once, at construction time. The value
// is read via NODE_STATE_POLLING_TIMEOUT, with StartupPollingTimeout passed to
// util.GetEnv as the fallback.
timeoutSecondStr := util.GetEnv("NODE_STATE_POLLING_TIMEOUT", strconv.Itoa(StartupPollingTimeout))
timeoutSecond, err := strconv.Atoi(timeoutSecondStr)
if err != nil {
	// keep the offending value in the error so a bad setting is easy to spot
	return HTTPSPollNodeStateOp{}, fmt.Errorf("invalid timeout value %s: %w", timeoutSecondStr, err)
}
httpsPollNodeStateOp.timeout = timeoutSecond
return httpsPollNodeStateOp, nil
}

Expand Down Expand Up @@ -98,13 +115,7 @@ func (op *HTTPSPollNodeStateOp) finalize(_ *OpEngineExecContext) error {

func (op *HTTPSPollNodeStateOp) processResult(execContext *OpEngineExecContext) error {
startTime := time.Now()
timeoutSecondStr := util.GetEnv("NODE_STATE_POLLING_TIMEOUT", strconv.Itoa(StartupPollingTimeout))
timeoutSecond, err := strconv.Atoi(timeoutSecondStr)
if err != nil {
return fmt.Errorf("invalid timeout value %s: %w", timeoutSecondStr, err)
}

duration := time.Duration(timeoutSecond) * time.Second
duration := time.Duration(op.timeout) * time.Second
count := 0
for endTime := startTime.Add(duration); ; {
if time.Now().After(endTime) {
Expand Down Expand Up @@ -134,7 +145,7 @@ func (op *HTTPSPollNodeStateOp) processResult(execContext *OpEngineExecContext)
// show the hosts that are not UP
sort.Strings(op.notUpHosts)
msg := fmt.Sprintf("The following hosts are not up after %d seconds: %v",
timeoutSecond, op.notUpHosts)
op.timeout, op.notUpHosts)
vlog.LogPrintError(msg)
return errors.New(msg)
}
Expand All @@ -159,7 +170,7 @@ func (op *HTTPSPollNodeStateOp) shouldStopPolling() (bool, error) {
// We don't need to wait until timeout to determine if all nodes are up or not.
// If we find the wrong password for the HTTPS service on any host, we should fail immediately.
if result.IsPasswordandCertificateError() {
vlog.LogPrintError("[%s] Database is UP, but user has provided wrong credentials so unable to perform further operations",
vlog.LogPrintError("[%s] All nodes are UP, but the credentials are incorrect. Catalog sync failed.",
op.name)
return false, fmt.Errorf("[%s] wrong password/certificate for https service on host %s",
op.name, host)
Expand Down
10 changes: 6 additions & 4 deletions vclusterops/restart_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type VRestartNodesOptions struct {
DatabaseOptions
// A set of nodes(nodename - host) that we want to restart in the database
Nodes map[string]string
// timeout (in seconds) for polling the nodes that we want to restart in HTTPSPollNodeStateOp
StatePollingTimeout int
}

type VRestartNodesInfo struct {
Expand All @@ -46,7 +48,7 @@ func VRestartNodesOptionsFactory() VRestartNodesOptions {

// set default values to the params
opt.setDefaultValues()

opt.StatePollingTimeout = util.DefaultTimeoutSeconds
return opt
}

Expand All @@ -60,7 +62,7 @@ func (options *VRestartNodesOptions) validateRequiredOptions() error {
return err
}
if len(options.Nodes) == 0 {
return fmt.Errorf("must specify a list of NODENAME=REIPHOST pairs")
return fmt.Errorf("--restart option is required")
}

return nil
Expand Down Expand Up @@ -276,8 +278,8 @@ func produceRestartNodesInstructions(restartNodeInfo *VRestartNodesInfo, options
return instructions, err
}
nmaRestartNewNodesOp := makeNMAStartNodeOpWithVDB(restartNodeInfo.HostsToRestart, vdb)
httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOp(restartNodeInfo.HostsToRestart,
options.usePassword, *options.UserName, options.Password)
httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOpWithTimeout(restartNodeInfo.HostsToRestart,
options.usePassword, *options.UserName, options.Password, options.StatePollingTimeout)
if err != nil {
return instructions, err
}
Expand Down
63 changes: 25 additions & 38 deletions vclusterops/start_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,18 @@ import (

// Normal strings are easier and safer to use in Go.
type VStartDatabaseOptions struct {
// part 1: basic db info
// basic db info
DatabaseOptions
// part 2: hidden info
UsePassword bool
}

type VStartDatabaseInfo struct {
DBName string
Hosts []string
UserName string
Password *string
CatalogPath string
HostCatalogPath map[string]string
// Timeout (in seconds) for polling the states of all nodes in the database in HTTPSPollNodeStateOp
StatePollingTimeout int
}

// VStartDatabaseOptionsFactory builds a VStartDatabaseOptions value: it calls
// SetDefaultValues on a zero value and then sets StatePollingTimeout to
// util.DefaultTimeoutSeconds.
func VStartDatabaseOptionsFactory() VStartDatabaseOptions {
	var startOptions VStartDatabaseOptions

	// populate the defaults first, then apply the polling timeout on top
	startOptions.SetDefaultValues()
	startOptions.StatePollingTimeout = util.DefaultTimeoutSeconds

	return startOptions
}

Expand Down Expand Up @@ -109,14 +100,14 @@ func (vcc *VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) erro
return err
}

// build startDBInfo from config file and options
startDBInfo := new(VStartDatabaseInfo)
startDBInfo.DBName, startDBInfo.Hosts = options.GetNameAndHosts(config)
startDBInfo.HostCatalogPath = make(map[string]string)
startDBInfo.CatalogPath = options.GetCatalogPrefix(config)
// get db name and hosts from config file and options
dbName, hosts := options.GetNameAndHosts(config)
options.Name = &dbName
options.Hosts = hosts
options.CatalogPrefix = options.GetCatalogPrefix(config)

// produce start_db instructions
instructions, err := produceStartDBInstructions(startDBInfo, options)
instructions, err := produceStartDBInstructions(options)
if err != nil {
err = fmt.Errorf("fail to production instructions: %w", err)
return err
Expand All @@ -142,37 +133,33 @@ func (vcc *VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) erro
// The generated instructions will later perform the following operations necessary
// for a successful start_db:
// - Check NMA connectivity
// - Check to see if any dbs running
// - Check Vertica versions
// - Check to see if any dbs running
// - Use NMA /catalog/database to get the best source node for spread.conf and vertica.conf
// - Sync the confs to the remaining nodes that have a lower catalog version (results from the previous step)
// - Start all nodes of the database
// - Poll node startup
// - Sync catalog (Eon mode only)
func produceStartDBInstructions(startDBInfo *VStartDatabaseInfo, options *VStartDatabaseOptions) ([]ClusterOp, error) {
func produceStartDBInstructions(options *VStartDatabaseOptions) ([]ClusterOp, error) {
var instructions []ClusterOp

nmaHealthOp := makeNMAHealthOp(startDBInfo.Hosts)
nmaHealthOp := makeNMAHealthOp(options.Hosts)
// require to have the same vertica version
nmaVerticaVersionOp := makeNMAVerticaVersionOp(startDBInfo.Hosts, true)
nmaVerticaVersionOp := makeNMAVerticaVersionOp(options.Hosts, true)
// need username for https operations
usePassword := false
if options.Password != nil {
usePassword = true
err := options.ValidateUserName()
if err != nil {
return instructions, err
}
err := options.SetUsePassword()
if err != nil {
return instructions, err
}

checkDBRunningOp, err := makeHTTPCheckRunningDBOp(startDBInfo.Hosts,
usePassword, *options.UserName, options.Password, StartDB)
checkDBRunningOp, err := makeHTTPCheckRunningDBOp(options.Hosts,
options.usePassword, *options.UserName, options.Password, StartDB)
if err != nil {
return instructions, err
}

vdb := VCoordinationDatabase{}
nmaGetNodesInfoOp := makeNMAGetNodesInfoOp(startDBInfo.Hosts, *options.Name, startDBInfo.CatalogPath, &vdb)
nmaGetNodesInfoOp := makeNMAGetNodesInfoOp(options.Hosts, *options.Name, *options.CatalogPrefix, &vdb)

nmaReadCatalogEditorOp, err := makeNMAReadCatalogEditorOp([]string{}, &vdb)
if err != nil {
Expand All @@ -192,12 +179,12 @@ func produceStartDBInstructions(startDBInfo *VStartDatabaseInfo, options *VStart
// we will remove the nil parameters in VER-88401 by adding them in execContext
produceTransferConfigOps(&instructions,
nil, /*source hosts for transferring configuration files*/
startDBInfo.Hosts,
options.Hosts,
nil /*db configurations retrieved from a running db*/)

nmaStartNewNodesOp := makeNMAStartNodeOp(startDBInfo.Hosts)
httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOp(startDBInfo.Hosts,
usePassword, *options.UserName, options.Password)
nmaStartNewNodesOp := makeNMAStartNodeOp(options.Hosts)
httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOpWithTimeout(options.Hosts,
options.usePassword, *options.UserName, options.Password, options.StatePollingTimeout)
if err != nil {
return instructions, err
}
Expand All @@ -208,7 +195,7 @@ func produceStartDBInstructions(startDBInfo *VStartDatabaseInfo, options *VStart
)

if options.IsEon.ToBool() {
httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(startDBInfo.Hosts, true, *options.UserName, options.Password)
httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(options.Hosts, true, *options.UserName, options.Password)
if err != nil {
return instructions, err
}
Expand Down
8 changes: 4 additions & 4 deletions vclusterops/vcluster_database_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,16 +303,16 @@ func (opt *DatabaseOptions) GetHosts(config *ClusterConfig) (hosts []string) {
}

// GetCatalogPrefix can choose the right catalog prefix from user input and config file
func (opt *DatabaseOptions) GetCatalogPrefix(config *ClusterConfig) (catalogPrefix string) {
func (opt *DatabaseOptions) GetCatalogPrefix(config *ClusterConfig) (catalogPrefix *string) {
// when config file is not available, we use user input
// HonorUserInput must be true at this point; otherwise vcluster would already have stopped when it could not find the config file
if config == nil {
return *opt.CatalogPrefix
return opt.CatalogPrefix
}
catalogPrefix = config.CatalogPath
catalogPrefix = &config.CatalogPath
// if HonorUserInput is set, we choose the user input
if *opt.CatalogPrefix != "" && *opt.HonorUserInput {
catalogPrefix = *opt.CatalogPrefix
catalogPrefix = opt.CatalogPrefix
}
return catalogPrefix
}
Expand Down

0 comments on commit b466b39

Please sign in to comment.