diff --git a/commands/cluster_command_launcher.go b/commands/cluster_command_launcher.go index 4bc8573..6912556 100644 --- a/commands/cluster_command_launcher.go +++ b/commands/cluster_command_launcher.go @@ -32,9 +32,15 @@ const defaultLogPath = "/opt/vertica/log/vcluster.log" const defaultExecutablePath = "/opt/vertica/bin/vcluster" const CLIVersion = "2.0.0" -const vclusterLogPathEnv = "VCLUSTER_LOG_PATH" -const vclusterKeyFileEnv = "VCLUSTER_KEY_FILE" -const vclusterCertFileEnv = "VCLUSTER_CERT_FILE" + +// environment variables +const ( + vclusterLogPathEnv = "VCLUSTER_LOG_PATH" + vclusterKeyFileEnv = "VCLUSTER_KEY_FILE" + vclusterCertFileEnv = "VCLUSTER_CERT_FILE" + vclusterCACertFileEnv = "VCLUSTER_CA_CERT_FILE" + vclusterTLSModeEnv = "VCLUSTER_TLS_MODE" +) // *Flag is for the flag name, *Key is for viper key name // They are bound together @@ -67,6 +73,10 @@ const ( keyFileKey = "keyFile" certFileFlag = "cert-file" certFileKey = "certFile" + caCertFileFlag = "ca-cert-file" + caCertFileKey = "caCertFile" + tlsModeFlag = "tls-mode" + tlsModeKey = "tlsMode" passwordFlag = "password" passwordKey = "password" passwordFileFlag = "password-file" @@ -127,6 +137,8 @@ var flagKeyMap = map[string]string{ logPathFlag: logPathKey, keyFileFlag: keyFileKey, certFileFlag: certFileKey, + caCertFileFlag: caCertFileKey, + tlsModeFlag: tlsModeKey, passwordFlag: passwordKey, passwordFileFlag: passwordFileKey, readPasswordFromPromptFlag: readPasswordFromPromptKey, @@ -149,6 +161,15 @@ var targetFlagKeyMap = map[string]string{ targetPasswordFileFlag: targetPasswordFileKey, } +// map of viper keys to environment variables +var keyEnvVarMap = map[string]string{ + logPathKey: vclusterLogPathEnv, + keyFileKey: vclusterKeyFileEnv, + certFileKey: vclusterCertFileEnv, + caCertFileKey: vclusterCACertFileEnv, + tlsModeKey: vclusterTLSModeEnv, +} + const ( createDBSubCmd = "create_db" stopDBSubCmd = "stop_db" @@ -183,10 +204,12 @@ const ( // cmdGlobals holds global variables shared by multiple // commands type cmdGlobals struct { - verbose bool - file *os.File - keyFile string - certFile string + verbose bool + file *os.File + keyFile string + certFile string + caCertFile string + tlsMode string // Global variables for targetDB are used for the replication subcommand targetHosts []string @@ -262,6 +285,8 @@ func initVcc(cmd *cobra.Command) vclusterops.VClusterCommands { } // setDBOptionsUsingViper can set the value of flag using the relevant key in viper +// +//nolint:gocyclo func setDBOptionsUsingViper(flag string) error { switch flag { case dbNameFlag: @@ -288,6 +313,10 @@ func setDBOptionsUsingViper(flag string) error { globals.keyFile = viper.GetString(keyFileKey) case certFileFlag: globals.certFile = viper.GetString(certFileKey) + case caCertFileFlag: + globals.caCertFile = viper.GetString(caCertFileKey) + case tlsModeFlag: + globals.tlsMode = viper.GetString(tlsModeKey) case verboseFlag: globals.verbose = viper.GetBool(verboseKey) default: @@ -329,13 +358,13 @@ func configViper(cmd *cobra.Command, flagsInConfig []string) error { } // log-path is a flag that all the subcommands need flagsInConfig = append(flagsInConfig, logPathFlag) - // cert-file and key-file are not available for + // TLS related flags are not available for // - manage_config // - manage_config show // - create_connection if cmd.CalledAs() != manageConfigSubCmd && cmd.CalledAs() != configShowSubCmd && cmd.CalledAs() != createConnectionSubCmd { - flagsInConfig = append(flagsInConfig, certFileFlag, keyFileFlag) + flagsInConfig = append(flagsInConfig, certFileFlag, keyFileFlag, caCertFileFlag, tlsModeFlag) } // bind viper keys to cobra flags @@ -364,17 +393,11 @@ func configViper(cmd *cobra.Command, flagsInConfig []string) error { // bind viper keys to env vars func bindKeysToEnv() error { - err := viper.BindEnv(logPathKey, vclusterLogPathEnv) - if err != nil { - return fmt.Errorf("fail to bind viper key %q to environment variable %q: %w", logPathKey, vclusterLogPathEnv, err) - } - err = viper.BindEnv(keyFileKey, vclusterKeyFileEnv) - if err != nil { - return fmt.Errorf("fail to bind viper key %q to environment variable %q: %w", keyFileKey, vclusterKeyFileEnv, err) - } - err = viper.BindEnv(certFileKey, vclusterCertFileEnv) - if err != nil { - return fmt.Errorf("fail to bind viper key %q to environment variable %q: %w", certFileKey, vclusterCertFileEnv, err) + for key, envVar := range keyEnvVarMap { + err := viper.BindEnv(key, envVar) + if err != nil { + return fmt.Errorf("fail to bind viper key %q to environment variable %q: %w", key, envVar, err) + } } return nil } diff --git a/commands/cmd_base.go b/commands/cmd_base.go index 864d3c2..d883de6 100644 --- a/commands/cmd_base.go +++ b/commands/cmd_base.go @@ -35,6 +35,13 @@ const ( defConfigParamFileName = "config_param.json" ) +// all three tls modes enable TLS and present (client) cert if requested +const ( + tlsModeEnable = "enable" // skip validating peer (server) cert + tlsModeVerifyCA = "verify-ca" // validate peer cert signer chain, skipping hostname validation + tlsModeVerifyFull = "verify-full" // validate peer cert signer chain and hostname +) + /* CmdBase * * Basic/common fields of vcluster commands @@ -62,6 +69,28 @@ func (c *CmdBase) ValidateParseBaseOptions(opt *vclusterops.DatabaseOptions) err } } + // parse TLS mode. vclusterops allows different behavior for NMA and HTTPS conns, but + // for simplicity and lack of use case outside k8s, vcluster does not. + if globals.tlsMode != "" { + switch tlsMode := strings.ToLower(globals.tlsMode); tlsMode { + case tlsModeEnable: + opt.DoVerifyHTTPSServerCert = false + opt.DoVerifyNMAServerCert = false + opt.DoVerifyPeerCertHostname = false + case tlsModeVerifyCA: + opt.DoVerifyHTTPSServerCert = true + opt.DoVerifyNMAServerCert = true + opt.DoVerifyPeerCertHostname = false + case tlsModeVerifyFull: + opt.DoVerifyHTTPSServerCert = true + opt.DoVerifyNMAServerCert = true + opt.DoVerifyPeerCertHostname = true + default: + return fmt.Errorf("unrecognized TLS mode: %s. Allowed values are: '%s', '%s'", + globals.tlsMode, tlsModeEnable, tlsModeVerifyCA) + } + } + return nil } @@ -96,26 +125,13 @@ func (c *CmdBase) setCommonFlags(cmd *cobra.Command, flags []string) { false, "Whether show the details of VCluster run in the console", ) - // keyFile and certFile are flags that all subcommands require, - // except for create_connection and manage_config show - if cmd.Name() != configShowSubCmd && cmd.Name() != createConnectionSubCmd { - cmd.Flags().StringVar( - &globals.keyFile, - keyFileFlag, - "", - fmt.Sprintf("Path to the key file, the default value is %s", filepath.Join(vclusterops.CertPathBase, "{username}.key")), - ) - markFlagsFileName(cmd, map[string][]string{keyFileFlag: {"key"}}) - cmd.Flags().StringVar( - &globals.certFile, - certFileFlag, - "", - fmt.Sprintf("Path to the cert file, the default value is %s", filepath.Join(vclusterops.CertPathBase, "{username}.pem")), - ) - markFlagsFileName(cmd, map[string][]string{certFileFlag: {"pem", "crt"}}) - cmd.MarkFlagsRequiredTogether(keyFileFlag, certFileFlag) + // TLS related flags are allowed by all subcommands, + // except for create_connection and manage_config show. + if cmd.Name() != configShowSubCmd && cmd.Name() != createConnectionSubCmd { + c.setTLSFlags(cmd) } + if util.StringInArray(outputFileFlag, flags) { cmd.Flags().StringVarP( &c.output, @@ -225,6 +241,47 @@ func (c *CmdBase) setConfigFlags(cmd *cobra.Command, flags []string) { } } +// setTLSFlags sets the TLS options in global variables for later processing +// into vclusterops options. +func (c *CmdBase) setTLSFlags(cmd *cobra.Command) { + // vcluster CLI reads certs into memory before calling vclusterops only + // if non-default values are specified, which is why the defaults here + // are "" despite being listed otherwise in the help messages. + // Those defaults are used by vclusterops if no in-memory certs are provided. + cmd.Flags().StringVar( + &globals.keyFile, + keyFileFlag, + "", + fmt.Sprintf("Path to the key file, the default value is %s", filepath.Join(vclusterops.CertPathBase, "{username}.key")), + ) + markFlagsFileName(cmd, map[string][]string{keyFileFlag: {"key"}}) + + cmd.Flags().StringVar( + &globals.certFile, + certFileFlag, + "", + fmt.Sprintf("Path to the cert file, the default value is %s", filepath.Join(vclusterops.CertPathBase, "{username}.pem")), + ) + markFlagsFileName(cmd, map[string][]string{certFileFlag: {"pem", "crt"}}) + cmd.MarkFlagsRequiredTogether(keyFileFlag, certFileFlag) + + cmd.Flags().StringVar( + &globals.caCertFile, + caCertFileFlag, + "", + fmt.Sprintf("Path to the trusted CA cert file, the default value is %s", filepath.Join(vclusterops.CertPathBase, "rootca.pem")), + ) + markFlagsFileName(cmd, map[string][]string{caCertFileFlag: {"pem", "crt"}}) + + cmd.Flags().StringVar( + &globals.tlsMode, + tlsModeFlag, + "", + fmt.Sprintf("Mode for TLS validation. Allowed values '%s', '%s', and '%s'. Default value is '%s'.", + tlsModeEnable, tlsModeVerifyCA, tlsModeVerifyFull, tlsModeEnable), + ) +} + func (c *CmdBase) initConfigParam() error { // We need to find the path to the config param. The order of precedence is as follows: // 1. Option @@ -462,6 +519,7 @@ func (c *CmdBase) initCmdOutputFile() (*os.File, error) { // getCertFilesFromPaths will update cert and key file from cert path options func (c *CmdBase) getCertFilesFromCertPaths(opt *vclusterops.DatabaseOptions) error { + // TODO don't make this conditional on not using a PW for auth (see callers) if globals.certFile != "" { certData, err := os.ReadFile(globals.certFile) if err != nil { @@ -476,5 +534,12 @@ func (c *CmdBase) getCertFilesFromCertPaths(opt *vclusterops.DatabaseOptions) er } opt.Key = string(keyData) } + if globals.caCertFile != "" { + caCertData, err := os.ReadFile(globals.caCertFile) + if err != nil { + return fmt.Errorf("failed to read trusted CA certificate file: %w", err) + } + opt.CaCert = string(caCertData) + } return nil } diff --git a/commands/cmd_scrutinize.go b/commands/cmd_scrutinize.go index cdce8ed..86b2faa 100644 --- a/commands/cmd_scrutinize.go +++ b/commands/cmd_scrutinize.go @@ -217,7 +217,6 @@ func (c *CmdScrutinize) validateParse(logger vlog.Printer) error { } } - // parses host list and ipv6 - eon is irrelevant but handled err := c.ValidateParseBaseOptions(&c.sOptions.DatabaseOptions) if err != nil { return err diff --git a/vclusterops/cluster_op.go b/vclusterops/cluster_op.go index 1f2c854..16efbf0 100644 --- a/vclusterops/cluster_op.go +++ b/vclusterops/cluster_op.go @@ -197,7 +197,7 @@ type clusterOp interface { logExecute() logFinalize() setupBasicInfo() - loadCertsIfNeeded(tlsOptions opTLSOptions) error + applyTLSOptions(tlsOptions opTLSOptions) error isSkipExecute() bool filterUnreachableHosts(execContext *opEngineExecContext) } @@ -385,11 +385,13 @@ func (op *opBase) runExecute(execContext *opEngineExecContext) error { type opTLSOptions interface { hasCerts() bool getCerts() *httpsCerts + getTLSModes() *tlsModes } -// if found certs in the options, we add the certs to http requests of each instruction -func (op *opBase) loadCertsIfNeeded(tlsOptions opTLSOptions) error { - if tlsOptions == nil || !tlsOptions.hasCerts() { +// applyTLSOptions processes TLS options here, like in-memory certificates or TLS modes, +// rather than plumbing them through to every op. +func (op *opBase) applyTLSOptions(tlsOptions opTLSOptions) error { + if tlsOptions == nil { return nil } @@ -399,14 +401,25 @@ func (op *opBase) loadCertsIfNeeded(tlsOptions opTLSOptions) error { } // retrieve certs once to avoid extra copies - certs := tlsOptions.getCerts() - if certs == nil { - return fmt.Errorf("[%s] is trying to use certificates, but none are set", op.name) + var certs *httpsCerts + if tlsOptions.hasCerts() { + certs = tlsOptions.getCerts() + if certs == nil { + return fmt.Errorf("[%s] is trying to use certificates, but none are set", op.name) + } + } + + // always retrieve TLS modes + tlsModes := tlsOptions.getTLSModes() + if tlsModes == nil { + return fmt.Errorf("[%s] unable to retrieve TLS modes from interface", op.name) } + // modify requests with TLS options for host := range op.clusterHTTPRequest.RequestCollection { request := op.clusterHTTPRequest.RequestCollection[host] request.setCerts(certs) + request.setTLSMode(tlsModes) op.clusterHTTPRequest.RequestCollection[host] = request } return nil diff --git a/vclusterops/cluster_op_engine.go b/vclusterops/cluster_op_engine.go index 3966e38..d38f9dc 100644 --- a/vclusterops/cluster_op_engine.go +++ b/vclusterops/cluster_op_engine.go @@ -78,12 +78,12 @@ func (opEngine *VClusterOpEngine) runInstruction( // start the progress spinner op.startSpinner() - err = op.loadCertsIfNeeded(opEngine.tlsOptions) + err = op.applyTLSOptions(opEngine.tlsOptions) if err != nil { // here we do not return an error as the spinner error does not // affect the functionality op.stopFailSpinnerWithMessage(err.Error()) - return fmt.Errorf("loadCertsIfNeeded for %s failed, details: %w", op.getName(), err) + return fmt.Errorf("applying TLS options for %s failed, details: %w", op.getName(), err) } // execute an instruction diff --git a/vclusterops/http_adapter.go b/vclusterops/http_adapter.go index 83daac5..a592e26 100644 --- a/vclusterops/http_adapter.go +++ b/vclusterops/http_adapter.go @@ -178,17 +178,18 @@ func (*responseBodyReader) processResponseBody(resp *http.Response) (bodyString } func (downloader *responseBodyDownloader) processResponseBody(resp *http.Response) (bodyString string, err error) { - if isSuccess(resp) { - bytesWritten, err := downloader.downloadFile(resp) - if err != nil { - err = fmt.Errorf("fail to stream the response body to file %s: %w", downloader.destFilePath, err) - } else { - downloader.logger.Info("File downloaded", "File", downloader.destFilePath, "Bytes", bytesWritten) - } - return "", err + if !isSuccess(resp) { + // in case of error, we get an RFC7807 error, not a file + return readResponseBody(resp) } - // in case of error, we get an RFC7807 error, not a file - return readResponseBody(resp) + + bytesWritten, err := downloader.downloadFile(resp) + if err != nil { + err = fmt.Errorf("fail to stream the response body to file %s: %w", downloader.destFilePath, err) + } else { + downloader.logger.Info("File downloaded", "File", downloader.destFilePath, "Bytes", bytesWritten) + } + return "", err } // downloadFile uses buffered read/writes to download the http response body to a file @@ -205,8 +206,7 @@ func (downloader *responseBodyDownloader) downloadFile(resp *http.Response) (byt func readResponseBody(resp *http.Response) (bodyString string, err error) { bodyBytes, err := io.ReadAll(resp.Body) if err != nil { - err = fmt.Errorf("fail to read the response body: %w", err) - return "", err + return "", fmt.Errorf("fail to read the response body: %w", err) } bodyString = string(bodyBytes) @@ -343,8 +343,6 @@ func (adapter *httpAdapter) setupHTTPClient( request *hostHTTPRequest, usePassword bool, _ chan<- hostHTTPResult) (*http.Client, error) { - var client *http.Client - // set up request timeout requestTimeout := time.Duration(defaultRequestTimeout) if request.Timeout > 0 { @@ -353,17 +351,15 @@ func (adapter *httpAdapter) setupHTTPClient( requestTimeout = time.Duration(0) // a Timeout of zero means no timeout. } + client := &http.Client{Timeout: time.Second * requestTimeout} + var config *tls.Config + if usePassword { // TODO: we have to use `InsecureSkipVerify: true` here, // as password is used //nolint:gosec - client = &http.Client{ - Timeout: time.Second * requestTimeout, - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - }, + config = &tls.Config{ + InsecureSkipVerify: true, } } else { var cert tls.Certificate @@ -377,25 +373,73 @@ func (adapter *httpAdapter) setupHTTPClient( if err != nil { return client, err } - // for both http and nma, we have to use `InsecureSkipVerify: true` here - // because the certs are self signed at this time - // TODO: update the InsecureSkipVerify once we start to use non-self-signed certs + // by default, skip peer certificate validation, but allow overrides //nolint:gosec - client = &http.Client{ - Timeout: time.Second * requestTimeout, - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{ - Certificates: []tls.Certificate{cert}, - RootCAs: caCertPool, - InsecureSkipVerify: true, - }, - }, + config = &tls.Config{ + Certificates: []tls.Certificate{cert}, + RootCAs: caCertPool, + InsecureSkipVerify: true, + } + if request.TLSDoVerify { + if request.TLSDoVerifyHostname { + // use the built-in golang verification process to validate certificate signer chain + // and hostname + config.InsecureSkipVerify = false + } else { + // Note that hosts at this point are IP addresses, so verify-full may be impractical + // or impossible due to the complications of issuing certificates valid for IPs. + // Hence the custom validator skipping hostname validation. + config.VerifyPeerCertificate = adapter.generateTLSVerifyFunc(caCertPool) + } } } + + client.Transport = &http.Transport{TLSClientConfig: config} return client, nil } +// generateTLSVerifyFunc returns a callback function suitable for use as the VerifyPeerCertificate +// field of a tls.Config struct. It is a slightly less performant but logically equivalent version of +// the validation logic which gets run when InsecureSkipVerify == false in go v1.20.11. The difference +// is that hostname validation is elided, which is not possible without custom verification. +// +// See crypto/x509/verify.go for hostname validation behavior and crypto/tls/handshake_client.go for +// the reference implementation of this function. +func (*httpAdapter) generateTLSVerifyFunc(rootCAs *x509.CertPool) func([][]byte, [][]*x509.Certificate) error { + return func(certificates [][]byte, _ [][]*x509.Certificate) error { + // Reparse certs. The crypto/tls package version does some extra checks, but they're already + // done by this point, so no need to repeat them. It also uses a cache to reduce parsing, which + // isn't included here, but could be if there is a perf issue. + certs := make([]*x509.Certificate, len(certificates)) + for i, asn1Data := range certificates { + cert, err := x509.ParseCertificate(asn1Data) + if err != nil { + return err + } + certs[i] = cert + } + + // construct verification options like reference implementation, minus hostname + opts := x509.VerifyOptions{ + Roots: rootCAs, + CurrentTime: time.Now(), + DNSName: "", + Intermediates: x509.NewCertPool(), + } + + for _, cert := range certs[1:] { + opts.Intermediates.AddCert(cert) + } + _, err := certs[0].Verify(opts) + if err != nil { + return &tls.CertificateVerificationError{UnverifiedCertificates: certs, Err: err} + } + + return nil + } +} + func buildQueryParamString(queryParams map[string]string) string { var queryParamString string if len(queryParams) == 0 { diff --git a/vclusterops/http_request.go b/vclusterops/http_request.go index 391e88c..c37d1f7 100644 --- a/vclusterops/http_request.go +++ b/vclusterops/http_request.go @@ -27,8 +27,10 @@ type hostHTTPRequest struct { Timeout int // optional, set it if an Op needs longer time to complete // optional, for calling NMA/Vertica HTTPS endpoints. If Username/Password is set, that takes precedence over this for HTTPS calls. - UseCertsInOptions bool - Certs httpsCerts + UseCertsInOptions bool + Certs httpsCerts + TLSDoVerify bool + TLSDoVerifyHostname bool } type httpsCerts struct { @@ -37,13 +39,37 @@ type httpsCerts struct { caCert string } +type tlsModes struct { + doVerifyNMAServerCert bool + doVerifyHTTPSServerCert bool + doVerifyPeerCertHostname bool +} + func (req *hostHTTPRequest) setCerts(certs *httpsCerts) { + if certs == nil { + return + } req.UseCertsInOptions = true req.Certs.key = certs.key req.Certs.cert = certs.cert req.Certs.caCert = certs.caCert } +func (req *hostHTTPRequest) setTLSMode(modes *tlsModes) { + if modes == nil { + return + } + if req.IsNMACommand { + req.TLSDoVerify = modes.doVerifyNMAServerCert + } else { + req.TLSDoVerify = modes.doVerifyHTTPSServerCert + } + // only do hostname validation if regular validation is enabled + if req.TLSDoVerify { + req.TLSDoVerifyHostname = modes.doVerifyPeerCertHostname + } +} + func (req *hostHTTPRequest) buildNMAEndpoint(url string) { req.IsNMACommand = true req.Endpoint = NMACurVersion + url diff --git a/vclusterops/https_poll_subcluster_node_state_op.go b/vclusterops/https_poll_subcluster_node_state_op.go index 37c71ce..a4e2aca 100644 --- a/vclusterops/https_poll_subcluster_node_state_op.go +++ b/vclusterops/https_poll_subcluster_node_state_op.go @@ -108,6 +108,12 @@ func (op *httpsPollSubclusterNodeStateOp) prepare(execContext *opEngineExecConte op.hosts = append(op.hosts, vnode.Address) } } + // if there are still no hosts to check, e.g. empty subcluster, skip the op + if len(op.hosts) == 0 { + op.logger.PrintInfo("[%s] No nodes to poll for. Skipping operation.", op.name) + op.skipExecute = true + return nil + } execContext.dispatcher.setup(op.hosts) return op.setupClusterHTTPRequest(op.hosts) } diff --git a/vclusterops/https_stage_system_tables_op.go b/vclusterops/https_stage_system_tables_op.go index cdb63c9..76d956e 100644 --- a/vclusterops/https_stage_system_tables_op.go +++ b/vclusterops/https_stage_system_tables_op.go @@ -215,7 +215,7 @@ func (op *httpsStageSystemTablesOp) execute(execContext *opEngineExecContext) er if err := op.setupClusterHTTPRequest(op.hosts, systemTableInfo.Schema, systemTableInfo.TableName); err != nil { return err } - if err := op.opBase.loadCertsIfNeeded(op.tlsOptions); err != nil { + if err := op.opBase.applyTLSOptions(op.tlsOptions); err != nil { return err } op.logger.Info("Staging System Table:", "Schema", systemTableInfo.Schema, "Table", systemTableInfo.TableName) @@ -263,9 +263,10 @@ func (op *httpsStageSystemTablesOp) finalize(_ *opEngineExecContext) error { return nil } -// loadCertsIfNeeded shadows the op base function and stashes the certs instead of immediately setting them, -// as httpsStageSystemTablesOp delays creation of request objects and resets them repeatedly -func (op *httpsStageSystemTablesOp) loadCertsIfNeeded(tlsOptions opTLSOptions) error { +// applyTLSOptions shadows the op base function and stashes the interface providing certificates +// and other TLS options (like modes) instead of immediately setting them, as httpsStageSystemTablesOp +// delays creation of request objects and resets them repeatedly +func (op *httpsStageSystemTablesOp) applyTLSOptions(tlsOptions opTLSOptions) error { op.tlsOptions = tlsOptions return nil } diff --git a/vclusterops/remove_node.go b/vclusterops/remove_node.go index ef07c05..5210986 100644 --- a/vclusterops/remove_node.go +++ b/vclusterops/remove_node.go @@ -408,7 +408,7 @@ func (vcc VClusterCommands) produceRemoveNodeInstructions(vdb *VCoordinationData return instructions, err } - httpsReloadSpreadOp, err := makeHTTPSReloadSpreadOpWithInitiator(initiatorHost, true, username, password) + httpsReloadSpreadOp, err := makeHTTPSReloadSpreadOpWithInitiator(initiatorHost, usePassword, username, password) if err != nil { return instructions, err } @@ -422,7 +422,7 @@ func (vcc VClusterCommands) produceRemoveNodeInstructions(vdb *VCoordinationData instructions = append(instructions, &nmaHealthOp, &nmaDeleteDirectoriesOp) if vdb.IsEon { - httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(initiatorHost, true, username, password, RemoveNodeSyncCat) + httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(initiatorHost, usePassword, username, password, RemoveNodeSyncCat) if err != nil { return instructions, err } diff --git a/vclusterops/start_node.go b/vclusterops/start_node.go index 819a19d..d30a617 100644 --- a/vclusterops/start_node.go +++ b/vclusterops/start_node.go @@ -37,8 +37,7 @@ type VStartNodesOptions struct { // you may not want to have both the NMA and Vertica server in the same container. // This feature requires version 24.2.0+. StartUpConf string - - vdb *VCoordinationDatabase + vdb *VCoordinationDatabase } type VStartNodesInfo struct { @@ -59,6 +58,10 @@ type VStartNodesInfo struct { unreachableHosts []string // is start subcluster command isStartSc bool + // use this when up host count is lesser than hosts to be re-ip'd + SerialReIP bool + // Number of up hosts + upHostCount int } func VStartNodesOptionsFactory() VStartNodesOptions { @@ -301,12 +304,16 @@ func (vcc VClusterCommands) VStartNodes(options *VStartNodesOptions) error { func (options *VStartNodesOptions) checkQuorum(vdb *VCoordinationDatabase, restartNodeInfo *VStartNodesInfo) error { sandboxPrimaryUpNodes := []string{} var lenOfPrimaryReIPLIst int + upHostCount := 0 reIPMap := make(map[string]bool, len(restartNodeInfo.ReIPList)) for _, name := range restartNodeInfo.NodeNamesToStart { reIPMap[name] = true } for _, vnode := range vdb.HostNodeMap { if vnode.IsPrimary { + if vnode.State == util.NodeUpState { + upHostCount++ + } if vnode.State == util.NodeUpState && vnode.Sandbox == restartNodeInfo.Sandbox { sandboxPrimaryUpNodes = append(sandboxPrimaryUpNodes, vnode.Address) } @@ -315,6 +322,10 @@ func (options *VStartNodesOptions) checkQuorum(vdb *VCoordinationDatabase, resta } } } + restartNodeInfo.upHostCount = upHostCount + if upHostCount < len(restartNodeInfo.ReIPList) { + restartNodeInfo.SerialReIP = true + } if len(sandboxPrimaryUpNodes) <= lenOfPrimaryReIPLIst { return &ReIPNoClusterQuorumError{ Detail: fmt.Sprintf("Quorum check failed: %d up node(s) is/are not enough to re-ip %d primary node(s)", @@ -346,14 +357,12 @@ func (options *VStartNodesOptions) checkQuorum(vdb *VCoordinationDatabase, resta func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartNodesInfo, options *VStartNodesOptions, vdb *VCoordinationDatabase) ([]clusterOp, error) { var instructions []clusterOp - nmaHealthOp := makeNMAHealthOpSkipUnreachable(options.Hosts) // need username for https operations err := options.setUsePasswordAndValidateUsernameIfNeeded(vcc.Log) if err != nil { return instructions, err } - httpsGetUpNodesOp, err := makeHTTPSGetUpNodesOp(options.DBName, options.Hosts, options.usePassword, options.UserName, options.Password, StartNodeCmd) if err != nil { @@ -363,15 +372,31 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN &nmaHealthOp, &httpsGetUpNodesOp, ) - // If we identify any nodes that need re-IP, HostsToStart will contain the nodes that need re-IP. // Otherwise, HostsToStart will consist of all hosts with IPs recorded in the catalog, which are provided by user input. if len(startNodeInfo.ReIPList) != 0 { nmaNetworkProfileOp := makeNMANetworkProfileOp(startNodeInfo.ReIPList) - httpsReIPOp, e := makeHTTPSReIPOp(startNodeInfo.NodeNamesToStart, startNodeInfo.ReIPList, - options.usePassword, options.UserName, options.Password) - if e != nil { - return instructions, e + instructions = append(instructions, &nmaNetworkProfileOp) + if startNodeInfo.SerialReIP { + // when we have lesser up(initiator) hosts than nodes to reip, we send reip requests in chunks of upHostCount size + var reipOps []clusterOp + chunkedNodeNamesTostart, chunkedReipList := getChunkedNodeLists(startNodeInfo) + for i, hostChunk := range chunkedReipList { + ReIPOp, e := makeHTTPSReIPOp(chunkedNodeNamesTostart[i], hostChunk, + options.usePassword, options.UserName, options.Password) + if e != nil { + return instructions, e + } + reipOps = append(reipOps, &ReIPOp) + } + instructions = append(instructions, reipOps...) + } else { + httpsReIPOp, e := makeHTTPSReIPOp(startNodeInfo.NodeNamesToStart, startNodeInfo.ReIPList, + options.usePassword, options.UserName, options.Password) + if e != nil { + return instructions, e + } + instructions = append(instructions, &httpsReIPOp) } // host is set to nil value in the reload spread step // we use information from node information to find the up host later @@ -386,18 +411,14 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN return instructions, e } instructions = append(instructions, - &nmaNetworkProfileOp, - &httpsReIPOp, &httpsReloadSpreadOp, &httpsGetNodesInfoOp, ) } - // require to have the same vertica version nmaVerticaVersionOp := makeNMAVerticaVersionOpBeforeStartNode(vdb, startNodeInfo.unreachableHosts, startNodeInfo.HostsToStart, startNodeInfo.isStartSc) instructions = append(instructions, &nmaVerticaVersionOp) - // The second parameter (sourceConfHost) in produceTransferConfigOps is set to a nil value in the upload and download step // we use information from v1/nodes endpoint to get all node information to update the sourceConfHost value // after we find any UP primary nodes as source host for syncing spread.conf and vertica.conf @@ -407,26 +428,24 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN nil, /*source hosts for transferring configuration files*/ startNodeInfo.HostsToStart, vdb) - httpsRestartUpCommandOp, err := makeHTTPSStartUpCommandWithSandboxOp(options.usePassword, options.UserName, options.Password, vdb, startNodeInfo.Sandbox) if err != nil { return instructions, err } - nmaStartNewNodesOp := makeNMAStartNodeOpWithVDB(startNodeInfo.HostsToStart, options.StartUpConf, vdb) httpsPollNodeStateOp, err := makeHTTPSPollNodeStateOp(startNodeInfo.HostsToStart, options.usePassword, options.UserName, options.Password, options.StatePollingTimeout) if err != nil { return instructions, err } + httpsPollNodeStateOp.cmdType = StartNodeCmd instructions = append(instructions, &httpsRestartUpCommandOp, &nmaStartNewNodesOp, &httpsPollNodeStateOp, ) - if vdb.IsEon { httpsSyncCatalogOp, err := makeHTTPSSyncCatalogOp(options.Hosts, options.usePassword, options.UserName, options.Password, StartNodeSyncCat) @@ -435,10 +454,22 @@ func (vcc VClusterCommands) produceStartNodesInstructions(startNodeInfo *VStartN } instructions = append(instructions, &httpsSyncCatalogOp) } - return instructions, nil } +func getChunkedNodeLists(startNodeInfo *VStartNodesInfo) (nodeNameChunks, reIPHostChunks [][]string) { + chunkSize := startNodeInfo.upHostCount + for i := 0; i < len(startNodeInfo.ReIPList); i += chunkSize { + end := i + chunkSize + if end > len(startNodeInfo.ReIPList) { + end = len(startNodeInfo.ReIPList) + } + nodeNameChunks = append(nodeNameChunks, startNodeInfo.NodeNamesToStart[i:end]) + reIPHostChunks = append(reIPHostChunks, startNodeInfo.ReIPList[i:end]) + } + return +} + // validateControlNode returns true if the host is a control node and error out // if the host is a non-control node with its corresponding control node not up or not to be started func (options *VStartNodesOptions) validateControlNode(host string, vdb *VCoordinationDatabase, diff --git a/vclusterops/vcluster_database_options.go b/vclusterops/vcluster_database_options.go index 8a11520..fe9d406 100644 --- a/vclusterops/vcluster_database_options.go +++ b/vclusterops/vcluster_database_options.go @@ -66,6 +66,12 @@ type DatabaseOptions struct { Cert string // TLS CA Certificate CaCert string + // Whether to validate NMA server cert signature chain + DoVerifyNMAServerCert bool + // Whether to validate HTTPS server cert signature chain + DoVerifyHTTPSServerCert bool + // Whether to validate server cert hostname if signature validation is enabled + DoVerifyPeerCertHostname bool /* part 4: other info */ @@ -421,4 +427,12 @@ func (opt *DatabaseOptions) getCerts() *httpsCerts { return &httpsCerts{key: opt.Key, cert: opt.Cert, caCert: opt.CaCert} } +func (opt *DatabaseOptions) getTLSModes() *tlsModes { + return &tlsModes{ + doVerifyNMAServerCert: opt.DoVerifyNMAServerCert, + doVerifyHTTPSServerCert: opt.DoVerifyHTTPSServerCert, + doVerifyPeerCertHostname: opt.DoVerifyPeerCertHostname, + } +} + /* End opTLSOptions interface */