From 953f390e636301f7854df149ab3a4b2094ae9f77 Mon Sep 17 00:00:00 2001 From: Lawrence Jones Date: Thu, 9 May 2019 09:27:54 +0100 Subject: [PATCH 1/3] keeper: support --wal-dir For those who run Postgres with a WAL directory symlinked to a separate disk (often for performance reasons) this commit adds a --wal-dir flag to the keeper. The symlink to a new WAL disk needs to be created as part of the database initialisation step, and also whenever we clobber the database, such as when taking a pg_basebackup (if pg_rewind is disabled/unavailable). By passing this directory as an argument to initdb and pg_basebackup, we can cover all the scenarios that the keeper erases and recreates the data directory, ensuring the symlink is always present. --- cmd/keeper/cmd/keeper.go | 6 +++++- doc/commands/stolon-keeper.md | 1 + internal/postgresql/postgresql.go | 14 +++++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/cmd/keeper/cmd/keeper.go b/cmd/keeper/cmd/keeper.go index 3f57cb9c0..63f403e3e 100644 --- a/cmd/keeper/cmd/keeper.go +++ b/cmd/keeper/cmd/keeper.go @@ -93,6 +93,7 @@ type config struct { uid string dataDir string + walDir string debug bool pgListenAddress string pgAdvertiseAddress string @@ -124,6 +125,7 @@ func init() { CmdKeeper.PersistentFlags().StringVar(&cfg.uid, "id", "", "keeper uid (must be unique in the cluster and can contain only lower-case letters, numbers and the underscore character). If not provided a random uid will be generated.") CmdKeeper.PersistentFlags().StringVar(&cfg.uid, "uid", "", "keeper uid (must be unique in the cluster and can contain only lower-case letters, numbers and the underscore character). If not provided a random uid will be generated.") CmdKeeper.PersistentFlags().StringVar(&cfg.dataDir, "data-dir", "", "data directory") + CmdKeeper.PersistentFlags().StringVar(&cfg.walDir, "wal-dir", "", "wal directory") CmdKeeper.PersistentFlags().StringVar(&cfg.pgListenAddress, "pg-listen-address", "", "postgresql instance listening address, local address used for the postgres instance. For all network interface, you can set the value to '*'.") CmdKeeper.PersistentFlags().StringVar(&cfg.pgAdvertiseAddress, "pg-advertise-address", "", "postgresql instance address from outside. Use it to expose ip different than local ip with a NAT networking config") CmdKeeper.PersistentFlags().StringVar(&cfg.pgPort, "pg-port", "5432", "postgresql instance listening port") @@ -404,6 +406,7 @@ type PostgresKeeper struct { bootUUID string dataDir string + walDir string listenAddress string port string pgListenAddress string @@ -455,6 +458,7 @@ func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) { bootUUID: common.UUID(), dataDir: dataDir, + walDir: cfg.walDir, pgListenAddress: cfg.pgListenAddress, pgAdvertiseAddress: cfg.pgAdvertiseAddress, @@ -742,7 +746,7 @@ func (p *PostgresKeeper) Start(ctx context.Context) { // TODO(sgotti) reconfigure the various configurations options // (RequestTimeout) after a changed cluster config - pgm := postgresql.NewManager(p.pgBinPath, p.dataDir, p.getLocalConnParams(), p.getLocalReplConnParams(), p.pgSUAuthMethod, p.pgSUUsername, p.pgSUPassword, p.pgReplAuthMethod, p.pgReplUsername, p.pgReplPassword, p.requestTimeout) + pgm := postgresql.NewManager(p.pgBinPath, p.dataDir, p.walDir, p.getLocalConnParams(), p.getLocalReplConnParams(), p.pgSUAuthMethod, p.pgSUUsername, p.pgSUPassword, p.pgReplAuthMethod, p.pgReplUsername, p.pgReplPassword, p.requestTimeout) p.pgm = pgm p.pgm.StopIfStarted(true) diff --git a/doc/commands/stolon-keeper.md b/doc/commands/stolon-keeper.md index 768ce5c57..29635b5a2 100644 --- a/doc/commands/stolon-keeper.md +++ b/doc/commands/stolon-keeper.md @@ -15,6 +15,7 @@ stolon-keeper [flags] ``` --cluster-name string cluster name --data-dir string data directory + --wal-dir string data directory -h, --help help for stolon-keeper --kube-resource-kind string the k8s resource kind to be used to store stolon clusterdata and do sentinel leader election (only "configmap" is currently supported) --log-color enable color in log output (default if attached to a terminal) diff --git a/internal/postgresql/postgresql.go b/internal/postgresql/postgresql.go index c7e0ab4a4..6ef7e6c49 100644 --- a/internal/postgresql/postgresql.go +++ b/internal/postgresql/postgresql.go @@ -56,6 +56,7 @@ var log = slog.S() type Manager struct { pgBinPath string dataDir string + walDir string parameters common.Parameters recoveryParameters common.Parameters hba []string @@ -95,9 +96,10 @@ func SetLogger(l *zap.SugaredLogger) { log = l } -func NewManager(pgBinPath string, dataDir string, localConnParams, replConnParams ConnParams, suAuthMethod, suUsername, suPassword, replAuthMethod, replUsername, replPassword string, requestTimeout time.Duration) *Manager { +func NewManager(pgBinPath string, dataDir, walDir string, localConnParams, replConnParams ConnParams, suAuthMethod, suUsername, suPassword, replAuthMethod, replUsername, replPassword string, requestTimeout time.Duration) *Manager { return &Manager{ pgBinPath: pgBinPath, + walDir: walDir, dataDir: filepath.Join(dataDir, "postgres"), parameters: make(common.Parameters), recoveryParameters: make(common.Parameters), @@ -181,6 +183,13 @@ func (p *Manager) Init(initConfig *InitConfig) error { } log.Debugw("execing cmd", "cmd", cmd) + // initdb supports configuring a separate wal directory via symlinks. Normally this + // parameter might be part of the initConfig, but it will also be required whenever we + // fall-back to a pg_basebackup during a re-sync, which is why it's a Manager field. + if p.walDir != "" { + cmd.Args = append(cmd.Args, "--waldir", p.walDir) + } + if initConfig.Locale != "" { cmd.Args = append(cmd.Args, "--locale", initConfig.Locale) } @@ -827,6 +836,9 @@ func (p *Manager) SyncFromFollowed(followedConnParams ConnParams, replSlot strin if replSlot != "" { args = append(args, "--slot", replSlot) } + if p.walDir != "" { + args = append(args, "--waldir", p.walDir) + } cmd := exec.Command(name, args...) cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSFILE=%s", pgpass.Name())) From 77cd0b706daf0100273bfe7233c7f2e10c86512b Mon Sep 17 00:00:00 2001 From: Lawrence Jones Date: Fri, 10 May 2019 10:30:32 +0100 Subject: [PATCH 2/3] Support waldir in pitr command For those who use different devices for wal and data directory, we should support the interpolation of the wal directory location in the pitr command as we do the data directory. This allows people to supply commands like: ``` pg_basebackup --pgdata %d --waldir %w ... ``` And have both data directory and wal directory templated. pg_basebackup and initdb are commands that support the concept of waldir, but this could be useful for any arbitrary pitr command. The commit also simplifies the implementation of the expand function to feature less mutation and slice offset accesses. --- doc/pitr.md | 2 +- internal/postgresql/postgresql.go | 2 +- internal/postgresql/utils.go | 33 ++++++++++++------------------- internal/postgresql/utils_test.go | 8 ++++++-- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/doc/pitr.md b/doc/pitr.md index a82ae1a71..0bd7a4c22 100644 --- a/doc/pitr.md +++ b/doc/pitr.md @@ -37,7 +37,7 @@ Note: the `\"` is needed by json to put double quotes inside strings. We aren't When initializing a cluster in pitr init mode a random registered keeper will be choosed and it'll start restoring the database with these steps: * Remove the current data directory -* Call the `dataRestoreCommand` expanding every %d to the data directory full path. If it exits with a non zero exit code then stop here since something went wrong. +* Call the `dataRestoreCommand` expanding every %d to the data directory full path and every %w to the wal directory full path (if wal directory is provided to the keeper). If it exits with a non zero exit code then stop here since something went wrong. * Create a `recovery.conf` with the right parameters and with `restore_command` set to `restoreCommand`. * Start the postgres instance and wait for the archive recovery. diff --git a/internal/postgresql/postgresql.go b/internal/postgresql/postgresql.go index 6ef7e6c49..eebe85b34 100644 --- a/internal/postgresql/postgresql.go +++ b/internal/postgresql/postgresql.go @@ -218,7 +218,7 @@ func (p *Manager) Restore(command string) error { var err error var cmd *exec.Cmd - command = expand(command, p.dataDir) + command = expandRecoveryCommand(command, p.dataDir, p.walDir) if err = os.MkdirAll(p.dataDir, 0700); err != nil { err = fmt.Errorf("cannot create data dir: %v", err) diff --git a/internal/postgresql/utils.go b/internal/postgresql/utils.go index 5b8c736d4..9dae07a8c 100644 --- a/internal/postgresql/utils.go +++ b/internal/postgresql/utils.go @@ -317,27 +317,20 @@ func fileExists(path string) (bool, error) { return true, nil } -func expand(s, dataDir string) string { - buf := make([]byte, 0, 2*len(s)) - // %d %% are all ASCII, so bytes are fine for this operation. - i := 0 - for j := 0; j < len(s); j++ { - if s[j] == '%' && j+1 < len(s) { - switch s[j+1] { - case 'd': - buf = append(buf, s[i:j]...) - buf = append(buf, []byte(dataDir)...) - j += 1 - i = j + 1 - case '%': - j += 1 - buf = append(buf, s[i:j]...) - i = j + 1 - default: - } +// expandRecoveryCommand substitues the data and wal directories into a point-in-time +// recovery command string. Any %d become the data directory, any %w become the wal +// directory and any literal % characters are escaped by themselves (%% -> %). +func expandRecoveryCommand(cmd, dataDir, walDir string) string { + return regexp.MustCompile(`%[dw%]`).ReplaceAllStringFunc(cmd, func(match string) string { + switch match[1] { + case 'd': + return dataDir + case 'w': + return walDir } - } - return string(buf) + s[i:] + + return "%" + }) } func getConfigFilePGParameters(ctx context.Context, connParams ConnParams) (common.Parameters, error) { diff --git a/internal/postgresql/utils_test.go b/internal/postgresql/utils_test.go index 0f7eedb31..8e2f3615d 100644 --- a/internal/postgresql/utils_test.go +++ b/internal/postgresql/utils_test.go @@ -89,7 +89,7 @@ func TestValidReplSlotName(t *testing.T) { } } -func TestExpand(t *testing.T) { +func TestExpandRecoveryCommand(t *testing.T) { tests := []struct { in string out string @@ -106,6 +106,10 @@ func TestExpand(t *testing.T) { in: "%d", out: "/datadir", }, + { + in: "%w", + out: "/waldir", + }, { in: "%%d", out: "%d", @@ -121,7 +125,7 @@ func TestExpand(t *testing.T) { } for i, tt := range tests { - out := expand(tt.in, "/datadir") + out := expandRecoveryCommand(tt.in, "/datadir", "/waldir") if out != tt.out { t.Errorf("#%d: wrong expanded string: got: %s, want: %s", i, out, tt.out) } From b634e534fc5311e3c75ac7b35cc29e00e7008152 Mon Sep 17 00:00:00 2001 From: Lawrence Jones Date: Fri, 10 May 2019 10:40:30 +0100 Subject: [PATCH 3/3] Also remove wal directory when destroying data If a wal directory is supplied then it's important to clean-out our wals along with our data directory. This commit renames the existing RemoveAll method to RemoveAllIfInitialized, which better represents what it does, and has it rely on a RemoveAll method that cleans-up both data directory and wal. --- cmd/keeper/cmd/keeper.go | 8 ++++---- internal/postgresql/postgresql.go | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/cmd/keeper/cmd/keeper.go b/cmd/keeper/cmd/keeper.go index 63f403e3e..d9f153896 100644 --- a/cmd/keeper/cmd/keeper.go +++ b/cmd/keeper/cmd/keeper.go @@ -830,7 +830,7 @@ func (p *PostgresKeeper) resync(db, followedDB *cluster.DB, tryPgrewind bool) er replSlot = common.StolonName(db.UID) } - if err := pgm.RemoveAll(); err != nil { + if err := pgm.RemoveAllIfInitialized(); err != nil { return fmt.Errorf("failed to remove the postgres data dir: %v", err) } if slog.IsDebug() { @@ -1023,7 +1023,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { } // Clean up cluster db datadir - if err = pgm.RemoveAll(); err != nil { + if err = pgm.RemoveAllIfInitialized(); err != nil { log.Errorw("failed to remove the postgres data dir", zap.Error(err)) return } @@ -1082,7 +1082,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { log.Errorw("failed to stop pg instance", zap.Error(err)) return } - if err = pgm.RemoveAll(); err != nil { + if err = pgm.RemoveAllIfInitialized(); err != nil { log.Errorw("failed to remove the postgres data dir", zap.Error(err)) return } @@ -1144,7 +1144,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { log.Errorw("failed to stop pg instance", zap.Error(err)) return } - if err = pgm.RemoveAll(); err != nil { + if err = pgm.RemoveAllIfInitialized(); err != nil { log.Errorw("failed to remove the postgres data dir", zap.Error(err)) return } diff --git a/internal/postgresql/postgresql.go b/internal/postgresql/postgresql.go index eebe85b34..698f07865 100644 --- a/internal/postgresql/postgresql.go +++ b/internal/postgresql/postgresql.go @@ -208,7 +208,7 @@ func (p *Manager) Init(initConfig *InitConfig) error { } // remove the dataDir, so we don't end with an half initialized database if err != nil { - os.RemoveAll(p.dataDir) + p.RemoveAll() return err } return nil @@ -237,7 +237,7 @@ func (p *Manager) Restore(command string) error { // On every error remove the dataDir, so we don't end with an half initialized database out: if err != nil { - os.RemoveAll(p.dataDir) + p.RemoveAll() return err } return nil @@ -853,7 +853,7 @@ func (p *Manager) SyncFromFollowed(followedConnParams ConnParams, replSlot strin return nil } -func (p *Manager) RemoveAll() error { +func (p *Manager) RemoveAllIfInitialized() error { initialized, err := p.IsInitialized() if err != nil { return fmt.Errorf("failed to retrieve instance state: %v", err) @@ -869,6 +869,17 @@ func (p *Manager) RemoveAll() error { if started { return fmt.Errorf("cannot remove postregsql database. Instance is active") } + + return p.RemoveAll() +} + +// RemoveAll entirely cleans up the data directory, including any wal directory if that +// exists outside of the data directory. +func (p *Manager) RemoveAll() error { + if p.walDir != "" { + os.RemoveAll(p.walDir) + } + return os.RemoveAll(p.dataDir) }