# Disaster Recovery Updates #4164

Open · wants to merge 11 commits into main
31 changes: 24 additions & 7 deletions docs/database/CLUSTER_DB.MD
@@ -53,24 +53,42 @@ Spin up the Repo-Standby Cluster with:

- Anecdotally, spinning up a standby cluster for a 15GB database took about 5 minutes

##### Promoting a Standby Cluster

Once a standby is stood up, it can be promoted to be the primary cluster. **Note: only do this if the existing primary has been shut down first.**
You can shut down the primary cluster with this command:
`kubectl patch postgrescluster/<cluster-name> --type merge --patch '{"spec":{"shutdown": true}}'`
You can tell that the cluster is fully shut down when there are no StatefulSet pods running for that cluster.
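A quick way to check is to list the cluster's pods (a sketch, assuming the standard `postgres-operator.crunchydata.com/cluster` label the operator puts on cluster pods):

```
kubectl get pods -n <namespace> -l postgres-operator.crunchydata.com/cluster=<cluster-name>
```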

Promote the standby cluster by editing the [crunchy_standby.yaml](../../openshift/templates/crunchy_standby.yaml) to set the `standby` field to `false`.
You can tell that promotion has completed when the logs of the standby StatefulSet show that a leader has been elected.
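For reference, the relevant block at the bottom of the template (shown in full later in this PR) ends up looking like this after the edit:

```
standby:
  enabled: false
  repoName: repo1
```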

More details here: <https://access.crunchydata.com/documentation/postgres-operator/latest/architecture/disaster-recovery#promoting-a-standby-cluster>

##### Database user privileges

The promoted standby cluster creates its own secrets for connecting to pgbouncer, and it creates a new database user with the same name as the cluster. For example, if the standby cluster is named "wps-crunchy-16-20241210", the user will also be named "wps-crunchy-16-20241210" (this user isn't created until the standby is promoted).
Once the standby has been promoted, the easiest way to update user privileges is to reassign table ownership from the old user to the new user:
`REASSIGN OWNED BY "<old-user>" TO "<new-user>";`
Use `\du` in psql to see users in the database.
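For example, with a hypothetical old user `wps` and a promoted cluster named `wps-crunchy-16-20241210`, the psql session would look like:

```
\du
REASSIGN OWNED BY "wps" TO "wps-crunchy-16-20241210";
```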

##### Update the prod deployment to use the new crunchydb cluster and pguser secret

Create a PR with the following changes:

- Update `CRUNCHY_NAME` in `envars` file to the new crunchydb cluster name (excluding the suffix)
- Update `PATRONI_CLUSTER_NAME` in `deploy.yaml`.

## Cluster Restore From pg_dump

In the event that the cluster can't be restored from pgbackrest, you can create a new cluster and restore it using a pg_dump from S3.

##### Deploy new cluster

```
oc login --token=<your-token> --server=<openshift-api-url>
PROJ_TARGET=<namespace-license-plate> BUCKET=<s3-bucket> CPU_REQUEST=75m CPU_LIMIT=2000m MEMORY_REQUEST=2Gi MEMORY_LIMIT=16Gi DATA_SIZE=65Gi WAL_SIZE=45Gi bash ./oc_provision_crunchy.sh <suffix> apply
```
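After applying, you can watch the new cluster come up; this is a sketch, assuming the operator's standard cluster label:

```
oc get postgrescluster -n <namespace-license-plate>
oc get pods -n <namespace-license-plate> -l postgres-operator.crunchydata.com/cluster=<cluster-name>
```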

##### Set superuser permissions in new cluster via OpenShift web GUI

@@ -90,7 +108,7 @@
```
PGUSER=$(oc get secrets -n <namespace-license-plate> "<wps-crunchydb-pguser-secret-name>" -o go-template='{{.data.user | base64decode}}')
PGDATABASE=$(oc get secrets -n <namespace-license-plate> "<wps-crunchydb-pguser-secret-name>" -o go-template='{{.data.dbname | base64decode}}')
oc -n <namespace-license-plate> port-forward "${PG_CLUSTER_PRIMARY_POD}" 5432:5432
```
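With the port-forward running, you can sanity-check the connection from another shell. This sketch assumes the pguser secret also carries a `password` key, as Crunchy pguser secrets normally do:

```
PGPASSWORD=$(oc get secrets -n <namespace-license-plate> "<wps-crunchydb-pguser-secret-name>" -o go-template='{{.data.password | base64decode}}')
PGPASSWORD="${PGPASSWORD}" psql -h localhost -p 5432 -U "${PGUSER}" -d "${PGDATABASE}" -c 'select version();'
```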

##### Restore sql dump into new cluster in another shell

Download the latest SQL dump from S3 storage and unzip it.
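The exact commands are collapsed in this diff, but a minimal sketch (assuming the AWS CLI is pointed at the bucket's S3-compatible endpoint and the dump is gzipped; the object path is hypothetical) looks like:

```
aws s3 cp s3://<s3-bucket>/sql_dump/dump.sql.gz . --endpoint-url https://nrs.objectstore.gov.bc.ca
gunzip dump.sql.gz
psql -h localhost -p 5432 -U "${PGUSER}" -d "${PGDATABASE}" -f dump.sql
```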
@@ -115,4 +133,3 @@
Create a PR with the following changes:

- Update `CRUNCHY_NAME` in `envars` file to the new crunchydb cluster name (excluding the suffix)
- Update `PATRONI_CLUSTER_NAME` in `deploy.yaml`.

10 changes: 8 additions & 2 deletions openshift/scripts/oc_provision_crunchy_standby.sh
@@ -25,6 +25,9 @@ source "$(dirname ${0})/common/common"
# Target project override for Dev or Prod deployments
#
PROJ_TARGET="${PROJ_TARGET:-${PROJ_DEV}}"

# Set DATE to today's date if it isn't set
DATE=${DATE:-$(date +"%Y%m%d")}

# Prepare names for crunchy ephemeral instance for this PR.
IMAGE_STREAM_NAMESPACE=${IMAGE_STREAM_NAMESPACE:-${PROJ_TOOLS}}
@@ -34,9 +37,11 @@ EPHEMERAL_STORAGE=${EPHEMERAL_STORAGE:-'False'}
OC_PROCESS="oc -n ${PROJ_TARGET} process -f ${TEMPLATE_PATH}/crunchy_standby.yaml \
-p SUFFIX=${SUFFIX} \
-p TARGET_NAMESPACE=${PROJ_TARGET} \
-p CRUNCHY_NAME=${CRUNCHY_NAME} \
-p BUCKET=${BUCKET} \
-p DATE=${DATE} \
-p DATA_SIZE=${DATA_SIZE:-65Gi} \
-p WAL_SIZE=${WAL_SIZE:-15Gi} \
${IMAGE_NAME:+ " -p IMAGE_NAME=${IMAGE_NAME}"} \
${IMAGE_TAG:+ " -p IMAGE_TAG=${IMAGE_TAG}"} \
${IMAGE_REGISTRY:+ " -p IMAGE_REGISTRY=${IMAGE_REGISTRY}"} \
@@ -46,6 +51,7 @@ OC_PROCESS="oc -n ${PROJ_TARGET} process -f ${TEMPLATE_PATH}/crunchy_standby.yaml \
-p MEMORY_LIMIT=16Gi"



# In order to avoid running out of storage quota in our development environment, use
# ephemeral storage by removing the pvc request from the template.
if [ "$EPHEMERAL_STORAGE" = "True" ]
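By analogy with the `oc_provision_crunchy.sh` call in the docs above, an invocation of this standby script might look like the following (the argument order and the `CRUNCHY_NAME` value are assumptions; `DATE` can be omitted to default to today):

```
PROJ_TARGET=<namespace-license-plate> BUCKET=<s3-bucket> CRUNCHY_NAME=wps-crunchydb-16 bash ./oc_provision_crunchy_standby.sh <suffix> apply
```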
63 changes: 50 additions & 13 deletions openshift/templates/crunchy_standby.yaml
@@ -1,12 +1,12 @@
apiVersion: template.openshift.io/v1
kind: Template
metadata:
  name: ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
  annotations:
    "openshift.io/display-name": ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
  labels:
    app.kubernetes.io/part-of: ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
    app: ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
parameters:
  - description: Namespace in which database resides
    displayName: Target Namespace
@@ -15,6 +15,12 @@ parameters:
  - name: BUCKET
    description: S3 bucket name
    required: true
  - name: CRUNCHY_NAME
    description: Application name (wps - wildfire predictive services)
    required: true
  - name: DATE
    description: Date the standby was created
    required: true
  - name: DATA_SIZE
    description: Data PVC size
    required: true
@@ -60,23 +66,17 @@ objects:
  - apiVersion: postgres-operator.crunchydata.com/v1beta1
    kind: PostgresCluster
    metadata:
      name: ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
    spec:
      postgresVersion: 16
      postGISVersion: "3.3"
      metadata:
        name: ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
        labels:
          app: ${CRUNCHY_NAME}-${DATE}-${SUFFIX}
      databaseInitSQL:
        key: init.sql
        name: wps-init-sql
      instances:
        - name: crunchy
          replicas: 1
@@ -104,20 +104,57 @@
      backups:
        pgbackrest:
          image: artifacts.developer.gov.bc.ca/bcgov-docker-local/crunchy-pgbackrest:ubi8-2.41-4
          manual:
            repoName: repo1
            options:
              - --type=full
          configuration:
            - secret:
                name: crunchy-pgbackrest
                items:
                  - key: conf
                    path: s3.conf
          global:
            repo1-retention-full: "3"
            repo1-retention-full-type: count
            repo1-path: /pgbackrest/${SUFFIX}/repo1
          repos:
            - name: repo1
              schedules:
                full: "0 1 * * 0"
                differential: "0 1 * * 1-6"
              s3:
                bucket: ${BUCKET}
                endpoint: nrs.objectstore.gov.bc.ca
                region: "ca-central-1"
      proxy:
        pgBouncer:
          image: artifacts.developer.gov.bc.ca/bcgov-docker-local/crunchy-pgbouncer:ubi8-1.21-0
          affinity:
            podAntiAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
                - podAffinityTerm:
                    labelSelector:
                      matchLabels:
                        postgres-operator.crunchydata.com/cluster: db
                        postgres-operator.crunchydata.com/role: pgbouncer
                    topologyKey: kubernetes.io/hostname
                  weight: 1
          config:
            global:
              pool_mode: transaction
              ignore_startup_parameters: options, extra_float_digits
              max_prepared_statements: "10"
              max_client_conn: "1000"
          port: 5432
          replicas: 1
          resources:
            limits:
              cpu: 500m
              memory: 3Gi
            requests:
              cpu: 100m
              memory: 1Gi
      standby:
        enabled: true
        repoName: repo1
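The new `manual` stanza defines an on-demand full backup against `repo1`; with the operator's documented annotation trigger, kicking one off would look roughly like this (a sketch):

```
kubectl annotate -n <namespace> postgrescluster <cluster-name> --overwrite postgres-operator.crunchydata.com/pgbackrest-backup="$(date)"
```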