Skip to content

Commit

Permalink
Restore prometheus (#373)
Browse files Browse the repository at this point in the history
Bug: T356769
  • Loading branch information
vivian-rook authored Feb 9, 2024
1 parent a9d74fd commit ae139ec
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 0 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,6 @@ https://wikitech.wikimedia.org/wiki/PAWS/Admin#Deployment
If the entire project is removed, two parts of paws are not managed by tofu/ansible.
Object storage container: An object storage container named "tofu-state" will need to be generated in horizon. This is where the state file for tofu resides.
NFS: The NFS server is not included, so a fresh NFS server will be needed for paws to operate.

# backup prometheus
See ansible/files/prometheus-data.sh for an example of backup/restore.
15 changes: 15 additions & 0 deletions ansible/files/prometheus-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Copy Prometheus data from the previous cluster's prometheus server pod into
# the current cluster's prometheus server pod, then restart the deployment.
#
# Inputs (fixed paths):
#   /tmp/kube.config.previous  kubeconfig of the cluster to back up from
#   /tmp/kube.config.current   kubeconfig of the cluster to restore into
#
# Behaviour:
#   * Backup phase is best-effort: if there is no previous kubeconfig, no
#     previous pod, or the archive cannot be retrieved, the restore is skipped
#     (exit 0) and the current pod's /data is left untouched.
#   * Once a backup has been retrieved, any failure while restoring it is
#     reported on stderr and the script exits non-zero.

set -uo pipefail

readonly NAMESPACE='metrics'
readonly SELECTOR='app.kubernetes.io/component=server'
readonly CONTAINER='prometheus-server'
readonly PREVIOUS_KUBECONFIG='/tmp/kube.config.previous'
readonly CURRENT_KUBECONFIG='/tmp/kube.config.current'
readonly LOCAL_BACKUP='/tmp/prometheus.tar.gz'

# Print a diagnostic on stderr.
warn() { printf 'prometheus-data: %s\n' "$*" >&2; }

# Nothing to restore: explain why and finish successfully.
skip() { warn "$*; skipping restore"; exit 0; }

# Unrecoverable restore error: explain why and fail.
die() { warn "$*"; exit 1; }

# Print the name of the first pod matching the prometheus server selector in
# the cluster selected by $KUBECONFIG. Prints nothing if no pod matches.
server_pod() {
  kubectl get pods -n "${NAMESPACE}" --selector="${SELECTOR}" --no-headers \
    -o custom-columns=":metadata.name" | head -n 1
}

# ---- backup from the previous cluster (best-effort) -------------------------

[[ -r "${PREVIOUS_KUBECONFIG}" ]] \
  || skip "no previous kubeconfig at ${PREVIOUS_KUBECONFIG}"

export KUBECONFIG="${PREVIOUS_KUBECONFIG}"
PREVIOUS_POD=$(server_pod) || PREVIOUS_POD=''
[[ -n "${PREVIOUS_POD}" ]] \
  || skip "no prometheus server pod found in the previous cluster"

# Drop any archive left by an earlier run so it cannot be restored by mistake.
rm -f -- "${LOCAL_BACKUP}"

# No -it: this script is run non-interactively and tar needs neither stdin
# nor a TTY. A non-zero tar status is only warned about; whether a usable
# archive was produced is decided by the size check below.
kubectl -n "${NAMESPACE}" exec "pod/${PREVIOUS_POD}" -c "${CONTAINER}" \
  -- tar cfz backup.tar.gz /data \
  || warn "tar in pod ${PREVIOUS_POD} reported a problem"
kubectl cp "${NAMESPACE}/${PREVIOUS_POD}:backup.tar.gz" "${LOCAL_BACKUP}" \
  -c "${CONTAINER}" \
  || warn "copying the backup out of pod ${PREVIOUS_POD} reported a problem"

[[ -s "${LOCAL_BACKUP}" ]] \
  || skip "no backup archive was retrieved from the previous cluster"

sleep 150 # make a little gap of time to keep data from overlapping

# ---- restore into the current cluster ---------------------------------------

[[ -r "${CURRENT_KUBECONFIG}" ]] \
  || die "no current kubeconfig at ${CURRENT_KUBECONFIG}"

export KUBECONFIG="${CURRENT_KUBECONFIG}"

# Wait first, then look the pod up, so the name refers to a pod that exists
# and is ready.
kubectl -n "${NAMESPACE}" wait --for=condition=ready pod -l "${SELECTOR}" \
  --timeout=600s \
  || die "prometheus server pod in the current cluster did not become ready"

CURRENT_POD=$(server_pod) || CURRENT_POD=''
[[ -n "${CURRENT_POD}" ]] \
  || die "no prometheus server pod found in the current cluster"

kubectl cp "${LOCAL_BACKUP}" "${NAMESPACE}/${CURRENT_POD}:backup.tar.gz" \
  -c "${CONTAINER}" \
  || die "copying the backup into pod ${CURRENT_POD} failed"

# Only wipe /data once the archive is confirmed to be present in the pod.
kubectl -n "${NAMESPACE}" exec "pod/${CURRENT_POD}" -c "${CONTAINER}" \
  -- sh -c 'test -s backup.tar.gz || exit 1; rm -rf /data/* ; tar xfz backup.tar.gz -C /' \
  || die "extracting the backup in pod ${CURRENT_POD} failed"

# Restart so prometheus picks up the restored data; this command's exit status
# is the script's exit status.
kubectl -n "${NAMESPACE}" rollout restart deployment.apps/prometheus-server
11 changes: 11 additions & 0 deletions ansible/paws.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@
- value: controller.config.allow-snippet-annotations=true
value_type: string

# Ensure the "metrics" namespace is present. The task result is registered as
# `prometheus`; a later task checks `prometheus.changed`.
- name: Create metrics namespace for prometheus
kubernetes.core.k8s:
name: metrics
kind: Namespace
state: present
register: prometheus

- name: Prometheus
kubernetes.core.helm:
name: prometheus
Expand All @@ -85,6 +92,10 @@
template: "templates/prometheus-ingress.yaml.j2"
namespace: metrics

# Runs files/prometheus-data.sh, but only when the registered `prometheus`
# result reports a change.
- name: Pull in previous prometheus data
ansible.builtin.script: files/prometheus-data.sh
when: prometheus.changed

- name: Add jupyterhub chart repo
kubernetes.core.helm_repository:
name: jupyterhub
Expand Down
4 changes: 4 additions & 0 deletions deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ fi

source secrets-${datacenter}.sh

# Save the current kube.config in case we need to transfer prometheus data.
# Best-effort: a missing file is not an error. The path is quoted so a working
# directory containing spaces does not split into several cp arguments.
cp -- "$(pwd)/tofu/kube.config" /tmp/kube.config.previous || true # if it isn't there just keep going

python3 -m venv .venv/deploy
source .venv/deploy/bin/activate
pip install ansible==8.1.0 kubernetes==26.1.0
Expand All @@ -51,6 +54,7 @@ cd tofu
AWS_ACCESS_KEY_ID=${ACCESS_KEY} AWS_SECRET_ACCESS_KEY=${SECRET_KEY} tofu init -backend-config=${datacenter}-backend.conf
AWS_ACCESS_KEY_ID=${ACCESS_KEY} AWS_SECRET_ACCESS_KEY=${SECRET_KEY} tofu apply -var datacenter=${datacenter}
export KUBECONFIG="$(pwd)/kube.config"
# Keep a copy of this kube.config at the fixed path that
# ansible/files/prometheus-data.sh reads for the current cluster.
# Quoted so a working directory containing spaces is handled correctly.
cp -- "$(pwd)/kube.config" /tmp/kube.config.current

if [ "${tofuonly}" = '1' ]
then
Expand Down

0 comments on commit ae139ec

Please sign in to comment.