Skip to content

Commit

Permalink
Cleanup old helm-test k8s namespaces
Browse files Browse the repository at this point in the history
  • Loading branch information
pdostal committed Aug 4, 2023
1 parent bff2e16 commit 2f872c8
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 30 deletions.
6 changes: 3 additions & 3 deletions cleanup_k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ def main():
providers = PCWConfig.get_providers_for("k8sclusters", namespace)
try:
if ProviderChoice.GCE in providers:
GKE(namespace).cleanup_k8s_jobs()
GKE(namespace).cleanup_k8s()
if ProviderChoice.EC2 in providers:
EKS(namespace).cleanup_k8s_jobs()
EKS(namespace).cleanup_k8s()
if ProviderChoice.AZURE in providers:
AKS(namespace).cleanup_k8s_jobs()
AKS(namespace).cleanup_k8s()
except Exception:
logger.exception("[%s] Cleanup failed!", namespace)

Expand Down
16 changes: 10 additions & 6 deletions ocw/lib/aks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import kubernetes
from ocw.lib.provider import Provider
from ocw.lib.k8s import clean_jobs
from ocw.lib.k8s import clean_jobs, clean_helm_namespaces
from webui.PCWConfig import PCWConfig


Expand Down Expand Up @@ -35,14 +35,18 @@ def kubectl_client(self, resource_group: str, cluster_name: str):
f"for resource-group {resource_group} : {res.stderr}")

kubernetes.config.load_kube_config(config_file=kubeconfig)
self.__kubectl_client[cluster_name] = kubernetes.client.BatchV1Api()
self.__kubectl_client[cluster_name] = kubernetes.client

return self.__kubectl_client[cluster_name]

def cleanup_k8s_jobs(self):
def cleanup_k8s(self):
clusters = PCWConfig.get_k8s_clusters_for_provider(self._namespace, "azure")
self.log_info(f"Cleanup k8s jobs in AKS clusters. {len(clusters)} will be queried ")
self.log_info(f"Cleanup AKS clusters. {len(clusters)} will be queried ")
for cluster in clusters:
self.log_info(f"Cleanup k8s jobs in AKS cluster {cluster['cluster_name']}")
client = self.kubectl_client(cluster["resource_group"], cluster["cluster_name"])
self.log_info(f"Cleaning jobs in AKS cluster {cluster['cluster_name']}")
client = self.kubectl_client(cluster["resource_group"], cluster["cluster_name"]).BatchV1Api()
clean_jobs(self, client, cluster["cluster_name"])

self.log_info(f"Cleaning namespaces in AKS cluster {cluster['cluster_name']}")
client = self.kubectl_client(cluster["resource_group"], cluster["cluster_name"]).CoreV1Api()
clean_helm_namespaces(self, client, cluster["cluster_name"])
16 changes: 10 additions & 6 deletions ocw/lib/eks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import boto3
from webui.PCWConfig import PCWConfig, ConfigFile
from ocw.lib.provider import Provider
from ocw.lib.k8s import clean_jobs
from ocw.lib.k8s import clean_jobs, clean_helm_namespaces

TAG_IGNORE = 'pcw_ignore'

Expand Down Expand Up @@ -79,7 +79,7 @@ def kubectl_client(self, region: str, cluster_name: str):
raise RuntimeError(f"Cannot get the kubeconfig for the cluster {cluster_name} on region {region}")

kubernetes.config.load_kube_config(config_file=kubeconfig)
self.__kubectl_client[region_cluster] = kubernetes.client.BatchV1Api()
self.__kubectl_client[region_cluster] = kubernetes.client

return self.__kubectl_client[region_cluster]

Expand Down Expand Up @@ -154,12 +154,16 @@ def delete_all_clusters(self) -> None:
self.log_info(f"Finally deleting {cluster} cluster")
self.eks_client(region).delete_cluster(name=cluster)

def cleanup_k8s_jobs(self):
self.log_info("Cleanup k8s jobs in EKS clusters")
def cleanup_k8s(self):
self.log_info("Cleanup EKS clusters")
for region in self.__cluster_regions:
self.log_dbg(f"Region {region}")
clusters = self.eks_client(region).list_clusters()['clusters']
for cluster_name in clusters:
self.log_info(f"Cleanup k8s jobs in EKS cluster {cluster_name} in region {region}")
client = self.kubectl_client(region, cluster_name)
self.log_info(f"Cleaning jobs in EKS cluster {cluster_name} in region {region}")
client = self.kubectl_client(region, cluster_name).BatchV1Api()
clean_jobs(self, client, cluster_name)

self.log_info(f"Cleaning namespaces in EKS cluster {cluster_name} in region {region}")
client = self.kubectl_client(region, cluster_name).CoreV1Api()
clean_helm_namespaces(self, client, cluster_name)
16 changes: 10 additions & 6 deletions ocw/lib/gke.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import googleapiclient.discovery
from google.oauth2 import service_account
from ocw.lib.gce import GCE
from ocw.lib.k8s import clean_jobs
from ocw.lib.k8s import clean_jobs, clean_helm_namespaces


class GKE(GCE):
Expand Down Expand Up @@ -45,7 +45,7 @@ def kubectl_client(self, zone, cluster):
raise FileNotFoundError(f"{kubeconfig} doesn't exists")

kubernetes.config.load_kube_config(config_file=kubeconfig)
self.__kubectl_client[zone] = kubernetes.client.BatchV1Api()
self.__kubectl_client[zone] = kubernetes.client
return self.__kubectl_client[zone]

def get_clusters(self, zone):
Expand All @@ -56,12 +56,16 @@ def get_clusters(self, zone):

return []

def cleanup_k8s_jobs(self):
self.log_info("Cleanup k8s jobs in GKE clusters")
def cleanup_k8s(self):
self.log_info("Cleanup GKE clusters")
for region in self.list_regions():
for zone in self.list_zones(region):
for cluster in self.get_clusters(zone):
cluster_name = cluster["name"]
self.log_info(f"Cleanup k8s jobs in GKE cluster {cluster_name} in zone {zone}")
client = self.kubectl_client(zone, cluster)
self.log_info(f"Cleaning jobs in GKE cluster {cluster_name} in zone {zone}")
client = self.kubectl_client(zone, cluster).BatchV1Api()
clean_jobs(self, client, cluster_name)

self.log_info(f"Cleaning namespaces in GKE cluster {cluster_name} in zone {zone}")
client = self.kubectl_client(zone, cluster).CoreV1Api()
clean_helm_namespaces(self, client, cluster_name)
20 changes: 19 additions & 1 deletion ocw/lib/k8s.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime, timezone
from kubernetes.client import BatchV1Api
from kubernetes.client import BatchV1Api, CoreV1Api
from ocw.lib.provider import Provider


Expand All @@ -16,3 +16,21 @@ def clean_jobs(provider: Provider, client: BatchV1Api, cluster_name: str):
else:
provider.log_info(f"Skip deleting from {cluster_name} the job {job.metadata.name} " +
f"with age {age} (days)")


def clean_helm_namespaces(provider: Provider, client: CoreV1Api, cluster_name: str):
now = datetime.now(timezone.utc)
# Retrieve the list of all namespaces
namespaces = client.list_namespace(watch=False)

for ns in namespaces.items:
age = (now - ns.metadata.creation_timestamp).days
if (ns.metadata.name.startswith('helm-test') and age > 7):
# Delete the namespace
if not provider.dry_run:
provider.log_info(f"Deleting namespace {ns.metadata.name} created {ns.metadata.creation_timestamp}")
client.delete_namespace(ns.metadata.name)
else:
provider.log_info(f"Skip deleting namespace {ns.metadata.name} created {ns.metadata.creation_timestamp}")
else:
provider.log_info(f"Namespace {ns.metadata.name} will be kept.")
27 changes: 26 additions & 1 deletion tests/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,36 @@ def load_kube_config(self, *args, **kwargs):


class MockedKubernetesClient():
def __init__(self, jobs=None):
def __init__(self, jobs=None, namespaces=None):
if jobs is None:
jobs = []
if namespaces is None:
namespaces = []
self.jobs = jobs
self.namespaces = namespaces
self.deleted_jobs = []
self.deleted_namespaces = []

# pylint: disable=C0103
def BatchV1Api(self):
return self

# pylint: disable=C0103
def CoreV1Api(self):
return self

def list_job_for_all_namespaces(self, *args, **kwargs):
return MockedKubernetesResult(self.jobs)

def delete_namespaced_job(self, name, namespace):
self.deleted_jobs.append(name)

def list_namespace(self, *args, **kwargs):
return MockedKubernetesResult(self.namespaces)

def delete_namespace(self, name):
self.deleted_namespaces.append(name)


class MockedKubernetesResult():
def __init__(self, items):
Expand All @@ -46,6 +60,17 @@ def __init__(self, name, age):
self.metadata = MockedKubernetesJobMetadata(name)


class MockedKubernetesNamespaceMetadata():
def __init__(self, name, age):
self.name = name
self.creation_timestamp = datetime.now(timezone.utc) - timedelta(days=age)


class MockedKubernetesNamespace():
def __init__(self, name, age):
self.metadata = MockedKubernetesNamespaceMetadata(name, age)


class MockedSubprocessReturn():
def __init__(self, returncode=0, stdout="", stderr=""):
self.returncode = returncode
Expand Down
4 changes: 2 additions & 2 deletions tests/test_aks.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ def test_cleanup_k8s_jobs(aks_patch, monkeypatch):
monkeypatch.setattr(AKS, "kubectl_client", lambda *args, **kwargs: mocked_kubernetes)
monkeypatch.setattr(PCWConfig, "get_k8s_clusters_for_provider", lambda *args, **kwargs: [
{'resource_group': 'group', 'cluster_name': 'cluster'}])
aks_patch.cleanup_k8s_jobs()
aks_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 1
assert mocked_kubernetes.deleted_jobs[0] == "job1"

# test dry_run
aks_patch.dry_run = True
mocked_kubernetes.deleted_jobs = []
aks_patch.cleanup_k8s_jobs()
aks_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 0
4 changes: 2 additions & 2 deletions tests/test_eks.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,12 @@ def test_cleanup_k8s_jobs(eks_patch, monkeypatch):
monkeypatch.setattr(kubernetes, 'config', MockedKubernetesConfig())
mocked_kubernetes = MockedKubernetesClient([MockedKubernetesJob("job1", 1), MockedKubernetesJob("job2", 0)])
monkeypatch.setattr(EKS, "kubectl_client", lambda *args, **kwargs: mocked_kubernetes)
eks_patch.cleanup_k8s_jobs()
eks_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 1
assert mocked_kubernetes.deleted_jobs[0] == "job1"

# test dry_run
eks_patch.dry_run = True
mocked_kubernetes.deleted_jobs = []
eks_patch.cleanup_k8s_jobs()
eks_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 0
26 changes: 23 additions & 3 deletions tests/test_gke.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ocw.lib.provider import Provider
import pytest
import kubernetes
from tests.kubernetes import MockedKubernetesClient, MockedKubernetesConfig, MockedKubernetesJob
from tests.kubernetes import MockedKubernetesClient, MockedKubernetesConfig, MockedKubernetesJob, MockedKubernetesNamespace


@pytest.fixture
Expand All @@ -22,11 +22,31 @@ def test_cleanup_k8s_jobs(k8s_patch, monkeypatch):
mocked_kubernetes = MockedKubernetesClient([MockedKubernetesJob("job1", 1), MockedKubernetesJob("job2", 0)])
monkeypatch.setattr(kubernetes, 'client', mocked_kubernetes)
monkeypatch.setattr(GKE, 'kubectl_client', lambda *args, **kwargs: mocked_kubernetes)
k8s_patch.cleanup_k8s_jobs()
k8s_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 1
assert mocked_kubernetes.deleted_jobs[0] == "job1"

k8s_patch.dry_run = True
mocked_kubernetes.deleted_jobs = []
k8s_patch.cleanup_k8s_jobs()
k8s_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 0


def test_cleanup_k8s_namespaces(k8s_patch, monkeypatch):
monkeypatch.setattr(kubernetes, 'config', MockedKubernetesConfig())
mocked_kubernetes = MockedKubernetesClient(
[MockedKubernetesNamespace("helm-test-234", 1),
MockedKubernetesNamespace("helm-test-342", 9),
MockedKubernetesNamespace("kube-system", 9),
MockedKubernetesNamespace("something-else-745", 9)
])
monkeypatch.setattr(kubernetes, 'client', mocked_kubernetes)
monkeypatch.setattr(GKE, 'kubectl_client', lambda *args, **kwargs: mocked_kubernetes)
k8s_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_namespaces) == 1
assert mocked_kubernetes.deleted_jobs[0] == "job1"

k8s_patch.dry_run = True
mocked_kubernetes.deleted_jobs = []
k8s_patch.cleanup_k8s()
assert len(mocked_kubernetes.deleted_jobs) == 0

0 comments on commit 2f872c8

Please sign in to comment.