From e98fbf7387083dc1f3d3d9edcbe7a2c2b0934cd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Dost=C3=A1l?= Date: Thu, 3 Aug 2023 11:23:41 +0200 Subject: [PATCH] Cleanup old helm-test k8s namespaces --- cleanup_k8s.py | 3 +++ ocw/lib/aks.py | 18 +++++++++++++----- ocw/lib/eks.py | 20 +++++++++++++++----- ocw/lib/gke.py | 20 +++++++++++++++----- ocw/lib/k8s.py | 20 +++++++++++++++++++- tests/kubernetes.py | 27 ++++++++++++++++++++++++++- tests/test_aks.py | 26 +++++++++++++++++++++++++- tests/test_eks.py | 30 +++++++++++++++++++++++++++++- tests/test_gke.py | 29 +++++++++++++++++++++++++++-- 9 files changed, 172 insertions(+), 21 deletions(-) diff --git a/cleanup_k8s.py b/cleanup_k8s.py index 8d1a226e..8de21226 100644 --- a/cleanup_k8s.py +++ b/cleanup_k8s.py @@ -20,10 +20,13 @@ def main(): try: if ProviderChoice.GCE in providers: GKE(namespace).cleanup_k8s_jobs() + GKE(namespace).cleanup_k8s_namespaces() if ProviderChoice.EC2 in providers: EKS(namespace).cleanup_k8s_jobs() + EKS(namespace).cleanup_k8s_namespaces() if ProviderChoice.AZURE in providers: AKS(namespace).cleanup_k8s_jobs() + AKS(namespace).cleanup_k8s_namespaces() except Exception: logger.exception("[%s] Cleanup failed!", namespace) diff --git a/ocw/lib/aks.py b/ocw/lib/aks.py index 26c8f5d7..18de9c00 100644 --- a/ocw/lib/aks.py +++ b/ocw/lib/aks.py @@ -1,7 +1,7 @@ import os import kubernetes from ocw.lib.provider import Provider -from ocw.lib.k8s import clean_jobs +from ocw.lib.k8s import clean_jobs, clean_namespaces from webui.PCWConfig import PCWConfig @@ -35,14 +35,22 @@ def kubectl_client(self, resource_group: str, cluster_name: str): f"for resource-group {resource_group} : {res.stderr}") kubernetes.config.load_kube_config(config_file=kubeconfig) - self.__kubectl_client[cluster_name] = kubernetes.client.BatchV1Api() + self.__kubectl_client[cluster_name] = kubernetes.client return self.__kubectl_client[cluster_name] def cleanup_k8s_jobs(self): clusters = PCWConfig.get_k8s_clusters_for_provider(self._namespace, "azure") - self.log_info(f"Cleanup k8s jobs in AKS clusters. {len(clusters)} will be queried ") + self.log_info(f"Cleanup jobs in AKS clusters. {len(clusters)} will be queried ") for cluster in clusters: - self.log_info(f"Cleanup k8s jobs in AKS cluster {cluster['cluster_name']}") - client = self.kubectl_client(cluster["resource_group"], cluster["cluster_name"]) + self.log_info(f"Cleaning jobs in AKS cluster {cluster['cluster_name']}") + client = self.kubectl_client(cluster["resource_group"], cluster["cluster_name"]).BatchV1Api() clean_jobs(self, client, cluster["cluster_name"]) + + def cleanup_k8s_namespaces(self): + clusters = PCWConfig.get_k8s_clusters_for_provider(self._namespace, "azure") + self.log_info(f"Cleanup namespaces in AKS clusters. {len(clusters)} will be queried ") + for cluster in clusters: + self.log_info(f"Cleaning namespaces in AKS cluster {cluster['cluster_name']}") + client = self.kubectl_client(cluster["resource_group"], cluster["cluster_name"]).CoreV1Api() + clean_namespaces(self, client, cluster["cluster_name"]) diff --git a/ocw/lib/eks.py b/ocw/lib/eks.py index 0d3495ee..5feb8d62 100644 --- a/ocw/lib/eks.py +++ b/ocw/lib/eks.py @@ -6,7 +6,7 @@ import boto3 from webui.PCWConfig import PCWConfig, ConfigFile from ocw.lib.provider import Provider -from ocw.lib.k8s import clean_jobs +from ocw.lib.k8s import clean_jobs, clean_namespaces TAG_IGNORE = 'pcw_ignore' @@ -79,7 +79,7 @@ def kubectl_client(self, region: str, cluster_name: str): raise RuntimeError(f"Cannot get the kubeconfig for the cluster {cluster_name} on region {region}") kubernetes.config.load_kube_config(config_file=kubeconfig) - self.__kubectl_client[region_cluster] = kubernetes.client.BatchV1Api() + self.__kubectl_client[region_cluster] = kubernetes.client return self.__kubectl_client[region_cluster] @@ -155,11 +155,21 @@ def delete_all_clusters(self) -> None: self.eks_client(region).delete_cluster(name=cluster) def cleanup_k8s_jobs(self): - self.log_info("Cleanup k8s jobs in EKS clusters") + self.log_info("Cleanup jobs in EKS clusters") for region in self.__cluster_regions: self.log_dbg(f"Region {region}") clusters = self.eks_client(region).list_clusters()['clusters'] for cluster_name in clusters: - self.log_info(f"Cleanup k8s jobs in EKS cluster {cluster_name} in region {region}") - client = self.kubectl_client(region, cluster_name) + self.log_info(f"Cleaning jobs in EKS cluster {cluster_name} in region {region}") + client = self.kubectl_client(region, cluster_name).BatchV1Api() clean_jobs(self, client, cluster_name) + + def cleanup_k8s_namespaces(self): + self.log_info("Cleanup namespaces in EKS clusters") + for region in self.__cluster_regions: + self.log_dbg(f"Region {region}") + clusters = self.eks_client(region).list_clusters()['clusters'] + for cluster_name in clusters: + self.log_info(f"Cleaning namespaces in EKS cluster {cluster_name} in region {region}") + client = self.kubectl_client(region, cluster_name).CoreV1Api() + clean_namespaces(self, client, cluster_name) diff --git a/ocw/lib/gke.py b/ocw/lib/gke.py index 8b4b3427..97ca0dca 100644 --- a/ocw/lib/gke.py +++ b/ocw/lib/gke.py @@ -3,7 +3,7 @@ import googleapiclient.discovery from google.oauth2 import service_account from ocw.lib.gce import GCE -from ocw.lib.k8s import clean_jobs +from ocw.lib.k8s import clean_jobs, clean_namespaces class GKE(GCE): @@ -45,7 +45,7 @@ def kubectl_client(self, zone, cluster): raise FileNotFoundError(f"{kubeconfig} doesn't exists") kubernetes.config.load_kube_config(config_file=kubeconfig) - self.__kubectl_client[zone] = kubernetes.client.BatchV1Api() + self.__kubectl_client[zone] = kubernetes.client return self.__kubectl_client[zone] def get_clusters(self, zone): @@ -57,11 +57,21 @@ def get_clusters(self, zone): return [] def cleanup_k8s_jobs(self): - self.log_info("Cleanup k8s jobs in GKE clusters") + self.log_info("Cleanup jobs in GKE clusters") for region in self.list_regions(): for zone in self.list_zones(region): for cluster in self.get_clusters(zone): cluster_name = cluster["name"] - self.log_info(f"Cleanup k8s jobs in GKE cluster {cluster_name} in zone {zone}") - client = self.kubectl_client(zone, cluster) + self.log_info(f"Cleaning jobs in GKE cluster {cluster_name} in zone {zone}") + client = self.kubectl_client(zone, cluster).BatchV1Api() clean_jobs(self, client, cluster_name) + + def cleanup_k8s_namespaces(self): + self.log_info("Cleanup namespaces in GKE clusters") + for region in self.list_regions(): + for zone in self.list_zones(region): + for cluster in self.get_clusters(zone): + cluster_name = cluster["name"] + self.log_info(f"Cleaning namespaces in GKE cluster {cluster_name} in zone {zone}") + client = self.kubectl_client(zone, cluster).CoreV1Api() + clean_namespaces(self, client, cluster_name) diff --git a/ocw/lib/k8s.py b/ocw/lib/k8s.py index d34f34c9..60753c5c 100644 --- a/ocw/lib/k8s.py +++ b/ocw/lib/k8s.py @@ -1,5 +1,5 @@ from datetime import datetime, timezone -from kubernetes.client import BatchV1Api +from kubernetes.client import BatchV1Api, CoreV1Api from ocw.lib.provider import Provider @@ -16,3 +16,21 @@ def clean_jobs(provider: Provider, client: BatchV1Api, cluster_name: str): else: provider.log_info(f"Skip deleting from {cluster_name} the job {job.metadata.name} " + f"with age {age} (days)") + + +def clean_namespaces(provider: Provider, client: CoreV1Api, cluster_name: str): + now = datetime.now(timezone.utc) + # Retrieve the list of all namespaces + namespaces = client.list_namespace(watch=False) + + for ns in namespaces.items: + age = (now - ns.metadata.creation_timestamp).days + if (ns.metadata.name.startswith('helm-test') and age > 7): + # Delete the namespace + if not provider.dry_run: + provider.log_info(f"Deleting namespace {ns.metadata.name} created {ns.metadata.creation_timestamp}") + client.delete_namespace(ns.metadata.name) + else: + provider.log_info(f"Skip deleting namespace {ns.metadata.name} created {ns.metadata.creation_timestamp}") + else: + provider.log_info(f"Namespace {ns.metadata.name} will be kept.") diff --git a/tests/kubernetes.py b/tests/kubernetes.py index f6e3da21..b0757175 100644 --- a/tests/kubernetes.py +++ b/tests/kubernetes.py @@ -7,22 +7,36 @@ def load_kube_config(self, *args, **kwargs): class MockedKubernetesClient(): - def __init__(self, jobs=None): + def __init__(self, jobs=None, namespaces=None): if jobs is None: jobs = [] + if namespaces is None: + namespaces = [] self.jobs = jobs + self.namespaces = namespaces self.deleted_jobs = [] + self.deleted_namespaces = [] # pylint: disable=C0103 def BatchV1Api(self): return self + # pylint: disable=C0103 + def CoreV1Api(self): + return self + def list_job_for_all_namespaces(self, *args, **kwargs): return MockedKubernetesResult(self.jobs) def delete_namespaced_job(self, name, namespace): self.deleted_jobs.append(name) + def list_namespace(self, *args, **kwargs): + return MockedKubernetesResult(self.namespaces) + + def delete_namespace(self, name): + self.deleted_namespaces.append(name) + class MockedKubernetesResult(): def __init__(self, items): @@ -46,6 +60,17 @@ def __init__(self, name, age): self.metadata = MockedKubernetesJobMetadata(name) +class MockedKubernetesNamespaceMetadata(): + def __init__(self, name, age): + self.name = name + self.creation_timestamp = datetime.now(timezone.utc) - timedelta(days=age) + + +class MockedKubernetesNamespace(): + def __init__(self, name, age): + self.metadata = MockedKubernetesNamespaceMetadata(name, age) + + class MockedSubprocessReturn(): def __init__(self, returncode=0, stdout="", stderr=""): self.returncode = returncode diff --git a/tests/test_aks.py b/tests/test_aks.py index 04d7bdae..41341493 100644 --- a/tests/test_aks.py +++ b/tests/test_aks.py @@ -3,7 +3,8 @@ from ocw.lib.provider import Provider from ocw.lib.aks import AKS from webui.PCWConfig import PCWConfig -from tests.kubernetes import MockedSubprocessReturn, MockedKubernetesClient, MockedKubernetesConfig, MockedKubernetesJob +from tests.kubernetes import MockedSubprocessReturn, MockedKubernetesClient, MockedKubernetesConfig +from tests.kubernetes import MockedKubernetesJob, MockedKubernetesNamespace class MockedAKSCluster(): @@ -48,3 +49,26 @@ def test_cleanup_k8s_jobs(aks_patch, monkeypatch): mocked_kubernetes.deleted_jobs = [] aks_patch.cleanup_k8s_jobs() assert len(mocked_kubernetes.deleted_jobs) == 0 + + +def test_cleanup_k8s_namespaces(aks_patch, monkeypatch): + mocked_kubernetes = MockedKubernetesClient(namespaces=[ + MockedKubernetesNamespace("helm-test-234", 1), # good name, too fresh + MockedKubernetesNamespace("helm-test-342", 9), # good name, old enough + MockedKubernetesNamespace("kube-system", 9), # bad name + MockedKubernetesNamespace("something-else-745", 9) # bad name + ]) + monkeypatch.setattr(AKS, "kubectl_client", lambda *args, **kwargs: mocked_kubernetes) + monkeypatch.setattr(PCWConfig, "get_k8s_clusters_for_provider", lambda *args, **kwargs: [ + {'resource_group': 'group', 'cluster_name': 'cluster'}]) + assert len(mocked_kubernetes.list_namespace) == 4 + + aks_patch.cleanup_k8s_namespaces() + assert len(mocked_kubernetes.deleted_namespaces) == 1 + assert mocked_kubernetes.deleted_namespaces[0] == "helm-test-342" + + # test dry_run + aks_patch.dry_run = True + mocked_kubernetes.deleted_namespaces = [] + aks_patch.cleanup_k8s_namespaces() + assert len(mocked_kubernetes.deleted_namespaces) == 0 diff --git a/tests/test_eks.py b/tests/test_eks.py index 1358c6c5..381b0ef3 100644 --- a/tests/test_eks.py +++ b/tests/test_eks.py @@ -5,7 +5,8 @@ from ocw.lib.eks import EKS from webui.PCWConfig import PCWConfig from tests.generators import mock_get_feature_property -from tests.kubernetes import MockedSubprocessReturn, MockedKubernetesClient, MockedKubernetesConfig, MockedKubernetesJob +from tests.kubernetes import MockedSubprocessReturn, MockedKubernetesClient, MockedKubernetesConfig +from tests.kubernetes import MockedKubernetesJob, MockedKubernetesNamespace def test_all_clusters(eks_patch, monkeypatch): @@ -189,3 +190,30 @@ def test_cleanup_k8s_jobs(eks_patch, monkeypatch): mocked_kubernetes.deleted_jobs = [] eks_patch.cleanup_k8s_jobs() assert len(mocked_kubernetes.deleted_jobs) == 0 + + +def test_cleanup_k8s_namespaces(eks_patch, monkeypatch): + mocked_eks = MockedEKSClient() + mocked_eks.clusters_list = {'clusters': ['cluster1']} + monkeypatch.setattr(EKS, 'eks_client', lambda self, region: mocked_eks) + + monkeypatch.setattr(EKS, 'create_credentials_file', lambda *args, **kwargs: None) + monkeypatch.setattr(kubernetes, 'config', MockedKubernetesConfig()) + mocked_kubernetes = MockedKubernetesClient(namespaces=[ + MockedKubernetesNamespace("helm-test-234", 1), # good name, too fresh + MockedKubernetesNamespace("helm-test-342", 9), # good name, old enough + MockedKubernetesNamespace("kube-system", 9), # bad name + MockedKubernetesNamespace("something-else-745", 9) # bad name + ]) + monkeypatch.setattr(EKS, "kubectl_client", lambda *args, **kwargs: mocked_kubernetes) + assert len(mocked_kubernetes.list_namespace) == 4 + + eks_patch.cleanup_k8s_namespaces() + assert len(mocked_kubernetes.deleted_namespaces) == 1 + assert mocked_kubernetes.deleted_namespaces[0] == "helm-test-342" + + # test dry_run + eks_patch.dry_run = True + mocked_kubernetes.deleted_namespaces = [] + eks_patch.cleanup_k8s_namespaces() + assert len(mocked_kubernetes.deleted_namespaces) == 0 diff --git a/tests/test_gke.py b/tests/test_gke.py index c2652690..22c5c9b3 100644 --- a/tests/test_gke.py +++ b/tests/test_gke.py @@ -3,7 +3,8 @@ from ocw.lib.provider import Provider import pytest import kubernetes -from tests.kubernetes import MockedKubernetesClient, MockedKubernetesConfig, MockedKubernetesJob +from tests.kubernetes import MockedKubernetesClient, MockedKubernetesConfig +from tests.kubernetes import MockedKubernetesJob, MockedKubernetesNamespace @pytest.fixture @@ -19,14 +20,38 @@ def k8s_patch(monkeypatch): def test_cleanup_k8s_jobs(k8s_patch, monkeypatch): monkeypatch.setattr(kubernetes, 'config', MockedKubernetesConfig()) - mocked_kubernetes = MockedKubernetesClient([MockedKubernetesJob("job1", 1), MockedKubernetesJob("job2", 0)]) + mocked_kubernetes = MockedKubernetesClient(jobs=[MockedKubernetesJob("job1", 1), MockedKubernetesJob("job2", 0)]) monkeypatch.setattr(kubernetes, 'client', mocked_kubernetes) monkeypatch.setattr(GKE, 'kubectl_client', lambda *args, **kwargs: mocked_kubernetes) k8s_patch.cleanup_k8s_jobs() assert len(mocked_kubernetes.deleted_jobs) == 1 assert mocked_kubernetes.deleted_jobs[0] == "job1" + # test dry_run k8s_patch.dry_run = True mocked_kubernetes.deleted_jobs = [] k8s_patch.cleanup_k8s_jobs() assert len(mocked_kubernetes.deleted_jobs) == 0 + + +def test_cleanup_k8s_namespaces(k8s_patch, monkeypatch): + monkeypatch.setattr(kubernetes, 'config', MockedKubernetesConfig()) + mocked_kubernetes = MockedKubernetesClient(namespaces=[ + MockedKubernetesNamespace("helm-test-234", 1), # good name, too fresh + MockedKubernetesNamespace("helm-test-342", 9), # good name, old enough + MockedKubernetesNamespace("kube-system", 9), # bad name + MockedKubernetesNamespace("something-else-745", 9) # bad name + ]) + monkeypatch.setattr(kubernetes, 'client', mocked_kubernetes) + monkeypatch.setattr(GKE, 'kubectl_client', lambda *args, **kwargs: mocked_kubernetes) + assert len(mocked_kubernetes.list_namespace) == 4 + + k8s_patch.cleanup_k8s_namespaces() + assert len(mocked_kubernetes.deleted_namespaces) == 1 + assert mocked_kubernetes.deleted_namespaces[0] == "helm-test-342" + + # test dry_run + k8s_patch.dry_run = True + mocked_kubernetes.deleted_namespaces = [] + k8s_patch.cleanup_k8s_namespaces() + assert len(mocked_kubernetes.deleted_namespaces) == 0