Skip to content

Commit

Permalink
bootsys stage to wait for kyverno before job recreation
Browse files Browse the repository at this point in the history
IM:CRAYSAT-1857
Reviewer:Ryan

Update the bootsys platform-services stage to wait for Kyverno availability
before proceeding to the cronjob recreation stage
  • Loading branch information
Shivaprasad Ashok Metimath committed Jun 13, 2024
1 parent bd4ce5e commit d33131b
Showing 1 changed file with 38 additions and 0 deletions.
38 changes: 38 additions & 0 deletions sat/cli/bootsys/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@
from multiprocessing import Process

from csm_api_client.k8s import load_kube_api
from kubernetes import client, config
from kubernetes.client import BatchV1Api
from kubernetes.client.rest import ApiException
from kubernetes.config import ConfigException
from paramiko import SSHException

Expand Down Expand Up @@ -487,6 +489,42 @@ def do_kubelet_start(ncn_groups):
raise FatalPlatformError("Kubernetes API not available after timeout")
LOGGER.info("Kubernetes API is available")

# Wait for Kyverno pods to be ready
wait_for_kyverno_pods()


def _kyverno_pod_is_ready(pod):
    """Return True if the pod is in phase Running and its Ready condition is 'True'."""
    status = pod.status
    if status is None or status.phase != 'Running':
        return False
    # A pod in phase Running may still have containers failing their readiness
    # probes; the 'Ready' pod condition is what indicates it can serve requests.
    # `conditions` can be None on a pod that has only just been created.
    return any(
        cond.type == 'Ready' and cond.status == 'True'
        for cond in (status.conditions or [])
    )


def wait_for_kyverno_pods(timeout=300, interval=10):
    """Wait for Kyverno pods to be up and running.

    Pods in the 'kyverno' namespace are polled until every pod that has not
    already run to completion is Running and Ready. At least one such pod
    must exist; an empty namespace is treated as "not ready yet" rather than
    as success.

    Args:
        timeout (int): Maximum time, in seconds, to wait for the pods to be
            ready. The pods are always checked at least once, and once more
            after the final sleep, so the total wait may exceed this by up
            to one interval.
        interval (int): Time, in seconds, to sleep between checks.

    Returns:
        bool: True once all Kyverno pods are ready.

    Raises:
        FatalPlatformError: if the Kubernetes configuration cannot be loaded,
            or if Kyverno pods are not ready after timeout.
    """
    namespace = 'kyverno'

    try:
        config.load_kube_config()
    except ConfigException as err:
        # Surface config problems as the error type this function documents
        # instead of letting an unrelated exception type escape.
        raise FatalPlatformError(f"Failed to load Kubernetes config: {err}") from err

    v1 = client.CoreV1Api()
    # Monotonic clock: the wall clock may jump (e.g. time sync) while the
    # system is booting, which would distort a time.time()-based deadline.
    deadline = time.monotonic() + timeout

    LOGGER.info(f"Waiting up to {timeout} seconds for Kyverno pods to be ready in namespace {namespace}")

    while True:
        try:
            pods = v1.list_namespaced_pod(namespace)
        except ApiException as e:
            # Best-effort: a failed query is retried until the deadline.
            LOGGER.error(f"Exception when checking Kyverno pods: {e}")
        else:
            # Pods that ran to completion (e.g. finished Job/CronJob pods)
            # stay in phase 'Succeeded' forever and must not block the wait.
            active_pods = [
                pod for pod in pods.items
                if pod.status is None or pod.status.phase != 'Succeeded'
            ]
            # `all()` of an empty sequence is True, so require at least one pod.
            if active_pods and all(_kyverno_pod_is_ready(pod) for pod in active_pods):
                LOGGER.info("All Kyverno pods are up and running")
                return True

        if time.monotonic() >= deadline:
            break

        LOGGER.info(f"Kyverno pods are not ready yet. Waiting for {interval} seconds before retrying...")
        time.sleep(interval)

    raise FatalPlatformError(f"Kyverno pods not ready after {timeout} seconds")


def do_recreate_cronjobs(_):
"""Recreate cronjobs that are not being scheduled on time."""
Expand Down

0 comments on commit d33131b

Please sign in to comment.