From af8f0df2b0ad66dad1983c2d5a41b5e49fd7ebfb Mon Sep 17 00:00:00 2001 From: Tullio Sebastiani Date: Wed, 2 Aug 2023 12:22:56 +0200 Subject: [PATCH] prometheus telemetry upload + config.yaml some fixes typos and logs max retries in config telemetry id with run_uuid safe_logger --- config/config.yaml | 7 ++++++- run_kraken.py | 45 ++++++++++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 088946bc8..d2dfb0141 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -68,4 +68,9 @@ telemetry: enabled: True api_url: https://ulnmf9xv7j.execute-api.us-west-2.amazonaws.com/production username: username - password: password \ No newline at end of file + password: password + prometheus_backup: True + full_prometheus_backup: False + backup_threads: 5 + archive_path: /tmp + max_retries: 0 \ No newline at end of file diff --git a/run_kraken.py b/run_kraken.py index 76b7c81bd..11538e1b5 100644 --- a/run_kraken.py +++ b/run_kraken.py @@ -25,7 +25,7 @@ import server as server import kraken.prometheus.client as promcli from kraken import plugins -from krkn_lib_kubernetes import KrknLibKubernetes, KrknTelemetry, ChaosRunTelemetry +from krkn_lib_kubernetes import KrknLibKubernetes, KrknTelemetry, ChaosRunTelemetry, SafeLogger KUBE_BURNER_URL = ( "https://github.com/cloud-bulldozer/kube-burner/" @@ -98,15 +98,32 @@ def main(cfg): ) sys.exit(1) logging.info("Initializing client to talk to the Kubernetes cluster") + + # Generate uuid for the run + if run_uuid: + logging.info( + "Using the uuid defined by the user for the run: %s" % run_uuid + ) + else: + run_uuid = str(uuid.uuid4()) + logging.info("Generated a uuid for the run: %s" % run_uuid) + + # request_id for telemetry is generated once here and used everywhere + telemetry_request_id = f"{int(time.time())}-{run_uuid}" + telemetry_log_file = f'{config["telemetry"]["archive_path"]}/{telemetry_request_id}.log' + safe_logger = SafeLogger(filename=telemetry_log_file) + try: kubeconfig_path os.environ["KUBECONFIG"] = str(kubeconfig_path) + # krkn-lib-kubernetes init kubecli = KrknLibKubernetes(kubeconfig_path=kubeconfig_path) - telemetry = KrknTelemetry() - except NameError: kubecli.initialize_clients(None) + # KrknTelemetry init + telemetry = KrknTelemetry(safe_logger, kubecli) + # find node kraken might be running on kubecli.find_kraken_node() @@ -143,14 +160,7 @@ def main(cfg): if deploy_performance_dashboards: performance_dashboards.setup(dashboard_repo, distribution) - # Generate uuid for the run - if run_uuid: - logging.info( - "Using the uuid defined by the user for the run: %s" % run_uuid - ) - else: - run_uuid = str(uuid.uuid4()) - logging.info("Generated a uuid for the run: %s" % run_uuid) + # Initialize the start iteration to 0 iteration = 0 @@ -174,6 +184,7 @@ def main(cfg): start_time = int(time.time()) litmus_installed = False chaos_telemetry = ChaosRunTelemetry() + chaos_telemetry.run_uuid = run_uuid # Loop to run the chaos starts here while int(iteration) < iterations and run_signal != "STOP": # Inject chaos scenarios specified in the config @@ -364,9 +375,17 @@ def main(cfg): iteration += 1 logging.info("") - # send telemetry - telemetry.send_telemetry(config["telemetry"],str(uuid.uuid1()), chaos_telemetry, kubecli) + # telemetry + logging.info(f"telemetry data will be stored on s3 bucket folder: {telemetry_request_id}") + logging.info(f"telemetry upload log: {safe_logger.log_file_name}") + + telemetry.send_telemetry(config["telemetry"], telemetry_request_id, chaos_telemetry) + safe_logger.info("archives download started:") + prometheus_archive_files = telemetry.get_ocp_prometheus_data(config["telemetry"], telemetry_request_id) + safe_logger.info("archives upload started:") + telemetry.put_ocp_prometheus_data(config["telemetry"], prometheus_archive_files, telemetry_request_id) + # Capture the end time end_time = int(time.time())