diff --git a/backend/internal/experiment_manager.py b/backend/internal/experiment_manager.py index 0257f40..d6b7f97 100644 --- a/backend/internal/experiment_manager.py +++ b/backend/internal/experiment_manager.py @@ -106,6 +106,9 @@ def run_experiment(self, experiment_id, output_format, runs): ) engine.run(runs=runs, orchestration_timeout=None, randomize=False, accounting=False) + except Exception as e: + logger.error(f"Error running experiment: {e}") + self.update_experiment(experiment_id, {'status': 'FAILED', 'error_message': str(e)}) finally: self.release_lock() diff --git a/backend/internal/kubernetes_orchestrator.py b/backend/internal/kubernetes_orchestrator.py index 8625ab4..0c555cb 100644 --- a/backend/internal/kubernetes_orchestrator.py +++ b/backend/internal/kubernetes_orchestrator.py @@ -28,24 +28,35 @@ def __init__(self, experiment_config=None): logger.error("No experiment configuration provided. Continue with empty configuration") experiment_config = {} self.experiment_config: dict = experiment_config - config.load_incluster_config() - self.kube_client = client.CoreV1Api() - self.api_client = client.AppsV1Api() - - logger.info("Loading all running resources in k8s cluster") - self.list_of_all_pods = self.kube_client.list_pod_for_all_namespaces(watch=False) - #for i in self.list_of_all_pods.items: - #print("%s\t%s\t%s" % (i.status.pod_ip, i.metadata.namespace, i.metadata.name)) - pass - - self.list_of_all_services = self.kube_client.list_service_for_all_namespaces(watch=False) - #for i in self.list_of_all_services.items: - #print("%s\t%s" % (i.metadata.namespace, i.metadata.name)) + import sys + sys.setrecursionlimit(10000) + + try: + config.load_incluster_config() + self.kube_client = client.CoreV1Api() + self.api_client = client.AppsV1Api() - """Check if all of experiment_config.sue.required services are running""" - self.required_services = self.experiment_config["experiment"]["sue"]["required"] - #self._check_required_services(self.required_services) + logger.info("Loading all running resources in k8s cluster") + + + try: + self.list_of_all_pods = self.kube_client.list_pod_for_all_namespaces(watch=False) + self.list_of_all_services = self.kube_client.list_service_for_all_namespaces(watch=False) + except Exception as e: + logger.error(f"Error loading cluster resources: {str(e)}") + # fallback + self.list_of_all_pods = [] + self.list_of_all_services = [] + + self.required_services = self.experiment_config.get("experiment", {}).get("sue", {}).get("required", []) + + except Exception as e: + logger.error(f"Error initializing Kubernetes orchestrator: {str(e)}") + raise OrchestratorException( + message="Failed to initialize Kubernetes orchestrator", + explanation=str(e) + ) def _check_required_services(self, required_services) -> bool: