Skip to content

Commit

Permalink
debug: raised recursion limit in k8s orchestrator
Browse files: browse the repository at this point in the history.
  • Loading branch information
luccadibe committed Dec 10, 2024
1 parent 77aba40 commit 0d5c8da
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 16 deletions.
3 changes: 3 additions & 0 deletions backend/internal/experiment_manager.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -106,6 +106,9 @@ def run_experiment(self, experiment_id, output_format, runs):
)

engine.run(runs=runs, orchestration_timeout=None, randomize=False, accounting=False)
except Exception as e:
logger.error(f"Error running experiment: {e}")
self.update_experiment(experiment_id, {'status': 'FAILED', 'error_message': str(e)})
finally:
self.release_lock()

Expand Down
43 changes: 27 additions & 16 deletions backend/internal/kubernetes_orchestrator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,24 +28,35 @@ def __init__(self, experiment_config=None):
logger.error("No experiment configuration provided. Continue with empty configuration")
experiment_config = {}
self.experiment_config: dict = experiment_config
config.load_incluster_config()
self.kube_client = client.CoreV1Api()
self.api_client = client.AppsV1Api()

logger.info("Loading all running resources in k8s cluster")

self.list_of_all_pods = self.kube_client.list_pod_for_all_namespaces(watch=False)
#for i in self.list_of_all_pods.items:
#print("%s\t%s\t%s" % (i.status.pod_ip, i.metadata.namespace, i.metadata.name))
pass

self.list_of_all_services = self.kube_client.list_service_for_all_namespaces(watch=False)
#for i in self.list_of_all_services.items:
#print("%s\t%s" % (i.metadata.namespace, i.metadata.name))
import sys
sys.setrecursionlimit(10000)

try:
config.load_incluster_config()
self.kube_client = client.CoreV1Api()
self.api_client = client.AppsV1Api()

"""Check if all of experiment_config.sue.required services are running"""
self.required_services = self.experiment_config["experiment"]["sue"]["required"]
#self._check_required_services(self.required_services)
logger.info("Loading all running resources in k8s cluster")


try:
self.list_of_all_pods = self.kube_client.list_pod_for_all_namespaces(watch=False)
self.list_of_all_services = self.kube_client.list_service_for_all_namespaces(watch=False)
except Exception as e:
logger.error(f"Error loading cluster resources: {str(e)}")
# fallback
self.list_of_all_pods = []
self.list_of_all_services = []

self.required_services = self.experiment_config.get("experiment", {}).get("sue", {}).get("required", [])

except Exception as e:
logger.error(f"Error initializing Kubernetes orchestrator: {str(e)}")
raise OrchestratorException(
message="Failed to initialize Kubernetes orchestrator",
explanation=str(e)
)


def _check_required_services(self, required_services) -> bool:
Expand Down

0 comments on commit 0d5c8da

Please sign in to comment.