Skip to content

Commit

Permalink
Leverage Health Check Settings for non-container Services to Determin…
Browse files Browse the repository at this point in the history
…e Need for Auto Redeployment (#1475)

* check health_check setting for non container service to deicde if auto redeployment is needed

* use HEALTHCHECK instead of HEALTH_CHECK

* put non container redeploy check in a helper function

* address some comments

* address some comments

* bump version

* rename functions

* rename functions
  • Loading branch information
liyaqin1 authored Mar 5, 2024
1 parent 91b17e5 commit 18065dd
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 7 deletions.
2 changes: 1 addition & 1 deletion deploy-agent/deployd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@
# 2: puppet applied successfully with changes
PUPPET_SUCCESS_EXIT_CODES = [0, 2]

__version__ = '1.2.58'
__version__ = '1.2.59'
48 changes: 42 additions & 6 deletions deploy-agent/deployd/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import subprocess
from typing import Any, Optional, Union
import yaml
import requests


import json
Expand Down Expand Up @@ -222,7 +223,7 @@ def get_info_from_facter(keys) -> Optional[dict]:
return None


def redeploy_check(labels, service, redeploy) -> int:
def redeploy_check_for_container(labels, service, redeploy) -> int:
max_retry = REDEPLOY_MAX_RETRY
for label in labels:
if "redeploy_max_retry" in label:
Expand All @@ -231,9 +232,45 @@ def redeploy_check(labels, service, redeploy) -> int:
return redeploy + 1
return 0

def redeploy_check_for_non_container(commit, service, redeploy, healthcheckConfigs):
if healthcheckConfigs.get("HEALTHCHECK_REDEPLOY_WHEN_UNHEALTHY") == "True":
log.info(f"Auto redeployment is enabled on service {service}")
max_retry = int(healthcheckConfigs.get("HEALTHCHECK_REDEPLOY_MAX_RETRY", REDEPLOY_MAX_RETRY))
if redeploy < max_retry:
log.info(f"redeploy is {redeploy}; max retry is {max_retry}")
send_statsboard_metric(name='deployd.service_health_status', value=1,
tags={"status": "redeploy", "service": service, "commit": commit})
return "redeploy-" + str(redeploy + 1)
send_statsboard_metric(name='deployd.service_health_status', value=1,
tags={"status": "unhealthy", "service": service, "commit": commit})
return service + ":unhealthy"

def redeploy_check_without_container_status(commit, service, redeploy):
log.info(f"Get health info for service {service} with commit {commit}")
fn = os.path.join("/mnt/deployd/", "{}_HEALTHCHECK".format(service))
if not os.path.isfile(fn):
return None
with open(fn, 'r') as f:
healthcheckConfigs = dict((n.strip('\"\n\' ') for n in line.split("=", 1)) for line in f)
if "HEALTHCHECK_HTTP" not in healthcheckConfigs:
return None
log.info(f"Healthcheck is enabled on service {service}")
url = healthcheckConfigs["HEALTHCHECK_HTTP"]
if "http://" not in url:
url = "http://" + url
try:
resp = requests.get(url)
if resp.status_code >= 200 and resp.status_code < 300:
send_statsboard_metric(name='deployd.service_health_status', value=1,
tags={"status": "healthy", "service": service, "commit": commit})
return service + ":healthy"
except requests.ConnectionError:
return redeploy_check_for_non_container(commit, service, redeploy, healthcheckConfigs)
return redeploy_check_for_non_container(commit, service, redeploy, healthcheckConfigs)

def get_container_health_info(commit, service, redeploy) -> Optional[str]:
try:
log.info(f"Get health info for container with commit {commit}")
log.info(f"Get health info for service {service} with commit {commit}")
result = []
cmd = ['docker', 'ps', '--format', '{{.Image}};{{.Names}};{{.Labels}}']
output = subprocess.run(cmd, check=True, stdout=subprocess.PIPE).stdout
Expand All @@ -250,7 +287,7 @@ def get_container_health_info(commit, service, redeploy) -> Optional[str]:
status = status.decode().strip()
if status == "unhealthy" and "redeploy_when_unhealthy=enabled" in parts[2]:
labels = parts[2].split(',')
ret = redeploy_check(labels, service, redeploy)
ret = redeploy_check_for_container(labels, service, redeploy)
if ret > 0:
send_statsboard_metric(name='deployd.service_health_status', value=1,
tags={"status": "redeploy", "service": service, "commit": commit})
Expand All @@ -265,9 +302,8 @@ def get_container_health_info(commit, service, redeploy) -> Optional[str]:
elif returnValue and "unhealthy" not in returnValue:
send_statsboard_metric(name='deployd.service_health_status', value=1,
tags={"status": "healthy", "service": service, "commit": commit})
return returnValue
else:
return None
# if no check happens for the current service, check if it is a non container with healthcheck enabled
return redeploy_check_without_container_status(commit, service, redeploy)
except:
log.error(f"Failed to get container health info with commit {commit}")
return None
Expand Down

0 comments on commit 18065dd

Please sign in to comment.