Skip to content

Commit

Permalink
feat: add enable_alerts_on_restart to DAQJobHealthcheck
Browse files: browse the repository at this point in the history
  • Loading branch information
furkan-bilgin committed Oct 15, 2024
1 parent 1a85ea3 commit b4d092f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
19 changes: 18 additions & 1 deletion src/daq/jobs/healthcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from dataclasses_json import DataClassJsonMixin

from daq.alert.base import DAQAlertInfo, DAQJobMessageAlert
from daq.alert.base import DAQAlertInfo, DAQAlertSeverity, DAQJobMessageAlert
from daq.base import DAQJob
from daq.jobs.handle_stats import DAQJobMessageStats, DAQJobStatsDict
from daq.models import DAQJobConfig, DAQJobStats
Expand Down Expand Up @@ -53,6 +53,7 @@ def parse_interval(self) -> timedelta:
# Configuration consumed by DAQJobHealthcheck.__init__.
@dataclass
class DAQJobHealthcheckConfig(DAQJobConfig):
# Healthcheck items to evaluate. DAQJobHealthcheck.__init__ sanity-checks each
# item's alert_if_interval_is against AlertCondition.
healthcheck_stats: list[HealthcheckStatsItem]
# When True (the default), DAQJobHealthcheck.__init__ appends one extra
# HealthcheckStatsItem per entry of its _daq_job_type_to_class mapping to
# healthcheck_stats: stats_key "restart_stats", interval "1m",
# AlertCondition.SATISFIED, with an ERROR-severity
# "<JobClass> crashed and got restarted!" alert message.
# NOTE(review): the append appears to mutate this config's list in place
# (assuming self.config is the config object passed in) - confirm.
enable_alerts_on_restart: bool = True


class DAQJobHealthcheck(DAQJob):
Expand All @@ -70,6 +71,22 @@ def __init__(self, config: DAQJobHealthcheckConfig):
self._current_stats = {}

super().__init__(config)

if config.enable_alerts_on_restart:
for daq_job_type, daq_job_type_class in self._daq_job_type_to_class.items():
self.config.healthcheck_stats.append(
HealthcheckStatsItem(
alert_info=DAQAlertInfo(
message=f"{daq_job_type_class.__name__} crashed and got restarted!",
severity=DAQAlertSeverity.ERROR,
),
daq_job_type=daq_job_type,
alert_if_interval_is=AlertCondition.SATISFIED,
stats_key="restart_stats",
interval="1m",
)
)

# Sanity check config
for item in config.healthcheck_stats:
if item.alert_if_interval_is not in AlertCondition:
Expand Down
1 change: 1 addition & 0 deletions src/tests/test_healthcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def setUp(self):
self.config = DAQJobHealthcheckConfig(
daq_job_type="test",
healthcheck_stats=[self.healthcheck_item],
enable_alerts_on_restart=False,
)
self.daq_job_healthcheck = DAQJobHealthcheck(self.config)

Expand Down

0 comments on commit b4d092f

Please sign in to comment.