Skip to content

Commit

Permalink
apply the same notification rules on zd events to tg events
Browse files Browse the repository at this point in the history
  • Loading branch information
ayazabbas committed Jun 12, 2024
1 parent ab41a4d commit 3c932a1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,18 @@ Event types are configured via environment variables:

- `TelegramEvent`
- `TELEGRAM_BOT_TOKEN` - API token for the Telegram bot
- `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts

- `ZendutyEvent`
- `ZENDUTY_INTEGRATION_KEY` - Integration key for Zenduty service API integration
- `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts

### Zenduty Alert Thresholds
- Zenduty alert will fire if a check fails 5 or more times within 5 minutes.
- The alert will be resolved if the check failed < 4 times within 5 minutes.
### Alert Thresholds
- Alert thresholds apply to `ZendutyEvent` and `TelegramEvent` (resolution applies only to `ZendutyEvent`)
- Checks run approximately once per minute.
- These thresholds can be overridden per check type in config.yaml
- `zenduty_alert_threshold`: number of failures in 5 minutes >= to this value trigger an alert (default: 5)
- `zenduty_resolution_threshold`: number of failures in 5 minutes <= this value resolve the alert (default: 3)
- `alert_threshold`: an alert is triggered when the number of failures in 5 minutes is >= this value (default: 5)
- `resolution_threshold`: the alert is resolved when the number of failures in 5 minutes is <= this value (default: 3)

## Finding the Telegram Group Chat ID

Expand Down
21 changes: 11 additions & 10 deletions pyth_observer/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, config, publishers):
self.open_alerts = self.load_alerts()
# below is used to store events to send later if multiple failures occur
# events cannot be stored in open_alerts as they are not JSON serializable.
self.zenduty_events = {}
self.delayed_events = {}

def load_alerts(self):
try:
Expand Down Expand Up @@ -79,7 +79,7 @@ async def run(self, states: List[State]):
for event_type in self.config["events"]:
event: Event = globals()[event_type](check, context)

if event_type == "ZendutyEvent":
if event_type in ["ZendutyEvent", "TelegramEvent"]:
alert_identifier = self.generate_alert_identifier(check)
alert = self.open_alerts.get(alert_identifier)
if alert is None:
Expand All @@ -89,11 +89,12 @@ async def run(self, states: List[State]):
"failures": 1,
"last_window_failures": None,
"sent": False,
"event_type": event_type
}
else:
alert["failures"] += 1
self.zenduty_events[alert_identifier] = event
continue # Skip sending immediately for ZendutyEvent
self.delayed_events[alert_identifier] = event
continue # Skip sending immediately for ZendutyEvent or TelegramEvent

sent_events.append(event.send())

Expand Down Expand Up @@ -177,8 +178,8 @@ async def process_zenduty_events(self, current_time):
for identifier, info in self.open_alerts.items():
self.check_zd_alert_status(identifier, current_time)
check_config = self.config["checks"]["global"][info["type"]]
alert_threshold = check_config.get("zenduty_alert_threshold", 5)
resolution_threshold = check_config.get("zenduty_resolution_threshold", 3)
alert_threshold = check_config.get("alert_threshold", 5)
resolution_threshold = check_config.get("resolution_threshold", 3)
# Resolve the alert if raised and failed < $threshold times in the last 5m window
resolved = False
if (
Expand All @@ -187,7 +188,7 @@ async def process_zenduty_events(self, current_time):
):
logger.debug(f"Resolving Zenduty alert {identifier}")
resolved = True
if info["sent"]:
if info["sent"] and info.get("event_type", "ZendutyEvent") == "ZendutyEvent":
response = await send_zenduty_alert(
identifier, identifier, resolved=True
)
Expand All @@ -208,16 +209,16 @@ async def process_zenduty_events(self, current_time):
logger.debug(f"Raising Zenduty alert {identifier}")
self.open_alerts[identifier]["sent"] = True
self.open_alerts[identifier]["last_alert"] = current_time.isoformat()
event = self.zenduty_events.get(identifier)
event = self.delayed_events.get(identifier)
if event:
to_alert.append(event.send())

await asyncio.gather(*to_alert)
for identifier in to_remove:
if self.open_alerts.get(identifier):
del self.open_alerts[identifier]
if self.zenduty_events.get(identifier):
del self.zenduty_events[identifier]
if self.delayed_events.get(identifier):
del self.delayed_events[identifier]

with open(self.open_alerts_file, "w") as file:
json.dump(self.open_alerts, file)

0 comments on commit 3c932a1

Please sign in to comment.