Skip to content

Commit

Permalink
fix: Better Demo Alerts (#2607)
Browse files: browse the repository at this point in the history
  • Loading branch information
Matvey-Kuk authored Nov 24, 2024
1 parent d9cf4e3 commit cda1363
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 12 deletions.
29 changes: 19 additions & 10 deletions keep/api/core/demo_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
{
"sqlQuery": {"sql": "((name like :name_1))", "params": {"name_1": "%mq%"}},
"groupDescription": "This rule groups all alerts related to MQ.",
"ruleName": "Message Queue Buckle Up",
"ruleName": "Message queue is getting filled up",
"celQuery": '(name.contains("mq"))',
"timeframeInSeconds": 86400,
"timeUnit": "hours",
Expand Down Expand Up @@ -243,6 +243,14 @@ def get_or_create_topology(keep_api_key, keep_api_url):
if service["name"] == existing_service["display_name"]:
service["id"] = existing_service["id"]

# Check if any service does not have an id
for service in application_to_create["services"]:
if "id" not in service:
logger.error(
f"Service {service['name']} does not have an id. Application creation failed."
)
return True

response = requests.post(
f"{keep_api_url}/topology/applications",
headers={"x-api-key": keep_api_key},
Expand Down Expand Up @@ -415,21 +423,22 @@ def simulate_alerts(
time.sleep(sleep_interval)


def launch_demo_mode_thread(keep_api_url=None) -> threading.Thread | None:
def launch_demo_mode_thread(keep_api_url=None, keep_api_key=None) -> threading.Thread | None:
if not KEEP_LIVE_DEMO_MODE:
logger.info("Not launching the demo mode.")
return

logger.info("Launching demo mode.")

with get_session_sync() as session:
keep_api_key = get_or_create_api_key(
session=session,
tenant_id=SINGLE_TENANT_UUID,
created_by="system",
unique_api_key_id="simulate_alerts",
system_description="Simulate Alerts API key",
)
if keep_api_key is None:
with get_session_sync() as session:
keep_api_key = get_or_create_api_key(
session=session,
tenant_id=SINGLE_TENANT_UUID,
created_by="system",
unique_api_key_id="simulate_alerts",
system_description="Simulate Alerts API key",
)

sleep_interval = 5

Expand Down
18 changes: 18 additions & 0 deletions keep/providers/datadog_provider/alerts_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,22 @@
"priority": ["P1", "P3", "P4"],
},
},
"mq_consumer_struggling": {
"payload": {
"title": "mq consumer is struggling",
"type": "metric alert",
"query": "avg(last_1h):min:mq_processing{*} by {host} < 10",
"message": "MQ Consumer is processing less than 10 messages per second on {{host.name}}.",
"tags": "environment:production,team:database",
"priority": 4,
"monitor_id": "1234567891",
},
"parameters": {
"tags": [
"environment:production,team:analytics,monitor,service:api",
"environment:staging,team:database,monitor,service:api",
],
"priority": ["P1", "P3", "P4"],
},
},
}
16 changes: 15 additions & 1 deletion keep/providers/prometheus_provider/alerts_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"labels.instance": ["instance1", "instance2", "instance3"],
},
},
"mq_third_full": {
"mq_third_full (Message queue is over 33%)": {
"payload": {
"summary": "Message queue is over 33% capacity",
"labels": {
Expand All @@ -29,6 +29,20 @@
"labels.mq_manager": ["mq_manager1", "mq_manager2", "mq_manager3"],
},
},
"mq_full (Message queue is full)": {
"payload": {
"summary": "Message queue is over 90% capacity",
"labels": {
"severity": "critical",
"customer_id": "acme"
},
},
"parameters": {
"labels.queue": ["queue4"],
"labels.service": ["calendar-producer-java-otel-api-dd", "kafka", "queue"],
"labels.mq_manager": ["mq_manager4"],
},
},
"disk_space_low": {
"payload": {
"summary": "Disk space is below 20%",
Expand Down
3 changes: 2 additions & 1 deletion keep/server_jobs_bg.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def main():
# We intentionally don't use KEEP_API_URL here to avoid going through the internet.
# Script should be launched in the same environment as the server.
keep_api_url = "http://localhost:" + str(os.environ.get("PORT", 8080))
keep_api_key = os.environ.get("KEEP_LIVE_DEMO_MODE_API_KEY")

while True:
try:
Expand All @@ -27,7 +28,7 @@ def main():
time.sleep(5)

threads = []
threads.append(launch_demo_mode_thread(keep_api_url))
threads.append(launch_demo_mode_thread(keep_api_url, keep_api_key))
threads.append(launch_uptime_reporting_thread())

logger.info("Background server jobs threads launched, joining them.")
Expand Down

0 comments on commit cda1363

Please sign in to comment.