From cda1363b8000751b4802ce6acf4975a1e49a3c8e Mon Sep 17 00:00:00 2001 From: Matvey Kukuy Date: Sun, 24 Nov 2024 13:50:43 +0200 Subject: [PATCH] fix: Better Demo Alerts (#2607) --- keep/api/core/demo_mode.py | 29 ++++++++++++------- .../providers/datadog_provider/alerts_mock.py | 18 ++++++++++++ .../prometheus_provider/alerts_mock.py | 16 +++++++++- keep/server_jobs_bg.py | 3 +- 4 files changed, 54 insertions(+), 12 deletions(-) diff --git a/keep/api/core/demo_mode.py b/keep/api/core/demo_mode.py index 26ec63add..4ffc06166 100644 --- a/keep/api/core/demo_mode.py +++ b/keep/api/core/demo_mode.py @@ -29,7 +29,7 @@ { "sqlQuery": {"sql": "((name like :name_1))", "params": {"name_1": "%mq%"}}, "groupDescription": "This rule groups all alerts related to MQ.", - "ruleName": "Message Queue Buckle Up", + "ruleName": "Message queue is getting filled up", "celQuery": '(name.contains("mq"))', "timeframeInSeconds": 86400, "timeUnit": "hours", @@ -243,6 +243,14 @@ def get_or_create_topology(keep_api_key, keep_api_url): if service["name"] == existing_service["display_name"]: service["id"] = existing_service["id"] + # Check if any service does not have an id + for service in application_to_create["services"]: + if "id" not in service: + logger.error( + f"Service {service['name']} does not have an id. Application creation failed." + ) + return True + response = requests.post( f"{keep_api_url}/topology/applications", headers={"x-api-key": keep_api_key}, @@ -415,21 +423,22 @@ def simulate_alerts( time.sleep(sleep_interval) -def launch_demo_mode_thread(keep_api_url=None) -> threading.Thread | None: +def launch_demo_mode_thread(keep_api_url=None, keep_api_key=None) -> threading.Thread | None: if not KEEP_LIVE_DEMO_MODE: logger.info("Not launching the demo mode.") return logger.info("Launching demo mode.") - with get_session_sync() as session: - keep_api_key = get_or_create_api_key( - session=session, - tenant_id=SINGLE_TENANT_UUID, - created_by="system", - unique_api_key_id="simulate_alerts", - system_description="Simulate Alerts API key", - ) + if keep_api_key is None: + with get_session_sync() as session: + keep_api_key = get_or_create_api_key( + session=session, + tenant_id=SINGLE_TENANT_UUID, + created_by="system", + unique_api_key_id="simulate_alerts", + system_description="Simulate Alerts API key", + ) sleep_interval = 5 diff --git a/keep/providers/datadog_provider/alerts_mock.py b/keep/providers/datadog_provider/alerts_mock.py index 0ff032a3b..f4fcff3ce 100644 --- a/keep/providers/datadog_provider/alerts_mock.py +++ b/keep/providers/datadog_provider/alerts_mock.py @@ -35,4 +35,22 @@ "priority": ["P1", "P3", "P4"], }, }, + "mq_consumer_struggling": { + "payload": { + "title": "mq consumer is struggling", + "type": "metric alert", + "query": "avg(last_1h):min:mq_processing{*} by {host} < 10", + "message": "MQ Consumer is processing less than 10 messages per second on {{host.name}}.", + "tags": "environment:production,team:database", + "priority": 4, + "monitor_id": "1234567891", + }, + "parameters": { + "tags": [ + "environment:production,team:analytics,monitor,service:api", + "environment:staging,team:database,monitor,service:api", + ], + "priority": ["P1", "P3", "P4"], + }, + }, } diff --git a/keep/providers/prometheus_provider/alerts_mock.py b/keep/providers/prometheus_provider/alerts_mock.py index d29197074..fa5f7e922 100644 --- a/keep/providers/prometheus_provider/alerts_mock.py +++ b/keep/providers/prometheus_provider/alerts_mock.py @@ -15,7 +15,7 @@ "labels.instance": ["instance1", "instance2", "instance3"], }, }, - "mq_third_full": { + "mq_third_full (Message queue is over 33%)": { "payload": { "summary": "Message queue is over 33% capacity", "labels": { @@ -29,6 +29,20 @@ "labels.mq_manager": ["mq_manager1", "mq_manager2", "mq_manager3"], }, }, + "mq_full (Message queue is full)": { + "payload": { + "summary": "Message queue is over 90% capacity", + "labels": { + "severity": "critical", + "customer_id": "acme" + }, + }, + "parameters": { + "labels.queue": ["queue4"], + "labels.service": ["calendar-producer-java-otel-api-dd", "kafka", "queue"], + "labels.mq_manager": ["mq_manager4"], + }, + }, "disk_space_low": { "payload": { "summary": "Disk space is below 20%", diff --git a/keep/server_jobs_bg.py b/keep/server_jobs_bg.py index b1cad6390..bce6ca9c0 100644 --- a/keep/server_jobs_bg.py +++ b/keep/server_jobs_bg.py @@ -15,6 +15,7 @@ def main(): # We intentionally don't use KEEP_API_URL here to avoid going through the internet. # Script should be launched in the same environment as the server. keep_api_url = "http://localhost:" + str(os.environ.get("PORT", 8080)) + keep_api_key = os.environ.get("KEEP_LIVE_DEMO_MODE_API_KEY") while True: try: @@ -27,7 +28,7 @@ def main(): time.sleep(5) threads = [] - threads.append(launch_demo_mode_thread(keep_api_url)) + threads.append(launch_demo_mode_thread(keep_api_url, keep_api_key)) threads.append(launch_uptime_reporting_thread()) logger.info("Background server jobs threads launched, joining them.")