Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

107 analysis module backup storage fill alert #148

Merged
merged 7 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions apps/analyzer/metadata_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,10 @@ def simple_rule_based_analysis_creation_dates(alert_limit):
start_date = Analyzer._get_start_date(data, "CREATION_DATE_ALERT", None)
result = Analyzer.simple_rule_based_analyzer.analyze_creation_dates(data, alert_limit, start_date)
return result

def simple_rule_based_analysis_storage_capacity(alert_limit):
    """Load every data store and run the storage-capacity analysis on them.

    Mirrors the sibling analysis entry points: materializes the rows from the
    database, then delegates to the configured simple rule based analyzer.
    Returns whatever the analyzer returns (a dict with the alert count).
    """
    stores = list(Analyzer.database.get_data_stores())
    return Analyzer.simple_rule_based_analyzer.analyze_storage_capacity(stores, alert_limit)
5 changes: 5 additions & 0 deletions apps/analyzer/metadata_analyzer/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def create_creation_date_alert(self, alert):
r = requests.post(url, json=alert)
r.raise_for_status()

def create_storage_fill_alert(self, alert):
    """POST a storage fill alert to the backend's alerting endpoint.

    Raises requests.HTTPError when the backend responds with an error status.
    """
    endpoint = f"{self.backend_url}alerting/storageFill"
    response = requests.post(endpoint, json=alert)
    response.raise_for_status()

def get_latest_alert_id(self, alert_type, backup_type=None):
url = self.backend_url + f"alerting/type/{alert_type}/latest"
if backup_type != None:
Expand Down
6 changes: 3 additions & 3 deletions apps/analyzer/metadata_analyzer/creation_date_alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def __init__(self, result, reference_date):

def as_json(self):
    """Serialize this creation date alert for the backend API.

    The pasted diff left both the pre- and post-change dict bodies in place,
    producing duplicate keys; this is the final (post-change) version with
    the trailing comma the formatter expects.
    """
    return {
        "backupId": self.uuid,
        "date": self.date.isoformat(),
        "referenceDate": self.reference_date.isoformat(),
    }
10 changes: 8 additions & 2 deletions apps/analyzer/metadata_analyzer/database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pg8000.dbapi
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
from metadata_analyzer.models import BackupData, Result, Tasks
from metadata_analyzer.models import BackupData, Result, Tasks, DataStore
import os


Expand Down Expand Up @@ -33,7 +33,6 @@ def get_data(self):
result = session.scalars(stmt)
return result


def get_results(self):
session = Session(self.engine)
stmt = select(Result)
Expand All @@ -47,3 +46,10 @@ def get_tasks(self):

result = session.scalars(stmt)
return result

def get_data_stores(self):
    """Fetch all DataStore rows from the database.

    NOTE(review): like the sibling getters, the Session is intentionally not
    closed here, so the returned result stays consumable by the caller.
    """
    session = Session(self.engine)
    return session.scalars(select(DataStore))
29 changes: 29 additions & 0 deletions apps/analyzer/metadata_analyzer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,35 @@ def simple_rule_based_analysis_creation_dates():
except ValueError:
return "Invalid value for alert limit", 400


@app.route("/simpleRuleBasedAnalysisStorageCapacity", methods=["POST"])
def simple_rule_based_analysis_storage_capacity():
"""Runs a simple rule based analysis on data stores searching for ones with
almost full
---
parameters:
- name: Input
in: query
name: alertLimit
schema:
type: integer
responses:
200:
description: Number of created alerts
400:
description: The value set for the alert limit was not valid
"""
alert_limit = request.args.get("alertLimit")

try:
int(alert_limit)
return jsonify(
Analyzer.simple_rule_based_analysis_storage_capacity(int(alert_limit))
)
except ValueError:
return "Invalid value for alert limit", 400


def main():
database = Database()
backend = Backend(os.getenv("BACKEND_URL"))
Expand Down
17 changes: 17 additions & 0 deletions apps/analyzer/metadata_analyzer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,20 @@ def __repr__(self):

def __str__(self):
return repr(self)


class DataStore(Base):
    """ORM model for the `data_stores` table.

    Only the columns needed by the storage-capacity analysis are mapped here;
    the underlying table may contain more.
    """

    __tablename__ = "data_stores"

    # For now I only added the most relevant columns
    name: Mapped[str] = mapped_column(primary_key=True)
    # Numeric capacity/fill columns; units are not visible from this model —
    # presumably bytes or a percentage, TODO confirm against the schema.
    capacity: Mapped[float]
    high_water_mark: Mapped[float]
    filled: Mapped[float]
    stored: Mapped[float]

    def __repr__(self):
        return f"""DataStore(name={self.name})"""

    def __str__(self):
        return repr(self)
26 changes: 26 additions & 0 deletions apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime, timedelta
from metadata_analyzer.size_alert import SizeAlert
from metadata_analyzer.creation_date_alert import CreationDateAlert
from metadata_analyzer.storage_fill_alert import StorageFillAlert


class SimpleRuleBasedAnalyzer:
Expand Down Expand Up @@ -285,3 +286,28 @@ def _analyze_creation_dates_of_one_task(self, results, start_date):
times.append(result.start_time)

return alerts

# Search for data stores that are almost full
def analyze_storage_capacity(self, data, alert_limit):
    """Search the given data stores for ones filled above their high water mark.

    Sends up to `alert_limit` storage fill alerts to the backend — all of them
    when alert_limit is -1, and at most 10 when it is None — and returns a dict
    with the number of alerts sent.
    """
    # A store only qualifies when all three fields are present and the fill
    # level strictly exceeds the high water mark.
    alerts = [
        StorageFillAlert(store)
        for store in data
        if store.capacity is not None
        and store.filled is not None
        and store.high_water_mark is not None
        and store.filled > store.high_water_mark
    ]

    limit = 10 if alert_limit is None else alert_limit

    # -1 means "send everything"; otherwise cap the number of alerts sent.
    count = len(alerts) if limit == -1 else min(limit, len(alerts))
    for alert in alerts[:count]:
        self.backend.create_storage_fill_alert(alert.as_json())

    return {"count": count}
14 changes: 14 additions & 0 deletions apps/analyzer/metadata_analyzer/storage_fill_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class StorageFillAlert:
    """Alert payload for a data store filled above its high water mark."""

    def __init__(self, data_store):
        # Copy the relevant columns so the alert does not hold onto the row.
        self.name = data_store.name
        self.capacity = data_store.capacity
        self.filled = data_store.filled
        self.high_water_mark = data_store.high_water_mark

    def as_json(self):
        """Return the dict shape expected by the backend's storageFill endpoint."""
        payload = {
            "dataStoreName": self.name,
            "capacity": self.capacity,
            "filled": self.filled,
            "highWaterMark": self.high_water_mark,
        }
        return payload
8 changes: 6 additions & 2 deletions apps/analyzer/tests/mock_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ def __init__(self):
self.backups = []
self.tasks = []
self.size_alerts = []
self.creation_date_alerts = []
self.creation_date_alerts = []
self.storage_fill_alerts = []
self.latest_alert_ids = {}

def send_backup_data_batched(self, batch):
Expand All @@ -18,11 +19,14 @@ def create_creation_date_alert(self, alert):
def create_size_alert(self, alert):
    """Record the size alert locally instead of sending it to a real backend."""
    self.size_alerts += [alert]

def create_storage_fill_alert(self, alert):
    """Record the storage fill alert locally so tests can inspect it."""
    self.storage_fill_alerts += [alert]

def set_latest_alert_id(self, alert_type, backup_type, uuid):
    """Remember the latest alert id for an (alert type, backup type) pair."""
    key = (alert_type, backup_type)
    self.latest_alert_ids[key] = uuid

def get_latest_alert_id(self, alert_type, backup_type=None):
    """Return the stored latest alert id for the pair, or "" if none was set.

    The pasted diff left a duplicated, unreachable `return ""` after the
    if/else; this is the single-lookup equivalent using dict.get.
    """
    return self.latest_alert_ids.get((alert_type, backup_type), "")
18 changes: 11 additions & 7 deletions apps/analyzer/tests/mock_database.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
class MockDatabase:
    """In-memory stand-in for the real Database used by the analyzer tests.

    Reconstructed post-diff version (the paste kept both old and new method
    bodies). Also fixes the mutable default arguments: `tasks=[]` and
    `data_stores=[]` would be shared across all instances that omit them.
    """

    def __init__(self, results, tasks=None, data_stores=None):
        self.results = results
        self.tasks = [] if tasks is None else tasks
        self.data_stores = [] if data_stores is None else data_stores

    def get_results(self):
        """Iterate the preloaded results, mirroring Database.get_results."""
        return iter(self.results)

    def get_tasks(self):
        """Iterate the preloaded tasks, mirroring Database.get_tasks."""
        return iter(self.tasks)

    def get_data_stores(self):
        """Iterate the preloaded data stores, mirroring Database.get_data_stores."""
        return iter(self.data_stores)
95 changes: 94 additions & 1 deletion apps/analyzer/tests/test_simple_rule_based_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime

from metadata_analyzer.analyzer import Analyzer
from metadata_analyzer.models import Result
from metadata_analyzer.models import Result, DataStore
from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer
from tests.mock_backend import MockBackend
from tests.mock_database import MockDatabase
Expand All @@ -16,6 +16,14 @@ def _create_mock_result(task, uuid, fdi_type, data_size, start_time):
mock_result.start_time = start_time
return mock_result

def _create_mock_data_store(name, capacity, high_water_mark, filled):
    """Build a DataStore carrying only the fields the analyzer reads."""
    store = DataStore()
    store.name = name
    store.capacity = capacity
    store.high_water_mark = high_water_mark
    store.filled = filled
    return store


def test_alert():
mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01"))
Expand Down Expand Up @@ -579,3 +587,88 @@ def test_alert_latest_creation_date():
"referenceDate": "2000-01-04T16:00:00",
"backupId": mock_result4.uuid
}]

# Tests for the storage fill alerts


# Empty data store should not generate an alert
def test_storage_fill_alert_empty():
    """An empty data store (filled = 0) must not produce a storage fill alert."""
    store = _create_mock_data_store("foo", 100, 80, 0)
    backend = MockBackend()
    database = MockDatabase([], [], [store])
    analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(database, backend, None, analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert backend.storage_fill_alerts == []


# Data stores with enough capacity left should not generate an alert
def test_storage_fill_alert_enough_capacity_left():
    """Stores still at or below their high water mark must not trigger alerts."""
    stores = [
        _create_mock_data_store("foo", 100, 80, 20),
        _create_mock_data_store("bar", 120, 90, 80),
        _create_mock_data_store("baz", 150, 50, 50),
    ]
    backend = MockBackend()
    database = MockDatabase([], [], stores)
    analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(database, backend, None, analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert backend.storage_fill_alerts == []


# Data stores with less than enough capacity left should generate an alert
def test_storage_fill_alert_not_enough_capacity_left():
    """Stores filled above their high water mark must each trigger an alert.

    Renamed: this function previously duplicated the name of the test above
    (test_storage_fill_alert_enough_capacity_left), so Python bound only the
    later definition and the "enough capacity" test was silently never run.
    """
    mock_data_store1 = _create_mock_data_store("foo", 100, 80, 81)
    mock_data_store2 = _create_mock_data_store("bar", 120, 90, 100)
    mock_data_store3 = _create_mock_data_store("baz", 150, 50, 150)

    database = MockDatabase(
        [], [], [mock_data_store1, mock_data_store2, mock_data_store3]
    )
    backend = MockBackend()
    simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(database, backend, None, simple_rule_based_analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    # One alert per overfull store, in database order.
    assert backend.storage_fill_alerts == [
        {
            "dataStoreName": mock_data_store1.name,
            "capacity": mock_data_store1.capacity,
            "filled": mock_data_store1.filled,
            "highWaterMark": mock_data_store1.high_water_mark,
        },
        {
            "dataStoreName": mock_data_store2.name,
            "capacity": mock_data_store2.capacity,
            "filled": mock_data_store2.filled,
            "highWaterMark": mock_data_store2.high_water_mark,
        },
        {
            "dataStoreName": mock_data_store3.name,
            "capacity": mock_data_store3.capacity,
            "filled": mock_data_store3.filled,
            "highWaterMark": mock_data_store3.high_water_mark,
        },
    ]


# Data stores with missing data should not generate an alert
def test_storage_fill_alert_missing_data():
    """Stores with a None capacity, high water mark or fill level are skipped."""
    stores = [
        _create_mock_data_store("foo", None, 80, 100),
        _create_mock_data_store("bar", 120, None, 120),
        _create_mock_data_store("baz", 150, 50, None),
    ]
    backend = MockBackend()
    database = MockDatabase([], [], stores)
    analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(database, backend, None, analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert backend.storage_fill_alerts == []
Loading