diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py index f3f81b3..2def0f5 100644 --- a/apps/analyzer/metadata_analyzer/analyzer.py +++ b/apps/analyzer/metadata_analyzer/analyzer.py @@ -135,3 +135,10 @@ def simple_rule_based_analysis_creation_dates(alert_limit): start_date = Analyzer._get_start_date(data, "CREATION_DATE_ALERT", None) result = Analyzer.simple_rule_based_analyzer.analyze_creation_dates(data, alert_limit, start_date) return result + + def simple_rule_based_analysis_storage_capacity(alert_limit): + data = list(Analyzer.database.get_data_stores()) + result = Analyzer.simple_rule_based_analyzer.analyze_storage_capacity( + data, alert_limit + ) + return result diff --git a/apps/analyzer/metadata_analyzer/backend.py b/apps/analyzer/metadata_analyzer/backend.py index 0c7f39b..63d9c5c 100644 --- a/apps/analyzer/metadata_analyzer/backend.py +++ b/apps/analyzer/metadata_analyzer/backend.py @@ -24,6 +24,11 @@ def create_creation_date_alert(self, alert): r = requests.post(url, json=alert) r.raise_for_status() + def create_storage_fill_alert(self, alert): + url = self.backend_url + "alerting/storageFill" + r = requests.post(url, json=alert) + r.raise_for_status() + def get_latest_alert_id(self, alert_type, backup_type=None): url = self.backend_url + f"alerting/type/{alert_type}/latest" if backup_type != None: diff --git a/apps/analyzer/metadata_analyzer/creation_date_alert.py b/apps/analyzer/metadata_analyzer/creation_date_alert.py index ab0dbc6..eb4e174 100644 --- a/apps/analyzer/metadata_analyzer/creation_date_alert.py +++ b/apps/analyzer/metadata_analyzer/creation_date_alert.py @@ -6,7 +6,7 @@ def __init__(self, result, reference_date): def as_json(self): return { - "backupId": self.uuid, - "date": self.date.isoformat(), - "referenceDate": self.reference_date.isoformat() + "backupId": self.uuid, + "date": self.date.isoformat(), + "referenceDate": self.reference_date.isoformat(), } diff --git 
a/apps/analyzer/metadata_analyzer/database.py b/apps/analyzer/metadata_analyzer/database.py index b2e041b..07fc91c 100644 --- a/apps/analyzer/metadata_analyzer/database.py +++ b/apps/analyzer/metadata_analyzer/database.py @@ -1,7 +1,7 @@ import pg8000.dbapi from sqlalchemy import create_engine, select from sqlalchemy.orm import Session -from metadata_analyzer.models import BackupData, Result, Tasks +from metadata_analyzer.models import BackupData, Result, Tasks, DataStore import os @@ -33,7 +33,6 @@ def get_data(self): result = session.scalars(stmt) return result - def get_results(self): session = Session(self.engine) stmt = select(Result) @@ -47,3 +46,10 @@ def get_tasks(self): result = session.scalars(stmt) return result + + def get_data_stores(self): + session = Session(self.engine) + stmt = select(DataStore) + + result = session.scalars(stmt) + return result diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py index b8fa55a..f943057 100644 --- a/apps/analyzer/metadata_analyzer/main.py +++ b/apps/analyzer/metadata_analyzer/main.py @@ -317,6 +317,35 @@ def simple_rule_based_analysis_creation_dates(): except ValueError: return "Invalid value for alert limit", 400 + +@app.route("/simpleRuleBasedAnalysisStorageCapacity", methods=["POST"]) +def simple_rule_based_analysis_storage_capacity(): + """Runs a simple rule based analysis on data stores searching for ones which are + almost full + --- + parameters: + - name: Input + in: query + name: alertLimit + schema: + type: integer + responses: + 200: + description: Number of created alerts + 400: + description: The value set for the alert limit was not valid + """ + alert_limit = request.args.get("alertLimit") + + try: + int(alert_limit) + return jsonify( + Analyzer.simple_rule_based_analysis_storage_capacity(int(alert_limit)) + ) + except (ValueError, TypeError): + return "Invalid value for alert limit", 400 + + def main(): database = Database() backend = Backend(os.getenv("BACKEND_URL")) diff
--git a/apps/analyzer/metadata_analyzer/models.py b/apps/analyzer/metadata_analyzer/models.py index 7c2e0cb..02be4e5 100644 --- a/apps/analyzer/metadata_analyzer/models.py +++ b/apps/analyzer/metadata_analyzer/models.py @@ -59,3 +59,20 @@ def __repr__(self): def __str__(self): return repr(self) + + +class DataStore(Base): + __tablename__ = "data_stores" + + # For now I only added the most relevant columns + name: Mapped[str] = mapped_column(primary_key=True) + capacity: Mapped[float] + high_water_mark: Mapped[float] + filled: Mapped[float] + stored: Mapped[float] + + def __repr__(self): + return f"""DataStore(name={self.name})""" + + def __str__(self): + return repr(self) diff --git a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py index f679a79..bd4194a 100644 --- a/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py +++ b/apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py @@ -4,6 +4,7 @@ from datetime import datetime, timedelta from metadata_analyzer.size_alert import SizeAlert from metadata_analyzer.creation_date_alert import CreationDateAlert +from metadata_analyzer.storage_fill_alert import StorageFillAlert class SimpleRuleBasedAnalyzer: @@ -285,3 +286,28 @@ def _analyze_creation_dates_of_one_task(self, results, start_date): times.append(result.start_time) return alerts + + # Search for data stores that are almost full + def analyze_storage_capacity(self, data, alert_limit): + alerts = [] + for data_store in data: + # Skip data stores with missing data + if ( + data_store.capacity is None + or data_store.filled is None + or data_store.high_water_mark is None + ): + continue + if data_store.filled > data_store.high_water_mark: + alerts.append(StorageFillAlert(data_store)) + + if alert_limit is None: + alert_limit = 10 + + # Only send a maximum of alert_limit alerts or all alerts if alert_limit is negative + count = len(alerts) if alert_limit < 0 else min(alert_limit,
len(alerts)) + # Send the alerts to the backend + for alert in alerts[:count]: + self.backend.create_storage_fill_alert(alert.as_json()) + + return {"count": count} diff --git a/apps/analyzer/metadata_analyzer/storage_fill_alert.py b/apps/analyzer/metadata_analyzer/storage_fill_alert.py new file mode 100644 index 0000000..3a18fbc --- /dev/null +++ b/apps/analyzer/metadata_analyzer/storage_fill_alert.py @@ -0,0 +1,14 @@ +class StorageFillAlert: + def __init__(self, data_store): + self.name = data_store.name + self.capacity = data_store.capacity + self.filled = data_store.filled + self.high_water_mark = data_store.high_water_mark + + def as_json(self): + return { + "dataStoreName": self.name, + "capacity": self.capacity, + "filled": self.filled, + "highWaterMark": self.high_water_mark, + } diff --git a/apps/analyzer/tests/mock_backend.py b/apps/analyzer/tests/mock_backend.py index b253a96..fab9e69 100644 --- a/apps/analyzer/tests/mock_backend.py +++ b/apps/analyzer/tests/mock_backend.py @@ -3,7 +3,8 @@ def __init__(self): self.backups = [] self.tasks = [] self.size_alerts = [] - self.creation_date_alerts = [] + self.creation_date_alerts = [] + self.storage_fill_alerts = [] self.latest_alert_ids = {} def send_backup_data_batched(self, batch): @@ -18,6 +19,9 @@ def create_creation_date_alert(self, alert): def create_size_alert(self, alert): self.size_alerts.append(alert) + def create_storage_fill_alert(self, alert): + self.storage_fill_alerts.append(alert) + def set_latest_alert_id(self, alert_type, backup_type, uuid): self.latest_alert_ids[(alert_type, backup_type)] = uuid @@ -25,4 +29,4 @@ def get_latest_alert_id(self, alert_type, backup_type=None): if (alert_type, backup_type) in self.latest_alert_ids: return self.latest_alert_ids[(alert_type, backup_type)] else: - return "" \ No newline at end of file + return "" diff --git a/apps/analyzer/tests/mock_database.py b/apps/analyzer/tests/mock_database.py index 2d20efc..bcfe681 100644 --- 
a/apps/analyzer/tests/mock_database.py +++ b/apps/analyzer/tests/mock_database.py @@ -1,10 +1,14 @@ class MockDatabase: - def __init__(self, results, tasks = []): - self.results = results - self.tasks = tasks + def __init__(self, results, tasks=[], data_stores=[]): + self.results = results + self.tasks = tasks + self.data_stores = data_stores - def get_results(self): - return iter(self.results) + def get_results(self): + return iter(self.results) - def get_tasks(self): - return iter(self.tasks) + def get_tasks(self): + return iter(self.tasks) + + def get_data_stores(self): + return iter(self.data_stores) diff --git a/apps/analyzer/tests/test_simple_rule_based_analyzer.py b/apps/analyzer/tests/test_simple_rule_based_analyzer.py index 1844789..b986d3c 100644 --- a/apps/analyzer/tests/test_simple_rule_based_analyzer.py +++ b/apps/analyzer/tests/test_simple_rule_based_analyzer.py @@ -1,7 +1,7 @@ from datetime import datetime from metadata_analyzer.analyzer import Analyzer -from metadata_analyzer.models import Result +from metadata_analyzer.models import Result, DataStore from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer from tests.mock_backend import MockBackend from tests.mock_database import MockDatabase @@ -16,6 +16,14 @@ def _create_mock_result(task, uuid, fdi_type, data_size, start_time): mock_result.start_time = start_time return mock_result +def _create_mock_data_store(name, capacity, high_water_mark, filled): + mock_data_store = DataStore() + mock_data_store.name = name + mock_data_store.capacity = capacity + mock_data_store.high_water_mark = high_water_mark + mock_data_store.filled = filled + return mock_data_store + def test_alert(): mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01")) @@ -579,3 +587,88 @@ def test_alert_latest_creation_date(): "referenceDate": "2000-01-04T16:00:00", "backupId": mock_result4.uuid }] + +# Tests for the storage fill alerts + + +# Empty data store 
should not generate an alert +def test_storage_fill_alert_empty(): + mock_data_store1 = _create_mock_data_store("foo", 100, 80, 0) + + database = MockDatabase([], [], [mock_data_store1]) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_storage_capacity(-1) + + assert backend.storage_fill_alerts == [] + + +# Data stores with enough capacity left should not generate an alert +def test_storage_fill_alert_enough_capacity_left(): + mock_data_store1 = _create_mock_data_store("foo", 100, 80, 20) + mock_data_store2 = _create_mock_data_store("bar", 120, 90, 80) + mock_data_store3 = _create_mock_data_store("baz", 150, 50, 50) + + database = MockDatabase( + [], [], [mock_data_store1, mock_data_store2, mock_data_store3] + ) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_storage_capacity(-1) + + assert backend.storage_fill_alerts == [] + + +# Data stores with less than enough capacity left should generate an alert +def test_storage_fill_alert_not_enough_capacity_left(): + mock_data_store1 = _create_mock_data_store("foo", 100, 80, 81) + mock_data_store2 = _create_mock_data_store("bar", 120, 90, 100) + mock_data_store3 = _create_mock_data_store("baz", 150, 50, 150) + + database = MockDatabase( + [], [], [mock_data_store1, mock_data_store2, mock_data_store3] + ) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_storage_capacity(-1) + + assert backend.storage_fill_alerts == [ + { + "dataStoreName": mock_data_store1.name, + "capacity": mock_data_store1.capacity, + "filled":
mock_data_store1.filled, + "highWaterMark": mock_data_store1.high_water_mark, + }, + { + "dataStoreName": mock_data_store2.name, + "capacity": mock_data_store2.capacity, + "filled": mock_data_store2.filled, + "highWaterMark": mock_data_store2.high_water_mark, + }, + { + "dataStoreName": mock_data_store3.name, + "capacity": mock_data_store3.capacity, + "filled": mock_data_store3.filled, + "highWaterMark": mock_data_store3.high_water_mark, + }, + ] + + +# Data stores with missing data should not generate an alert +def test_storage_fill_alert_missing_data(): + mock_data_store1 = _create_mock_data_store("foo", None, 80, 100) + mock_data_store2 = _create_mock_data_store("bar", 120, None, 120) + mock_data_store3 = _create_mock_data_store("baz", 150, 50, None) + + database = MockDatabase( + [], [], [mock_data_store1, mock_data_store2, mock_data_store3] + ) + backend = MockBackend() + simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2) + Analyzer.init(database, backend, None, simple_rule_based_analyzer) + Analyzer.simple_rule_based_analysis_storage_capacity(-1) + + assert backend.storage_fill_alerts == []