Skip to content

Commit

Permalink
Merge pull request #148 from amosproj/107-analysis-module-backup-stor…
Browse files Browse the repository at this point in the history
…age-fill-alert

107 analysis module backup storage fill alert
  • Loading branch information
flo0852 authored Dec 10, 2024
2 parents ea017ab + aa5c95f commit 56dd3ba
Show file tree
Hide file tree
Showing 11 changed files with 220 additions and 15 deletions.
7 changes: 7 additions & 0 deletions apps/analyzer/metadata_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,10 @@ def simple_rule_based_analysis_creation_dates(alert_limit):
start_date = Analyzer._get_start_date(data, "CREATION_DATE_ALERT", None)
result = Analyzer.simple_rule_based_analyzer.analyze_creation_dates(data, alert_limit, start_date)
return result

def simple_rule_based_analysis_storage_capacity(alert_limit):
    """Run the rule based storage-capacity analysis over all data stores.

    Fetches every data store from the database and hands them, together
    with the alert limit, to the simple rule based analyzer.
    """
    data_stores = list(Analyzer.database.get_data_stores())
    return Analyzer.simple_rule_based_analyzer.analyze_storage_capacity(
        data_stores, alert_limit
    )
5 changes: 5 additions & 0 deletions apps/analyzer/metadata_analyzer/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def create_creation_date_alert(self, alert):
r = requests.post(url, json=alert)
r.raise_for_status()

def create_storage_fill_alert(self, alert):
    """POST a storage fill alert to the backend's alerting endpoint.

    Raises an HTTPError when the backend responds with an error status.
    """
    response = requests.post(self.backend_url + "alerting/storageFill", json=alert)
    response.raise_for_status()

def get_latest_alert_id(self, alert_type, backup_type=None):
url = self.backend_url + f"alerting/type/{alert_type}/latest"
if backup_type != None:
Expand Down
6 changes: 3 additions & 3 deletions apps/analyzer/metadata_analyzer/creation_date_alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def __init__(self, result, reference_date):

def as_json(self):
return {
"backupId": self.uuid,
"date": self.date.isoformat(),
"referenceDate": self.reference_date.isoformat()
"backupId": self.uuid,
"date": self.date.isoformat(),
"referenceDate": self.reference_date.isoformat(),
}
10 changes: 8 additions & 2 deletions apps/analyzer/metadata_analyzer/database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pg8000.dbapi
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
from metadata_analyzer.models import BackupData, Result, Tasks
from metadata_analyzer.models import BackupData, Result, Tasks, DataStore
import os


Expand Down Expand Up @@ -33,7 +33,6 @@ def get_data(self):
result = session.scalars(stmt)
return result


def get_results(self):
session = Session(self.engine)
stmt = select(Result)
Expand All @@ -47,3 +46,10 @@ def get_tasks(self):

result = session.scalars(stmt)
return result

def get_data_stores(self):
    """Return an iterable over all DataStore rows in the database."""
    # Session stays open so the returned scalars can be iterated lazily,
    # mirroring the other get_* accessors in this class.
    return Session(self.engine).scalars(select(DataStore))
29 changes: 29 additions & 0 deletions apps/analyzer/metadata_analyzer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,35 @@ def simple_rule_based_analysis_creation_dates():
except ValueError:
return "Invalid value for alert limit", 400


@app.route("/simpleRuleBasedAnalysisStorageCapacity", methods=["POST"])
def simple_rule_based_analysis_storage_capacity():
    """Runs a simple rule based analysis searching for data stores that are
    almost full
    ---
    parameters:
      - name: Input
        in: query
        name: alertLimit
        schema:
          type: integer
    responses:
      200:
        description: Number of created alerts
      400:
        description: The value set for the alert limit was not valid
    """
    alert_limit = request.args.get("alertLimit")

    try:
        # int(None) raises TypeError (parameter missing entirely) and
        # int("abc") raises ValueError (non-numeric). Both are client errors,
        # so both map to 400 instead of the TypeError bubbling up as a 500.
        # Parse once instead of calling int() twice as before.
        parsed_limit = int(alert_limit)
    except (TypeError, ValueError):
        return "Invalid value for alert limit", 400
    return jsonify(
        Analyzer.simple_rule_based_analysis_storage_capacity(parsed_limit)
    )


def main():
database = Database()
backend = Backend(os.getenv("BACKEND_URL"))
Expand Down
17 changes: 17 additions & 0 deletions apps/analyzer/metadata_analyzer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,20 @@ def __repr__(self):

def __str__(self):
return repr(self)


class DataStore(Base):
    """ORM model mapping the `data_stores` table.

    For now only the most relevant columns are mapped.
    """

    __tablename__ = "data_stores"

    # Data store name doubles as the primary key.
    name: Mapped[str] = mapped_column(primary_key=True)
    capacity: Mapped[float]
    high_water_mark: Mapped[float]
    filled: Mapped[float]

    def __repr__(self):
        return f"DataStore(name={self.name})"

    def __str__(self):
        return repr(self)
26 changes: 26 additions & 0 deletions apps/analyzer/metadata_analyzer/simple_rule_based_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime, timedelta
from metadata_analyzer.size_alert import SizeAlert
from metadata_analyzer.creation_date_alert import CreationDateAlert
from metadata_analyzer.storage_fill_alert import StorageFillAlert


class SimpleRuleBasedAnalyzer:
Expand Down Expand Up @@ -285,3 +286,28 @@ def _analyze_creation_dates_of_one_task(self, results, start_date):
times.append(result.start_time)

return alerts

# Search for data stores that are almost full
def analyze_storage_capacity(self, data, alert_limit):
    """Create storage fill alerts for data stores that are almost full.

    Args:
        data: iterable of data stores; each needs `capacity`, `filled`
            and `high_water_mark` attributes.
        alert_limit: maximum number of alerts to send to the backend;
            -1 sends all alerts, None defaults to 10.

    Returns:
        dict with the number of alerts sent under the key "count".
    """
    # Skip data stores with missing data; alert when the fill level has
    # crossed the high water mark.
    alerts = [
        StorageFillAlert(data_store)
        for data_store in data
        if data_store.capacity is not None
        and data_store.filled is not None
        and data_store.high_water_mark is not None
        and data_store.filled > data_store.high_water_mark
    ]

    if alert_limit is None:
        alert_limit = 10

    # Only send a maximum of alert_limit alerts or all alerts if alert_limit
    # is -1. Clamp to 0 for other negative limits: previously e.g. -2 produced
    # count == -2 and alerts[:-2] still sent alerts while reporting a
    # negative count.
    if alert_limit == -1:
        count = len(alerts)
    else:
        count = max(0, min(alert_limit, len(alerts)))

    # Send the alerts to the backend
    for alert in alerts[:count]:
        self.backend.create_storage_fill_alert(alert.as_json())

    return {"count": count}
14 changes: 14 additions & 0 deletions apps/analyzer/metadata_analyzer/storage_fill_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class StorageFillAlert:
    """Alert payload for a data store whose fill level exceeded its high water mark."""

    def __init__(self, data_store):
        # Copy the relevant fields so the alert is independent of the
        # data store object it was created from.
        self.name = data_store.name
        self.capacity = data_store.capacity
        self.filled = data_store.filled
        self.high_water_mark = data_store.high_water_mark

    def as_json(self):
        """Return the alert as a dict matching the backend's JSON schema."""
        payload = {}
        payload["dataStoreName"] = self.name
        payload["capacity"] = self.capacity
        payload["filled"] = self.filled
        payload["highWaterMark"] = self.high_water_mark
        return payload
8 changes: 6 additions & 2 deletions apps/analyzer/tests/mock_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ def __init__(self):
self.backups = []
self.tasks = []
self.size_alerts = []
self.creation_date_alerts = []
self.creation_date_alerts = []
self.storage_fill_alerts = []
self.latest_alert_ids = {}

def send_backup_data_batched(self, batch):
Expand All @@ -18,11 +19,14 @@ def create_creation_date_alert(self, alert):
def create_size_alert(self, alert):
    """Record a size alert in memory instead of POSTing it to a backend."""
    self.size_alerts.append(alert)

def create_storage_fill_alert(self, alert):
    """Record a storage fill alert in memory instead of POSTing it to a backend."""
    self.storage_fill_alerts.append(alert)

def set_latest_alert_id(self, alert_type, backup_type, uuid):
    """Remember `uuid` as the latest alert id for (alert_type, backup_type)."""
    key = (alert_type, backup_type)
    self.latest_alert_ids[key] = uuid

def get_latest_alert_id(self, alert_type, backup_type=None):
    """Return the stored latest alert id for (alert_type, backup_type).

    Returns an empty string when no id has been stored for that key.
    """
    # dict.get replaces the membership-test-then-index pattern: one lookup
    # instead of two, and no if/else branching.
    return self.latest_alert_ids.get((alert_type, backup_type), "")
18 changes: 11 additions & 7 deletions apps/analyzer/tests/mock_database.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
class MockDatabase:
    """In-memory stand-in for Database used in the analyzer tests."""

    def __init__(self, results, tasks=None, data_stores=None):
        # None sentinels instead of mutable default arguments ([]): a
        # mutable default is shared across every instance, so one test
        # appending to it would leak state into all later instances.
        self.results = results
        self.tasks = [] if tasks is None else tasks
        self.data_stores = [] if data_stores is None else data_stores

    def get_results(self):
        """Return an iterator over the configured results."""
        return iter(self.results)

    def get_tasks(self):
        """Return an iterator over the configured tasks."""
        return iter(self.tasks)

    def get_data_stores(self):
        """Return an iterator over the configured data stores."""
        return iter(self.data_stores)
95 changes: 94 additions & 1 deletion apps/analyzer/tests/test_simple_rule_based_analyzer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime

from metadata_analyzer.analyzer import Analyzer
from metadata_analyzer.models import Result
from metadata_analyzer.models import Result, DataStore
from metadata_analyzer.simple_rule_based_analyzer import SimpleRuleBasedAnalyzer
from tests.mock_backend import MockBackend
from tests.mock_database import MockDatabase
Expand All @@ -16,6 +16,14 @@ def _create_mock_result(task, uuid, fdi_type, data_size, start_time):
mock_result.start_time = start_time
return mock_result

def _create_mock_data_store(name, capacity, high_water_mark, filled):
    """Build a DataStore test object with the given column values."""
    store = DataStore()
    columns = {
        "name": name,
        "capacity": capacity,
        "high_water_mark": high_water_mark,
        "filled": filled,
    }
    for column, value in columns.items():
        setattr(store, column, value)
    return store


def test_alert():
mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01"))
Expand Down Expand Up @@ -579,3 +587,88 @@ def test_alert_latest_creation_date():
"referenceDate": "2000-01-04T16:00:00",
"backupId": mock_result4.uuid
}]

# Tests for the storage fill alerts


# Empty data store should not generate an alert
def test_storage_fill_alert_empty():
    """An empty data store must not produce a storage fill alert."""
    empty_store = _create_mock_data_store("foo", 100, 80, 0)

    mock_database = MockDatabase([], [], [empty_store])
    mock_backend = MockBackend()
    analyzer = SimpleRuleBasedAnalyzer(mock_backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(mock_database, mock_backend, None, analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert mock_backend.storage_fill_alerts == []


# Data stores with enough capacity left should not generate an alert
def test_storage_fill_alert_enough_capacity_left():
    """Data stores still below their high water mark must not produce alerts."""
    stores = [
        _create_mock_data_store("foo", 100, 80, 20),
        _create_mock_data_store("bar", 120, 90, 80),
        _create_mock_data_store("baz", 150, 50, 50),
    ]

    mock_database = MockDatabase([], [], stores)
    mock_backend = MockBackend()
    analyzer = SimpleRuleBasedAnalyzer(mock_backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(mock_database, mock_backend, None, analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert mock_backend.storage_fill_alerts == []


# Data stores with less than enough capacity left should generate an alert
def test_storage_fill_alert_not_enough_capacity_left():
    """Data stores above their high water mark must each produce an alert.

    Renamed: this function previously reused the name
    test_storage_fill_alert_enough_capacity_left, shadowing the earlier test
    of the same name so that pytest never ran it.
    """
    mock_data_store1 = _create_mock_data_store("foo", 100, 80, 81)
    mock_data_store2 = _create_mock_data_store("bar", 120, 90, 100)
    mock_data_store3 = _create_mock_data_store("baz", 150, 50, 150)

    database = MockDatabase(
        [], [], [mock_data_store1, mock_data_store2, mock_data_store3]
    )
    backend = MockBackend()
    simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(database, backend, None, simple_rule_based_analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert backend.storage_fill_alerts == [
        {
            "dataStoreName": mock_data_store1.name,
            "capacity": mock_data_store1.capacity,
            "filled": mock_data_store1.filled,
            "highWaterMark": mock_data_store1.high_water_mark,
        },
        {
            "dataStoreName": mock_data_store2.name,
            "capacity": mock_data_store2.capacity,
            "filled": mock_data_store2.filled,
            "highWaterMark": mock_data_store2.high_water_mark,
        },
        {
            "dataStoreName": mock_data_store3.name,
            "capacity": mock_data_store3.capacity,
            "filled": mock_data_store3.filled,
            "highWaterMark": mock_data_store3.high_water_mark,
        },
    ]


# Data stores with missing data should not generate an alert
def test_storage_fill_alert_missing_data():
    """Data stores with a missing column value must not produce alerts."""
    incomplete_stores = [
        _create_mock_data_store("foo", None, 80, 100),
        _create_mock_data_store("bar", 120, None, 120),
        _create_mock_data_store("baz", 150, 50, None),
    ]

    mock_database = MockDatabase([], [], incomplete_stores)
    mock_backend = MockBackend()
    analyzer = SimpleRuleBasedAnalyzer(mock_backend, 0.2, 0.2, 0.2, 0.2)
    Analyzer.init(mock_database, mock_backend, None, analyzer)
    Analyzer.simple_rule_based_analysis_storage_capacity(-1)

    assert mock_backend.storage_fill_alerts == []

0 comments on commit 56dd3ba

Please sign in to comment.