
Commit 28d7662
Merge pull request #142 from amosproj/131-analysis-module-filter-backup-data-by-backup-tasks-ids

131 analysis module filter backup data by backup tasks ids
chrisklg authored Dec 8, 2024
2 parents 6498ff2 + aaa69c5 commit 28d7662
Showing 17 changed files with 271 additions and 106 deletions.
57 changes: 48 additions & 9 deletions apps/analyzer/metadata_analyzer/analyzer.py
@@ -1,5 +1,6 @@
import datetime


class Analyzer:
def init(database, backend, simple_analyzer, simple_rule_based_analyzer):
Analyzer.database = database
@@ -22,16 +23,24 @@ def analyze():
# Convert a result from the database into the format used by the backend
def _convert_result(result):
backup_type = {
"F": "FULL",
"I": "INCREMENTAL",
"D": "DIFFERENTIAL",
"C": "COPY"
"F": "FULL",
"I": "INCREMENTAL",
"D": "DIFFERENTIAL",
"C": "COPY"
}[result.fdi_type]
return {
"id": result.uuid,
"sizeMB": result.data_size / 1_000_000,
"creationDate": result.start_time.isoformat(),
"type": backup_type
"type": backup_type,
"taskId": result.task_uuid,
}

# Convert a task from the database into the format used by the backend
def _convert_task(task):
return {
"id": task.uuid,
"displayName": task.task,
}

def _get_start_date(data, alert_type, backup_type):
@@ -43,13 +52,13 @@ def _get_start_date(data, alert_type, backup_type):
assert len(latest_alerts) == 1
return latest_alerts[0]


-    def update_data():
+    def _send_Backups():
results = list(Analyzer.database.get_results())

# Batch the api calls to the backend for improved efficiency
batch = []
count = 0

for result in results:
# Only send real backups
if result.is_backup <= 0:
@@ -73,18 +82,48 @@ def update_data():

return {"count": count}

def _send_Tasks():
tasks = list(Analyzer.database.get_tasks())

# Batch the api calls to the backend for improved efficiency
batch = []
count = 0

for task in tasks:

if task.uuid is None or task.task is None:
continue

batch.append(Analyzer._convert_task(task))
count += 1

# Send a full batch
if len(batch) == 100:
Analyzer.backend.send_task_data_batched(batch)
batch = []

# Send the remaining results
if len(batch) > 0:
Analyzer.backend.send_task_data_batched(batch)

return {"count": count}

def update_data():
Analyzer._send_Tasks()
Analyzer._send_Backups()

def simple_rule_based_analysis(alert_limit):
data = list(Analyzer.database.get_results())
start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "FULL")
result = Analyzer.simple_rule_based_analyzer.analyze(data, alert_limit, start_date)
return result

def simple_rule_based_analysis_diff(alert_limit):
data = list(Analyzer.database.get_results())
start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "DIFFERENTIAL")
result = Analyzer.simple_rule_based_analyzer.analyze_diff(data, alert_limit, start_date)
return result

def simple_rule_based_analysis_inc(alert_limit):
data = list(Analyzer.database.get_results())
start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "INCREMENTAL")
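The renamed `_send_Backups` and the new `_send_Tasks` share the same batch-and-flush pattern: convert each row, accumulate up to 100 entries, send the full batch, then flush whatever remains. A minimal standalone sketch of that pattern (the helper name `send_in_batches` is hypothetical, not part of this PR):

def send_in_batches(items, convert, send, batch_size=100):
    # Accumulate converted items and flush every batch_size entries,
    # mirroring the 100-item batches used by _send_Backups/_send_Tasks.
    batch = []
    count = 0
    for item in items:
        batch.append(convert(item))
        count += 1
        if len(batch) == batch_size:
            send(batch)
            batch = []
    # Flush the remainder
    if batch:
        send(batch)
    return {"count": count}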
5 changes: 5 additions & 0 deletions apps/analyzer/metadata_analyzer/backend.py
@@ -9,6 +9,11 @@ def send_backup_data_batched(self, batch):
r = requests.post(url, json=batch)
r.raise_for_status()

def send_task_data_batched(self, batch):
url = self.backend_url + "tasks/batched"
r = requests.post(url, json=batch)
r.raise_for_status()

def create_size_alert(self, alert):
url = self.backend_url + "alerting/size"
r = requests.post(url, json=alert)
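`send_task_data_batched` POSTs a JSON array to the backend's `tasks/batched` route, exactly like `send_backup_data_batched` does for backups. A usage sketch, assuming `Backend` is constructed with its base URL (the URL below is a placeholder):

from metadata_analyzer.backend import Backend

backend = Backend("http://localhost:3000/")  # placeholder base URL
backend.send_task_data_batched([
    # Shape produced by Analyzer._convert_task
    {"id": "123", "displayName": "task123"},
])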
9 changes: 8 additions & 1 deletion apps/analyzer/metadata_analyzer/database.py
@@ -1,7 +1,7 @@
import pg8000.dbapi
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
-from metadata_analyzer.models import BackupData, Result
+from metadata_analyzer.models import BackupData, Result, Tasks
import os


@@ -40,3 +40,10 @@ def get_results(self):

result = session.scalars(stmt)
return result

def get_tasks(self):
session = Session(self.engine)
stmt = select(Tasks)

result = session.scalars(stmt)
return result
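Like `get_results`, the new `get_tasks` returns a lazy `ScalarResult` tied to the session it was created in, which is why `analyzer.py` wraps both calls in `list(...)` before consuming the rows. A self-contained sketch of the same query with an explicit session scope (the connection string is a placeholder; the real settings presumably come from the environment, given the `os` import above):

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from metadata_analyzer.models import Tasks

engine = create_engine("postgresql+pg8000://user:password@localhost:5432/db")  # placeholder DSN

# Materialize the rows while the session is still open
with Session(engine) as session:
    tasks = list(session.scalars(select(Tasks)))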
18 changes: 17 additions & 1 deletion apps/analyzer/metadata_analyzer/models.py
@@ -1,6 +1,7 @@
-from sqlalchemy.orm import mapped_column, Mapped, declarative_base
from datetime import datetime

+from sqlalchemy.orm import mapped_column, Mapped, declarative_base

Base = declarative_base()


@@ -26,6 +27,7 @@ class Result(Base):
saveset: Mapped[str] = mapped_column(primary_key=True)
uuid: Mapped[str]
task: Mapped[str]
task_uuid: Mapped[str]
fdi_type: Mapped[str]
is_backup: Mapped[int]
state: Mapped[int]
@@ -43,3 +45,17 @@ def __repr__(self):

def __str__(self):
return repr(self)


class Tasks(Base):
__tablename__ = "tasks"

# For now I only added the most relevant columns
task: Mapped[str] = mapped_column(primary_key=True)
uuid: Mapped[str]

def __repr__(self):
return f"""Tasks(uuid={self.uuid})"""

def __str__(self):
return repr(self)
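The new `Result.task_uuid` column is what ties a backup to its task and enables the filtering this PR is about. A hypothetical query sketch joining the two models on that column (the join is illustration only, not code from this PR):

from sqlalchemy import select

from metadata_analyzer.models import Result, Tasks

# All results belonging to one task, matched via Result.task_uuid
stmt = (
    select(Result)
    .join(Tasks, Result.task_uuid == Tasks.uuid)
    .where(Tasks.uuid == "123")
)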
4 changes: 4 additions & 0 deletions apps/analyzer/tests/mock_backend.py
@@ -1,13 +1,17 @@
class MockBackend:
def __init__(self):
self.backups = []
self.tasks = []
self.size_alerts = []
self.creation_date_alerts = []
self.latest_alert_ids = {}

def send_backup_data_batched(self, batch):
self.backups += batch

def send_task_data_batched(self, batch):
self.tasks += batch

def create_creation_date_alert(self, alert):
self.creation_date_alerts.append(alert)

12 changes: 8 additions & 4 deletions apps/analyzer/tests/mock_database.py
@@ -1,6 +1,10 @@
class MockDatabase:
-    def __init__(self, results):
+    def __init__(self, results, tasks = []):
        self.results = results
+        self.tasks = tasks

    def get_results(self):
        return iter(self.results)

+    def get_tasks(self):
+        return iter(self.tasks)
127 changes: 79 additions & 48 deletions apps/analyzer/tests/test_analyzer.py
@@ -1,59 +1,90 @@
from datetime import datetime

from metadata_analyzer.analyzer import Analyzer
-from metadata_analyzer.models import Result
+from metadata_analyzer.models import Result, Tasks
from tests.mock_backend import MockBackend
from tests.mock_database import MockDatabase

-def _create_mock_result(task, uuid, fdi_type, data_size, start_time, is_backup=1):
+def _create_mock_result(task, uuid, fdi_type, data_size, start_time, task_uuid=None, is_backup=1):
    mock_result = Result()
    mock_result.task = task
    mock_result.uuid = uuid
    mock_result.fdi_type = fdi_type
    mock_result.data_size = data_size
    mock_result.start_time = start_time
    mock_result.is_backup = is_backup
+    mock_result.task_uuid = task_uuid
    return mock_result


def _create_mock_task(uuid, task):
mock_task = Tasks()
mock_task.uuid = uuid
mock_task.task = task
return mock_task


def test_update_data_all_types():
    mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01"))
    mock_result2 = _create_mock_result("foo", "2", "D", 150_000_000, datetime.fromisoformat("2000-01-02"))
    mock_result3 = _create_mock_result("foo", "3", "I", 200_000_000, datetime.fromisoformat("2000-01-03"))
-    mock_result4 = _create_mock_result("foo", "4", "C", 250_000_000, datetime.fromisoformat("2000-01-04"))
+    mock_result4 = _create_mock_result("foo", "4", "C", 250_000_000, datetime.fromisoformat("2000-01-04"), '123')
    mock_results = [mock_result1, mock_result2, mock_result3, mock_result4]

-    database = MockDatabase(mock_results)
+    mock_task1 = _create_mock_task("1", "task1")
+    mock_task2 = _create_mock_task("123", "task123")
+    mock_tasks = [mock_task1, mock_task2]
+
+    database = MockDatabase(mock_results, mock_tasks)
    backend = MockBackend()
    Analyzer.init(database, backend, None, None)
    Analyzer.update_data()

    assert backend.backups == [{
        "id": mock_result1.uuid,
        "sizeMB": mock_result1.data_size / 1_000_000,
        "creationDate": mock_result1.start_time.isoformat(),
-        "type": "FULL"
+        "type": "FULL",
+        "taskId": None
    }, {
        "id": mock_result2.uuid,
        "sizeMB": mock_result2.data_size / 1_000_000,
        "creationDate": mock_result2.start_time.isoformat(),
-        "type": "DIFFERENTIAL"
+        "type": "DIFFERENTIAL",
+        "taskId": None
    }, {
        "id": mock_result3.uuid,
        "sizeMB": mock_result3.data_size / 1_000_000,
        "creationDate": mock_result3.start_time.isoformat(),
-        "type": "INCREMENTAL"
+        "type": "INCREMENTAL",
+        "taskId": None
    }, {
        "id": mock_result4.uuid,
        "sizeMB": mock_result4.data_size / 1_000_000,
        "creationDate": mock_result4.start_time.isoformat(),
-        "type": "COPY"
+        "type": "COPY",
+        "taskId": '123'
    }]
+
+    assert backend.tasks == [
+        {
+            "id": "1",
+            "displayName": "task1"
+        },
+        {
+            "id": "123",
+            "displayName": "task123"
+        }
+    ]


def test_update_data_not_a_backup():
-    mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01"), 0)
+    mock_result1 = _create_mock_result("foo", "1", "F", 100_000_000, datetime.fromisoformat("2000-01-01"), None, 0)

-    database = MockDatabase([mock_result1])
+    database = MockDatabase([mock_result1], [])
    backend = MockBackend()
    Analyzer.init(database, backend, None, None)
    Analyzer.update_data()

    assert backend.backups == []
4 changes: 4 additions & 0 deletions apps/backend/src/app/alerting/alerting.controller.spec.ts
@@ -15,8 +15,10 @@ import { CreateSizeAlertDto } from './dto/alerts/createSizeAlert.dto';
import { CREATION_DATE_ALERT, SIZE_ALERT } from '../utils/constants';
import { CreateCreationDateAlertDto } from './dto/alerts/createCreationDateAlert.dto';
import { CreationDateAlertEntity } from './entity/alerts/creationDateAlert.entity';
import { TaskEntity } from '../tasks/entity/task.entity';
import { MailReceiverEntity } from '../utils/mail/entity/MailReceiver.entity';


const mockedBackupDataEntity: BackupDataEntity = {
id: 'backup-id',
sizeMB: 100,
@@ -105,6 +107,8 @@ describe('AlertingController (e2e)', () => {
.useValue(mockCreationDateAlertRepository)
.overrideProvider(getRepositoryToken(AlertTypeEntity))
.useValue(mockAlertTypeRepository)
.overrideProvider(getRepositoryToken(TaskEntity))
.useValue({})
.overrideProvider(getRepositoryToken(MailReceiverEntity))
.useValue({})
.compile();
5 changes: 5 additions & 0 deletions apps/backend/src/app/backupData/backupData.controller.spec.ts
@@ -7,6 +7,7 @@ import { BackupDataEntity } from './entity/backupData.entity';
import { CreateBackupDataDto } from './dto/createBackupData.dto';
import { BackupDataModule } from './backupData.module';
import { BackupType } from './dto/backupType';
import { TaskEntity } from '../tasks/entity/task.entity';

const mockBackupDataEntity: BackupDataEntity = {
id: '123e4567-e89b-12d3-a456-426614174062',
@@ -40,6 +41,10 @@ describe('BackupDataController (e2e)', () => {
})
.overrideProvider(getRepositoryToken(BackupDataEntity))
.useValue(mockBackupDataRepository)
.overrideProvider(getRepositoryToken(TaskEntity))
.useValue({
findOneBy: jest.fn().mockResolvedValue(new TaskEntity()),
})
.compile();

repository = module.get(getRepositoryToken(BackupDataEntity));