From c218523529b2360688e7d0c46455516e6ca22764 Mon Sep 17 00:00:00 2001 From: kvothe Date: Wed, 23 Oct 2024 00:16:17 +0200 Subject: [PATCH] Create proper enums for job statuses (#412) --- src/db.py | 30 +++---- ...el_removed_status_create_enum_jobstatus.py | 82 +++++++++++++++++++ src/models/job.py | 18 +++- src/models/jobagent.py | 5 +- src/models/match.py | 5 +- src/mqueryfront/src/utils.js | 3 +- src/tasks.py | 8 +- 7 files changed, 127 insertions(+), 24 deletions(-) create mode 100644 src/migrations/versions/6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py diff --git a/src/db.py b/src/db.py index 32698b7e..4c85d1be 100644 --- a/src/db.py +++ b/src/db.py @@ -17,11 +17,12 @@ and_, update, col, + delete, ) from .models.agentgroup import AgentGroup from .models.configentry import ConfigEntry -from .models.job import Job +from .models.job import Job, JobStatus from .models.jobagent import JobAgent from .models.match import Match from .schema import MatchesSchema, ConfigSchema @@ -67,7 +68,11 @@ def cancel_job(self, job: JobId, error=None) -> None: session.execute( update(Job) .where(Job.id == job) - .values(status="cancelled", finished=int(time()), error=error) + .values( + status=JobStatus.cancelled, + finished=int(time()), + error=error, + ) ) session.commit() @@ -85,23 +90,18 @@ def get_job(self, job: JobId) -> Job: return self.__get_job(session, job) def get_valid_jobs(self, username_filter: Optional[str]) -> List[Job]: - """Retrieves valid (accessible and not removed) jobs from the database.""" + """Retrieves valid (accessible) jobs from the database.""" with self.session() as session: - query = ( - select(Job) - .where(Job.status != "removed") - .order_by(col(Job.submitted).desc()) - ) + query = select(Job).order_by(col(Job.submitted).desc()) if username_filter: query = query.where(Job.rule_author == username_filter) return session.exec(query).all() def remove_query(self, job: JobId) -> None: - """Sets the job status to removed.""" + """Delete the job, linked match and job agent from the database.""" with self.session() as session: - session.execute( - update(Job).where(Job.id == job).values(status="removed") - ) + delete_query = delete(Job).where(Job.id == job) + session.execute(delete_query) session.commit() def add_match(self, job: JobId, match: Match) -> None: @@ -149,7 +149,7 @@ def agent_finish_job(self, job: Job) -> None: session.execute( update(Job) .where(Job.internal_id == job.internal_id) - .values(finished=int(time()), status="done") + .values(finished=int(time()), status=JobStatus.done) ) session.commit() @@ -220,7 +220,7 @@ def init_job_datasets(self, job: JobId, num_datasets: int) -> None: .values( total_datasets=num_datasets, datasets_left=num_datasets, - status="processing", + status=JobStatus.processing, ) ) session.commit() @@ -253,7 +253,7 @@ def create_search_task( with self.session() as session: obj = Job( id=job, - status="new", + status=JobStatus.new, rule_name=rule_name, rule_author=rule_author, raw_yara=raw_yara, diff --git a/src/migrations/versions/6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py b/src/migrations/versions/6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py new file mode 100644 index 00000000..5a57cb63 --- /dev/null +++ b/src/migrations/versions/6b495d5a4855_cascade_jobagent_match_and_del_removed_status_create_enum_jobstatus.py @@ -0,0 +1,82 @@ +"""add jobstatus +Revision ID: 6b495d5a4855 +Revises: dbb81bd4d47f +Create Date: 2024-10-15 08:17:30.036531 +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "6b495d5a4855" +down_revision = "dbb81bd4d47f" +branch_labels = None +depends_on = None + +job_status = sa.Enum( + "done", "new", "cancelled", "processing", name="jobstatus" +) + + +def upgrade() -> None: + op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey") + op.create_foreign_key( + constraint_name="jobagent_job_id_fkey", + source_table="jobagent", + referent_table="job", + local_cols=["job_id"], + remote_cols=["internal_id"], + ondelete="CASCADE", + ) + + op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey") + op.create_foreign_key( + constraint_name="match_job_id_fkey", + source_table="match", + referent_table="job", + local_cols=["job_id"], + remote_cols=["internal_id"], + ondelete="CASCADE", + ) + + op.execute("DELETE FROM job WHERE status = 'removed';") + + job_status.create(op.get_bind()) + op.alter_column( + "job", + "status", + existing_type=sa.VARCHAR(), + type_=job_status, + postgresql_using="status::jobstatus", + nullable=True, + ) + + +def downgrade() -> None: + op.alter_column( + "job", + "status", + existing_type=job_status, + type_=sa.VARCHAR(), + nullable=False, + ) + + op.execute("DROP TYPE IF EXISTS jobstatus") + + op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey") + op.create_foreign_key( + constraint_name="jobagent_job_id_fkey", + source_table="jobagent", + referent_table="job", + local_cols=["job_id"], + remote_cols=["internal_id"], + ) + + op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey") + op.create_foreign_key( + constraint_name="match_job_id_fkey", + source_table="match", + referent_table="job", + local_cols=["job_id"], + remote_cols=["internal_id"], + ) diff --git a/src/models/job.py b/src/models/job.py index 4fa93abd..34e5f893 100644 --- a/src/models/job.py +++ b/src/models/job.py @@ -1,3 +1,7 @@ +import enum + +from sqlalchemy.dialects import postgresql + from sqlmodel import SQLModel, Field, ARRAY, String, Column, Relationship from typing import Optional, List, Union, TYPE_CHECKING @@ -6,11 +10,20 @@ from ..models.jobagent import JobAgent +class JobStatus(enum.Enum): + done = "done" + new = "new" + cancelled = "cancelled" + processing = "processing" + + class JobView(SQLModel): """Public fields of mquery jobs.""" + __table_args__ = {"extend_existing": True} + id: str - status: str + status: JobStatus = Field(sa_column=Column(postgresql.ENUM(JobStatus, name="jobstatus"))) # type: ignore error: Optional[str] rule_name: str rule_author: str @@ -29,6 +42,9 @@ class JobView(SQLModel): total_datasets: int agents_left: int + class Config: + arbitrary_types_allowed = True + class Job(JobView, table=True): """Job object in the database. Internal ID is an implementation detail.""" diff --git a/src/models/jobagent.py b/src/models/jobagent.py index f40313de..a8041ea3 100644 --- a/src/models/jobagent.py +++ b/src/models/jobagent.py @@ -1,3 +1,4 @@ +from sqlalchemy import Column, ForeignKey from sqlmodel import SQLModel, Field, Relationship from typing import Union, TYPE_CHECKING @@ -12,7 +13,9 @@ class JobAgent(SQLModel, table=True): id: Union[int, None] = Field(default=None, primary_key=True) task_in_progress: int - job_id: int = Field(foreign_key="job.internal_id") + job_id: int = Field( + sa_column=Column(ForeignKey("job.internal_id", ondelete="CASCADE")) + ) job: "Job" = Relationship(back_populates="agents") agent_id: int = Field(foreign_key="agentgroup.id") diff --git a/src/models/match.py b/src/models/match.py index 23980822..7e089688 100644 --- a/src/models/match.py +++ b/src/models/match.py @@ -1,3 +1,4 @@ +from sqlalchemy import ForeignKey from sqlmodel import SQLModel, Field, ARRAY, String, Column, JSON, Relationship from typing import List, Union, Dict, Any @@ -15,5 +16,7 @@ class Match(SQLModel, table=True): # A list of yara rules matched to this file matches: List[str] = Field(sa_column=Column(ARRAY(String))) - job_id: int = Field(foreign_key="job.internal_id") + job_id: int = Field( + sa_column=Column(ForeignKey("job.internal_id", ondelete="CASCADE")) + ) job: Job = Relationship(back_populates="matches") diff --git a/src/mqueryfront/src/utils.js b/src/mqueryfront/src/utils.js index b64e4bea..3d054721 100644 --- a/src/mqueryfront/src/utils.js +++ b/src/mqueryfront/src/utils.js @@ -1,12 +1,11 @@ export const isStatusFinished = (status) => - ["done", "cancelled", "removed"].includes(status); + ["done", "cancelled"].includes(status); const statusClassMap = { done: "success", new: "info", processing: "info", cancelled: "danger", - removed: "dark", }; export const isAuthEnabled = (config) => diff --git a/src/tasks.py b/src/tasks.py index 5e3e105c..4a0a88bf 100644 --- a/src/tasks.py +++ b/src/tasks.py @@ -9,7 +9,7 @@ from .util import make_sha256_tag from .config import app_config from .plugins import PluginManager -from .models.job import Job +from .models.job import Job, JobStatus from .models.match import Match from .lib.yaraparse import parse_yara, combine_rules from .lib.ursadb import Json, UrsaDb @@ -182,7 +182,7 @@ def start_search(job_id: JobId) -> None: """ with job_context(job_id) as agent: job = agent.db.get_job(job_id) - if job.status == "cancelled": + if job.status == JobStatus.cancelled: logging.info("Job was cancelled, returning...") return @@ -232,7 +232,7 @@ def query_ursadb(job_id: JobId, dataset_id: str, ursadb_query: str) -> None: """Queries ursadb and creates yara scans tasks with file batches.""" with job_context(job_id) as agent: job = agent.db.get_job(job_id) - if job.status == "cancelled": + if job.status == JobStatus.cancelled: logging.info("Job was cancelled, returning...") return @@ -271,7 +271,7 @@ def run_yara_batch(job_id: JobId, iterator: str, batch_size: int) -> None: """Actually scans files, and updates a database with the results.""" with job_context(job_id) as agent: job = agent.db.get_job(job_id) - if job.status == "cancelled": + if job.status == JobStatus.cancelled: logging.info("Job was cancelled, returning...") return