Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Work on passing execute field to karton #904

Merged
merged 14 commits into from
Feb 1, 2024
58 changes: 40 additions & 18 deletions mwdb/core/karton.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Optional
from typing import TYPE_CHECKING, Any, Dict, Optional

from flask import g
from karton.core import Config as KartonConfig
Expand All @@ -10,6 +10,12 @@
from ..version import app_version
from .config import app_config

if TYPE_CHECKING:
from ..model.blob import TextBlob
from ..model.config import Config
from ..model.file import File
from ..model.object import Object

logger = logging.getLogger("mwdb.karton")


Expand All @@ -35,27 +41,47 @@ def get_karton_producer() -> Optional[Producer]:
return karton_producer


def send_file_to_karton(file) -> str:
def prepare_headers(obj: "Object", arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Prepare headers to use when submitting this object to Karton.
Takes into account object arguments to this analysis, some attributes,
and share_3rd_party field (in this order of precedence).
"""
headers = {
"share_3rd_party": obj.share_3rd_party,
}

ALLOWED_HEADERS = ["execute"]
for attribute in obj.attributes:
if attribute.key in ALLOWED_HEADERS:
headers[attribute.key] = attribute.value

for argument, value in arguments.items():
if argument not in ALLOWED_HEADERS:
raise RuntimeError(f"Argument {argument} is not allowed")
headers[argument] = value

return headers


def send_file_to_karton(file: "File", arguments: Dict[str, Any]) -> str:
producer = get_karton_producer()

if producer is None:
raise RuntimeError("Karton is not enabled or failed to load properly")

feed_quality = g.auth_user.feed_quality
headers_persistent = prepare_headers(file, arguments)
headers_persistent["quality"] = feed_quality
task_priority = TaskPriority.NORMAL if feed_quality == "high" else TaskPriority.LOW

file_stream = file.open()
try:
feed_quality = g.auth_user.feed_quality
task_priority = (
TaskPriority.NORMAL if feed_quality == "high" else TaskPriority.LOW
)
task = Task(
headers={
"type": "sample",
"kind": "raw",
},
headers_persistent={
"quality": feed_quality,
"share_3rd_party": file.share_3rd_party,
},
headers_persistent=headers_persistent,
payload={
"sample": Resource(file.file_name, fd=file_stream, sha256=file.sha256),
"attributes": file.get_attributes(
Expand All @@ -72,7 +98,7 @@ def send_file_to_karton(file) -> str:
return task.root_uid


def send_config_to_karton(config) -> str:
def send_config_to_karton(config: "Config", arguments: Dict[str, Any]) -> str:
producer = get_karton_producer()

if producer is None:
Expand All @@ -84,9 +110,7 @@ def send_config_to_karton(config) -> str:
"kind": config.config_type,
"family": config.family,
},
headers_persistent={
"share_3rd_party": config.share_3rd_party,
},
headers_persistent=prepare_headers(config, arguments),
payload={
"config": config.cfg,
"dhash": config.dhash,
Expand All @@ -99,7 +123,7 @@ def send_config_to_karton(config) -> str:
return task.root_uid


def send_blob_to_karton(blob) -> str:
def send_blob_to_karton(blob: "TextBlob", arguments: Dict[str, Any]) -> str:
producer = get_karton_producer()

if producer is None:
Expand All @@ -110,9 +134,7 @@ def send_blob_to_karton(blob) -> str:
"type": "blob",
"kind": blob.blob_type,
},
headers_persistent={
"share_3rd_party": blob.share_3rd_party,
},
headers_persistent=prepare_headers(blob, arguments),
payload={
"content": blob.content,
"dhash": blob.dhash,
Expand Down
5 changes: 3 additions & 2 deletions mwdb/model/blob.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import hashlib
from typing import Any, Dict

from sqlalchemy.ext.hybrid import hybrid_property

Expand Down Expand Up @@ -63,5 +64,5 @@ def get_or_create(

return blob_obj, is_new

def _send_to_karton(self):
return send_blob_to_karton(self)
def _send_to_karton(self, arguments: Dict[str, Any]):
return send_blob_to_karton(self, arguments)
6 changes: 4 additions & 2 deletions mwdb/model/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any, Dict

from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.hybrid import hybrid_property

Expand Down Expand Up @@ -57,8 +59,8 @@ def get_or_create(
tags=tags,
)

def _send_to_karton(self):
return send_config_to_karton(self)
def _send_to_karton(self, arguments: Dict[str, Any]):
return send_config_to_karton(self, arguments)


# Compatibility reasons
Expand Down
5 changes: 3 additions & 2 deletions mwdb/model/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import tempfile
from typing import Any, Dict

import pyzipper
from Cryptodome.Util.strxor import strxor_c
Expand Down Expand Up @@ -356,5 +357,5 @@ def get_by_download_token(download_token):
return None
return File.get(download_req["identifier"]).first()

def _send_to_karton(self):
return send_file_to_karton(self)
def _send_to_karton(self, arguments: Dict[str, Any]):
return send_file_to_karton(self, arguments)
8 changes: 4 additions & 4 deletions mwdb/model/object.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from collections import namedtuple
from typing import Optional
from typing import Any, Dict, Optional
from uuid import UUID

from flask import g
Expand Down Expand Up @@ -988,14 +988,14 @@ def get_shares(self):
.order_by(ObjectPermission.access_time.asc())
).all()

def _send_to_karton(self):
def _send_to_karton(self, arguments: Dict[str, Any]):
raise NotImplementedError

def spawn_analysis(self, arguments, commit=True):
def spawn_analysis(self, arguments: Dict[str, Any], commit=True):
"""
Spawns new KartonAnalysis for this object
"""
analysis_id = self._send_to_karton()
analysis_id = self._send_to_karton(arguments)
analysis = KartonAnalysis.create(
analysis_id=UUID(analysis_id),
initial_object=self,
Expand Down
3 changes: 2 additions & 1 deletion mwdb/schema/attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class AttributeValueSchema(Schema):

@validates("value")
def validate_value(self, value):
if not value:
# Currently only truthy values and False are allowed as values
if not value and value is not False:
raise ValidationError("Value shouldn't be empty")


Expand Down
2 changes: 1 addition & 1 deletion mwdb/schema/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ObjectCreateRequestSchemaBase(Schema):
attributes = fields.Nested(AttributeItemRequestSchema, many=True, missing=[])
upload_as = fields.Str(missing="*", allow_none=False)
karton_id = fields.UUID(missing=None)
karton_arguments = fields.Dict(missing={}, keys=fields.Str(), values=fields.Str())
karton_arguments = fields.Dict(missing={}, keys=fields.Str())
tags = fields.Nested(TagRequestSchema, many=True, missing=[])
share_3rd_party = fields.Boolean(missing=True)

Expand Down
14 changes: 4 additions & 10 deletions tests/backend/test_karton.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,24 @@ def test_karton_analysis_after_adding_sample(admin_session):
assert len(analyses) == 1


def test_karton_reanalyze_object_with_args(admin_session):
def test_karton_reanalyze_blob(admin_session):
test = admin_session
blob_name = rand_string(15)
argument_key = rand_string(5)
argument_value = rand_string(5)
blob = test.add_blob(None, blobname=blob_name, blobtype="inject", content="""
Binary junk: \x00\x01\x02\x03\x04\x05\x07
HELLO WORLD!
========""" + random_name())
blob_dhash = blob["id"]
new_analysis = test.reanalyze_object(blob_dhash, arguments={argument_key: argument_value})
test.reanalyze_object(blob_dhash)
analyses = test.get_analyses(blob_dhash)["analyses"]
assert new_analysis["arguments"] == {argument_key: argument_value}
assert len(analyses) == 2

incorrect_object_dhash = "abcdefghi"
with ShouldRaise(status_code=404):
test.reanalyze_object(incorrect_object_dhash)


def test_karton_reanalyze_object_without_args(admin_session):
def test_karton_reanalyze_file(admin_session):
test = admin_session
file_name = rand_string(15)
file_content = rand_string()
Expand All @@ -77,18 +74,15 @@ def test_karton_reanalyze_object_without_args(admin_session):
def test_get_karton_analysis_info(admin_session):
test = admin_session
blob_name = rand_string(15)
argument_key = rand_string(5)
argument_value = rand_string(5)
blob = test.add_blob(None, blobname=blob_name, blobtype="inject", content="""
Binary junk: \x00\x01\x02\x03\x04\x05\x07
HELLO WORLD!
========""" + random_name())
blob_dhash = blob["id"]
new_analysis = test.reanalyze_object(blob_dhash, arguments={argument_key: argument_value})
new_analysis = test.reanalyze_object(blob_dhash)
analysis_id = new_analysis["id"]
analysis_info = test.get_analysis_info(blob_dhash, analysis_id)
assert analysis_id == analysis_info["id"]
assert analysis_info["arguments"] == {argument_key: argument_value}


def test_assign_analysis_to_object(admin_session):
Expand Down
Loading