Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[omm][api] Hash api to detect content type #1359

Merged
merged 1 commit into from
Sep 14, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions open-media-match/src/OpenMediaMatch/blueprints/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import requests

from threatexchange.content_type.content_base import ContentType
from threatexchange.content_type.photo import PhotoContent
from threatexchange.content_type.video import VideoContent
from threatexchange.signal_type.signal_base import FileHasher, SignalType

from OpenMediaMatch import app_resources
Expand All @@ -24,19 +26,21 @@
def hash_media():
"""
Fetch content and return its hash.
TODO: implement
"""

content_type = _parse_request_content_type()
signal_types = _parse_request_signal_type(content_type)

media_url = request.args.get("url", None)
if media_url is None:
abort(400, "url is required")

download_resp = requests.get(media_url, allow_redirects=True, timeout=30 * 1000)
download_resp.raise_for_status()

url_content_type = download_resp.headers["content-type"]

current_app.logger.debug("%s is type %s", media_url, url_content_type)

content_type = _parse_request_content_type(url_content_type)
signal_types = _parse_request_signal_type(content_type)

ret = {}

# For images, we may need to copy the file suffix (.png, jpeg, etc) for it to work
Expand All @@ -53,9 +57,21 @@ def hash_media():
return ret


def _parse_request_content_type() -> ContentType:
storage = app_resources.get_storage()
def _parse_request_content_type(url_content_type: str) -> ContentType:
arg = request.args.get("content_type", "")
if not arg:
if url_content_type.lower().startswith("image"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this check for the prefix `image/` (with the trailing slash) rather than just `image`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure whether "image" by itself (with no "/subtype" part) is ever a valid content type, and I couldn't find a comprehensive listing.

arg = PhotoContent.get_name()
elif url_content_type.lower().startswith("video"):
arg = VideoContent.get_name()
else:
abort(
400,
f"unsupported url ContentType: '{url_content_type}', "
"if you know the expected type, provide it with content_type",
)

storage = app_resources.get_storage()
content_type_config = storage.get_content_type_configs().get(arg)
if content_type_config is None:
abort(400, f"no such content_type: '{arg}'")
Expand Down
Loading