From 0892c2b43e6691c931ee6d9f885c49a403ec6512 Mon Sep 17 00:00:00 2001 From: dcallies Date: Wed, 13 Sep 2023 08:35:10 -0700 Subject: [PATCH] [omm][api] Hash api to detect content type --- .../src/OpenMediaMatch/blueprints/hashing.py | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/open-media-match/src/OpenMediaMatch/blueprints/hashing.py b/open-media-match/src/OpenMediaMatch/blueprints/hashing.py index 1106cb0bd..ad50b3693 100644 --- a/open-media-match/src/OpenMediaMatch/blueprints/hashing.py +++ b/open-media-match/src/OpenMediaMatch/blueprints/hashing.py @@ -13,6 +13,8 @@ import requests from threatexchange.content_type.content_base import ContentType +from threatexchange.content_type.photo import PhotoContent +from threatexchange.content_type.video import VideoContent from threatexchange.signal_type.signal_base import FileHasher, SignalType from OpenMediaMatch import app_resources @@ -24,12 +26,7 @@ def hash_media(): """ Fetch content and return its hash. 
- TODO: implement """ - - content_type = _parse_request_content_type() - signal_types = _parse_request_signal_type(content_type) - media_url = request.args.get("url", None) if media_url is None: abort(400, "url is required") @@ -37,6 +34,13 @@ def hash_media(): download_resp = requests.get(media_url, allow_redirects=True, timeout=30 * 1000) download_resp.raise_for_status() + url_content_type = download_resp.headers.get("content-type", "") + + current_app.logger.debug("%s is type %s", media_url, url_content_type) + + content_type = _parse_request_content_type(url_content_type) + signal_types = _parse_request_signal_type(content_type) + ret = {} # For images, we may need to copy the file suffix (.png, jpeg, etc) for it to work @@ -53,9 +57,21 @@ def hash_media(): return ret -def _parse_request_content_type() -> ContentType: - storage = app_resources.get_storage() +def _parse_request_content_type(url_content_type: str) -> ContentType: arg = request.args.get("content_type", "") + if not arg: + if url_content_type.lower().startswith("image"): + arg = PhotoContent.get_name() + elif url_content_type.lower().startswith("video"): + arg = VideoContent.get_name() + else: + abort( + 400, + f"unsupported url ContentType: '{url_content_type}', " + "if you know the expected type, provide it with content_type", + ) + + storage = app_resources.get_storage() content_type_config = storage.get_content_type_configs().get(arg) if content_type_config is None: abort(400, f"no such content_type: '{arg}'")