Download OFF images from S3

wger-project · Jul 9, 2024 · 5c05ac1 · 5c05ac1
1 parent 8a1f796
commit 5c05ac1
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 13 deletions.
diff --git a/wger/nutrition/sync.py b/wger/nutrition/sync.py
@@ -23,6 +23,10 @@
 
 # Third Party
 import requests
+from openfoodfacts.images import (
+    AWS_S3_BASE_URL,
+    generate_image_path,
+)
 
 # wger
 from wger.nutrition.api.endpoints import (
@@ -93,7 +97,7 @@ def fetch_image_from_wger_instance(ingredient):
         Image.from_json(ingredient, retrieved_image, image_data)
 
 
-def fetch_image_from_off(ingredient):
+def fetch_image_from_off(ingredient: Ingredient):
     """
     See
     - https://openfoodfacts.github.io/openfoodfacts-server/api/how-to-download-images/
@@ -104,10 +108,16 @@ def fetch_image_from_off(ingredient):
     url = ingredient.source_url + '?fields=images,image_front_url'
     headers = wger_headers()
     try:
-        product_data = requests.get(url, headers=headers).json()
+        product_data = requests.get(url, headers=headers, timeout=3).json()
     except requests.JSONDecodeError:
         logger.warning(f'Could not decode JSON response from {url}')
         return
+    except requests.ConnectTimeout as e:
+        logger.warning(f'Connection timeout while trying to fetch {url}: {e}')
+        return
+    except requests.ReadTimeout as e:
+        logger.warning(f'Read timeout while trying to fetch {url}: {e}')
+        return
 
     try:
         image_url: Optional[str] = product_data['product'].get('image_front_url')
@@ -120,24 +130,25 @@ def fetch_image_from_off(ingredient):
         return
     image_data = product_data['product']['images']
 
-    # Download the image file
-    response = requests.get(image_url, headers=headers)
-    if response.status_code != 200:
-        logger.info(f'An error occurred! Status code: {response.status_code}')
-        return
-
-    # Parse the file name, looks something like this:
-    # https://images.openfoodfacts.org/images/products/00975957/front_en.5.400.jpg
+    # Extract the image key from the url:
+    # https://images.openfoodfacts.org/images/products/00975957/front_en.5.400.jpg -> "front_en"
     image_id: str = image_url.rpartition('/')[2].partition('.')[0]
 
-    # Retrieve the uploader name
+    # Extract the uploader name
     try:
         image_id: str = image_data[image_id]['imgid']
         uploader_name: str = image_data[image_id]['uploader']
     except KeyError as e:
         logger.info('could not load all image information, skipping...', e)
         return
 
+    # Download the image from the Open Food Facts bucket on Amazon S3
+    image_s3_url = f'{AWS_S3_BASE_URL}{generate_image_path(ingredient.code, image_id)}'
+    response = requests.get(image_s3_url, headers=headers)
+    if not response.ok:
+        logger.info(f'Could not locate image on AWS! Status code: {response.status_code}')
+        return
+
     # Save to DB
     url = (
         f'https://world.openfoodfacts.org/cgi/product_image.pl?code={ingredient.code}&id={image_id}'

diff --git a/wger/nutrition/tests/test_tasks.py b/wger/nutrition/tests/test_tasks.py
@@ -15,7 +15,10 @@
 # Standard Library
 
 # Standard Library
-from unittest.mock import patch
+from unittest.mock import (
+    ANY,
+    patch,
+)
 
 # wger
 from wger.core.tests.base_testcase import WgerTestCase
@@ -34,6 +37,7 @@
 class MockOffResponse:
     def __init__(self):
         self.status_code = 200
+        self.ok = True
         self.content = b'2000'
 
     # yapf: disable
@@ -159,9 +163,10 @@ def test_download_ingredient_off(self, mock_logger, mock_from_json, mock_request
             mock_request.assert_any_call(
                 'https://world.openfoodfacts.org/api/v2/product/5055365635003.json?fields=images,image_front_url',
                 headers=wger_headers(),
+                timeout=ANY,
             )
             mock_request.assert_any_call(
-                'https://images.openfoodfacts.org/images/products/00975957/front_en.5.400.jpg',
+                'https://openfoodfacts-images.s3.eu-west-3.amazonaws.com/data/123/456/789/0987654321/12345.jpg',
                 headers=wger_headers(),
             )
             mock_from_json.assert_called()