Skip to content

Commit

Permalink
Download OFF images from S3
Browse files: browse the repository at this point in the history
  • Loading branch information
rolandgeider committed Jul 9, 2024
1 parent 8a1f796 commit 5c05ac1
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 13 deletions.
33 changes: 22 additions & 11 deletions wger/nutrition/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@

# Third Party
import requests
from openfoodfacts.images import (
AWS_S3_BASE_URL,
generate_image_path,
)

# wger
from wger.nutrition.api.endpoints import (
Expand Down Expand Up @@ -93,7 +97,7 @@ def fetch_image_from_wger_instance(ingredient):
Image.from_json(ingredient, retrieved_image, image_data)


def fetch_image_from_off(ingredient):
def fetch_image_from_off(ingredient: Ingredient):
"""
See
- https://openfoodfacts.github.io/openfoodfacts-server/api/how-to-download-images/
Expand All @@ -104,10 +108,16 @@ def fetch_image_from_off(ingredient):
url = ingredient.source_url + '?fields=images,image_front_url'
headers = wger_headers()
try:
product_data = requests.get(url, headers=headers).json()
product_data = requests.get(url, headers=headers, timeout=3).json()
except requests.JSONDecodeError:
logger.warning(f'Could not decode JSON response from {url}')
return
except requests.ConnectTimeout as e:
logger.warning(f'Connection timeout while trying to fetch {url}: {e}')
return
except requests.ReadTimeout as e:
logger.warning(f'Read timeout while trying to fetch {url}: {e}')
return

try:
image_url: Optional[str] = product_data['product'].get('image_front_url')
Expand All @@ -120,24 +130,25 @@ def fetch_image_from_off(ingredient):
return
image_data = product_data['product']['images']

# Download the image file
response = requests.get(image_url, headers=headers)
if response.status_code != 200:
logger.info(f'An error occurred! Status code: {response.status_code}')
return

# Parse the file name, looks something like this:
# https://images.openfoodfacts.org/images/products/00975957/front_en.5.400.jpg
# Extract the image key from the url:
# https://images.openfoodfacts.org/images/products/00975957/front_en.5.400.jpg -> "front_en"
image_id: str = image_url.rpartition('/')[2].partition('.')[0]

# Retrieve the uploader name
# Extract the uploader name
try:
image_id: str = image_data[image_id]['imgid']
uploader_name: str = image_data[image_id]['uploader']
except KeyError as e:
logger.info('could not load all image information, skipping...', e)
return

# Download the image from the Open Food Facts bucket on AWS S3
image_s3_url = f'{AWS_S3_BASE_URL}{generate_image_path(ingredient.code, image_id)}'
response = requests.get(image_s3_url, headers=headers)
if not response.ok:
logger.info(f'Could not locate image on AWS! Status code: {response.status_code}')
return

# Save to DB
url = (
f'https://world.openfoodfacts.org/cgi/product_image.pl?code={ingredient.code}&id={image_id}'
Expand Down
9 changes: 7 additions & 2 deletions wger/nutrition/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
# Standard Library

# Standard Library
from unittest.mock import patch
from unittest.mock import (
ANY,
patch,
)

# wger
from wger.core.tests.base_testcase import WgerTestCase
Expand All @@ -34,6 +37,7 @@
class MockOffResponse:
def __init__(self):
self.status_code = 200
self.ok = True
self.content = b'2000'

# yapf: disable
Expand Down Expand Up @@ -159,9 +163,10 @@ def test_download_ingredient_off(self, mock_logger, mock_from_json, mock_request
mock_request.assert_any_call(
'https://world.openfoodfacts.org/api/v2/product/5055365635003.json?fields=images,image_front_url',
headers=wger_headers(),
timeout=ANY,
)
mock_request.assert_any_call(
'https://images.openfoodfacts.org/images/products/00975957/front_en.5.400.jpg',
'https://openfoodfacts-images.s3.eu-west-3.amazonaws.com/data/123/456/789/0987654321/12345.jpg',
headers=wger_headers(),
)
mock_from_json.assert_called()
Expand Down

0 comments on commit 5c05ac1

Please sign in to comment.