From 2f9a1ffe2fc75e74ef168c98d61b765a72add5d9 Mon Sep 17 00:00:00 2001 From: paul Date: Sun, 8 Oct 2023 11:17:44 +0100 Subject: [PATCH 1/5] fix: Fix interrupted downloads issue --- laser_encoders/download_models.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/laser_encoders/download_models.py b/laser_encoders/download_models.py index 17a5db35..1f6ecb77 100644 --- a/laser_encoders/download_models.py +++ b/laser_encoders/download_models.py @@ -46,21 +46,31 @@ def __init__(self, model_dir: str = None): def download(self, filename: str): url = os.path.join(self.base_url, filename) - local_file_path = self.model_dir / filename - if local_file_path.exists(): + local_file_path = os.path.join(self.model_dir, filename) + temp_file_path = os.path.join('/tmp', filename) + + if os.path.exists(local_file_path): logger.info(f" - {filename} already downloaded") else: logger.info(f" - Downloading {filename}") + + if os.path.exists(temp_file_path): + os.remove(temp_file_path) + response = requests.get(url, stream=True) total_size = int(response.headers.get("Content-Length", 0)) progress_bar = tqdm(total=total_size, unit_scale=True, unit="B") - with open(local_file_path, "wb") as f: + + # Download to /tmp first + with open(temp_file_path, "wb") as f: for chunk in response.iter_content(chunk_size=1024): f.write(chunk) progress_bar.update(len(chunk)) progress_bar.close() + os.rename(temp_file_path, local_file_path) + def get_language_code(self, language_list: dict, lang: str) -> str: try: lang_3_4 = language_list[lang] From 7716c111b318adad5f25ca767ad5df00272b8c81 Mon Sep 17 00:00:00 2001 From: paul Date: Sun, 8 Oct 2023 11:24:40 +0100 Subject: [PATCH 2/5] style: Format code using black --- laser_encoders/download_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/laser_encoders/download_models.py b/laser_encoders/download_models.py index 1f6ecb77..0d61e7c7 100644 --- a/laser_encoders/download_models.py +++ b/laser_encoders/download_models.py @@ -48,7 +48,7 @@ def download(self, filename: str): url = os.path.join(self.base_url, filename) local_file_path = os.path.join(self.model_dir, filename) - temp_file_path = os.path.join('/tmp', filename) + temp_file_path = os.path.join("/tmp", filename) if os.path.exists(local_file_path): logger.info(f" - {filename} already downloaded") @@ -61,7 +61,7 @@ def download(self, filename: str): response = requests.get(url, stream=True) total_size = int(response.headers.get("Content-Length", 0)) progress_bar = tqdm(total=total_size, unit_scale=True, unit="B") - + # Download to /tmp first with open(temp_file_path, "wb") as f: for chunk in response.iter_content(chunk_size=1024): From 816ddb08701c69b1d816e3b79df06c0edb94c480 Mon Sep 17 00:00:00 2001 From: paul Date: Mon, 9 Oct 2023 15:15:39 +0100 Subject: [PATCH 3/5] Update download method to use tempfile --- laser_encoders/download_models.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/laser_encoders/download_models.py b/laser_encoders/download_models.py index 0d61e7c7..1ec18d4b 100644 --- a/laser_encoders/download_models.py +++ b/laser_encoders/download_models.py @@ -18,6 +18,7 @@ import logging import os import sys +import tempfile from pathlib import Path import requests @@ -48,26 +49,24 @@ def download(self, filename: str): url = os.path.join(self.base_url, filename) local_file_path = os.path.join(self.model_dir, filename) - temp_file_path = os.path.join("/tmp", filename) if os.path.exists(local_file_path): logger.info(f" - {filename} already downloaded") else: logger.info(f" - Downloading {filename}") - if os.path.exists(temp_file_path): - os.remove(temp_file_path) + tf = tempfile.NamedTemporaryFile(delete=False) + temp_file_path = tf.name - response = requests.get(url, stream=True) - total_size = int(response.headers.get("Content-Length", 0)) - progress_bar = tqdm(total=total_size, unit_scale=True, unit="B") + with tf: + response = requests.get(url, stream=True) + total_size = int(response.headers.get("Content-Length", 0)) + progress_bar = tqdm(total=total_size, unit_scale=True, unit="B") - # Download to /tmp first - with open(temp_file_path, "wb") as f: for chunk in response.iter_content(chunk_size=1024): - f.write(chunk) + tf.write(chunk) progress_bar.update(len(chunk)) - progress_bar.close() + progress_bar.close() os.rename(temp_file_path, local_file_path) From c947443709382cdd1a2036d624f45a0850747328 Mon Sep 17 00:00:00 2001 From: paul Date: Mon, 9 Oct 2023 15:17:55 +0100 Subject: [PATCH 4/5] style: Remove unnecessary space --- laser_encoders/download_models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/laser_encoders/download_models.py b/laser_encoders/download_models.py index 1ec18d4b..65295162 100644 --- a/laser_encoders/download_models.py +++ b/laser_encoders/download_models.py @@ -47,7 +47,6 @@ def __init__(self, model_dir: str = None): def download(self, filename: str): url = os.path.join(self.base_url, filename) - local_file_path = os.path.join(self.model_dir, filename) if os.path.exists(local_file_path): From fa6d80b10dae10b578d020954d4b9fe79b970077 Mon Sep 17 00:00:00 2001 From: paul Date: Wed, 11 Oct 2023 13:43:39 +0100 Subject: [PATCH 5/5] Fix OSError by using shutil.move for cross-filesystem moves Using os.rename caused an OSError when trying to move files across different filesystems (e.g., from /tmp to another directory). By using shutil.move, we gracefully handle such situations, ensuring files are moved correctly regardless of the source and destination filesystems. --- laser_encoders/download_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/laser_encoders/download_models.py b/laser_encoders/download_models.py index 65295162..452501d3 100644 --- a/laser_encoders/download_models.py +++ b/laser_encoders/download_models.py @@ -17,6 +17,7 @@ import argparse import logging import os +import shutil import sys import tempfile from pathlib import Path @@ -67,7 +68,7 @@ def download(self, filename: str): progress_bar.update(len(chunk)) progress_bar.close() - os.rename(temp_file_path, local_file_path) + shutil.move(temp_file_path, local_file_path) def get_language_code(self, language_list: dict, lang: str) -> str: try: