From 1f29ec17a36d3e8506569fe8a1e2bcb303d086b9 Mon Sep 17 00:00:00 2001 From: Brooks Travis Date: Mon, 7 Oct 2024 10:35:46 -0500 Subject: [PATCH 1/3] Reworking how file paths are handled to support relative and absolute patterns and python 3.9. Also bumping httpx version to match folioclient. --- pyproject.toml | 2 +- src/folio_data_import/MARCDataImport.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 617208b..acdd2bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ folio-user-import = "folio_data_import.UserImport:sync_main" [tool.poetry.dependencies] python = "^3.9" folioclient = "^0.60.5" -httpx = "^0.23.0" +httpx = "^0.27.2" pymarc = "^5.2.2" pyhumps = "^3.8.0" inquirer = "^3.4.0" diff --git a/src/folio_data_import/MARCDataImport.py b/src/folio_data_import/MARCDataImport.py index 14e6a6f..92ffcf3 100644 --- a/src/folio_data_import/MARCDataImport.py +++ b/src/folio_data_import/MARCDataImport.py @@ -3,6 +3,7 @@ import glob import io import os +import sys from typing import List import uuid from contextlib import ExitStack @@ -491,6 +492,17 @@ async def main() -> None: if args.member_tenant_id: folio_client.okapi_headers["x-okapi-tenant"] = args.member_tenant_id + if os.path.isabs(args.marc_file_path): + marc_files = [Path(x) for x in glob.glob(args.marc_file_path)] + else: + marc_files = list(Path("./").glob(args.marc_file_path)) + + if len(marc_files) == 0: + print(f"No files found matching {args.marc_file_path}. Exiting.") + sys.exit(1) + else: + print(marc_files) + if not args.import_profile_name: import_profiles = folio_client.folio_get( "/data-import-profiles/jobProfiles", @@ -511,8 +523,6 @@ async def main() -> None: ] answers = inquirer.prompt(questions) args.import_profile_name = answers["import_profile_name"] - marc_files = [Path(x) for x in glob.glob(args.marc_file_path, root_dir="./")] - print(marc_files) try: await MARCImportJob( folio_client, From 04ea6bf2ed96e7127f97feae8f38f01efd1c86ee Mon Sep 17 00:00:00 2001 From: Brooks Travis Date: Mon, 7 Oct 2024 10:36:32 -0500 Subject: [PATCH 2/3] Bumping version to 0.2.5 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index acdd2bb..0af493f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "folio_data_import" -version = "0.2.4" +version = "0.2.5" description = "A python module to interact with the data importing capabilities of the open-source FOLIO ILS" authors = ["Brooks Travis "] license = "MIT" From b2971b2da5f9d47f1417229ad83b91dd7847ac51 Mon Sep 17 00:00:00 2001 From: Brooks Travis Date: Mon, 7 Oct 2024 14:19:06 -0500 Subject: [PATCH 3/3] Make job status/summary checks more fault-tolerant with ratcheting retry timeout behavior. --- src/folio_data_import/MARCDataImport.py | 54 +++++++++++++++++++++---- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/folio_data_import/MARCDataImport.py b/src/folio_data_import/MARCDataImport.py index 92ffcf3..de88b45 100644 --- a/src/folio_data_import/MARCDataImport.py +++ b/src/folio_data_import/MARCDataImport.py @@ -31,6 +31,9 @@ # The order in which the report summary should be displayed REPORT_SUMMARY_ORDERING = {"created": 0, "updated": 1, "discarded": 2, "error": 3} +# Set default timeout and backoff values for HTTP requests when retrying job status and final summary checks +RETRY_TIMEOUT_START = 1 +RETRY_TIMEOUT_RETRY_FACTOR = 2 class MARCImportJob: """ @@ -80,6 +83,7 @@ def __init__( self.import_profile_name = import_profile_name self.batch_size = batch_size self.batch_delay = batch_delay + self.current_retry_timeout = None async def do_work(self) -> None: """ @@ -149,10 +153,23 @@ async def get_job_status(self) -> None: Raises: IndexError: If the job execution with the specified ID is not found. """ - job_status = self.folio_client.folio_get( - "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny" - "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50" - ) + try: + self.current_retry_timeout = ( + self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR + ) if self.current_retry_timeout else RETRY_TIMEOUT_START + job_status = self.folio_client.folio_get( + "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny" + "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50" + ) + self.current_retry_timeout = None + except httpx.ConnectTimeout: + sleep(.25) + with httpx.Client( + timeout=self.current_retry_timeout, + verify=self.folio_client.ssl_verify + ) as temp_client: + self.folio_client.httpx_client = temp_client + return await self.get_job_status() try: status = [ job for job in job_status["jobExecutions"] if job["id"] == self.job_id @@ -393,9 +410,7 @@ async def import_marc_file(self) -> None: await self.get_job_status() sleep(1) if self.finished: - job_summary = self.folio_client.folio_get( - f"/metadata-provider/jobSummary/{self.job_id}" - ) + job_summary = await self.get_job_summary() job_summary.pop("jobExecutionId") job_summary.pop("totalErrors") columns = ["Summary"] + list(job_summary.keys()) @@ -426,6 +441,31 @@ async def import_marc_file(self) -> None: self.last_current = 0 self.finished = False + async def get_job_summary(self) -> dict: + """ + Retrieves the job summary for the current job execution. + + Returns: + dict: The job summary for the current job execution. + """ + try: + self.current_retry_timeout = ( + self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR + ) if self.current_retry_timeout else RETRY_TIMEOUT_START + job_summary = self.folio_client.folio_get( + f"/metadata-provider/jobSummary/{self.job_id}" + ) + self.current_retry_timeout = None + except httpx.ReadTimeout: # + sleep(.25) + with httpx.Client( + timeout=self.current_retry_timeout, + verify=self.folio_client.ssl_verify + ) as temp_client: + self.folio_client.httpx_client = temp_client + return await self.get_job_summary() + return job_summary + async def main() -> None: """