From 73a04aba3547f19209567df791561c24d640fda9 Mon Sep 17 00:00:00 2001 From: yzqzss Date: Wed, 9 Oct 2024 12:04:01 +0800 Subject: [PATCH] move hard_retries config to OtherConfig --- wikiteam3/dumpgenerator/api/page_titles.py | 2 +- wikiteam3/dumpgenerator/cli/cli.py | 7 ++++--- wikiteam3/dumpgenerator/config.py | 5 +++-- wikiteam3/dumpgenerator/dump/image/image.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/wikiteam3/dumpgenerator/api/page_titles.py b/wikiteam3/dumpgenerator/api/page_titles.py index 84575243..c5d8eabb 100644 --- a/wikiteam3/dumpgenerator/api/page_titles.py +++ b/wikiteam3/dumpgenerator/api/page_titles.py @@ -27,7 +27,7 @@ def getPageTitlesAPI(config: Config, session: requests.Session): delay_session = SessionMonkeyPatch( session=session, config=config, add_delay=True, delay_msg="Session delay: "+__name__, - hard_retries=3 + hard_retries=3 # TODO: --hard-retries ) delay_session.hijack() for namespace in namespaces: diff --git a/wikiteam3/dumpgenerator/cli/cli.py b/wikiteam3/dumpgenerator/cli/cli.py index e850e485..37e34647 100644 --- a/wikiteam3/dumpgenerator/cli/cli.py +++ b/wikiteam3/dumpgenerator/cli/cli.py @@ -52,7 +52,7 @@ def getArgumentParser(): "--retries", metavar="5", default=5, help="Maximum number of retries for each request before failing." ) parser.add_argument( - "--hard-retries", metavar="3", default=3, help="Maximum number of hard retries for each request before failing." + "--hard-retries", metavar="3", default=3, help="Maximum number of hard retries for each request before failing. 
(for now, this only controls the hard retries during images downloading)" ) parser.add_argument("--path", help="path to store wiki dump at") parser.add_argument( @@ -293,7 +293,7 @@ def get_parameters(params=None) -> Tuple[Config, OtherConfig]: # Create session mod_requests_text(requests) # monkey patch # type: ignore session = requests.Session() - patch_sess = SessionMonkeyPatch(session=session, hard_retries=int(args.hard_retries)) + patch_sess = SessionMonkeyPatch(session=session, hard_retries=1) # hard retry once to avoid spending too much time on initial detection patch_sess.hijack() def print_request(r: requests.Response, *args, **kwargs): # TODO: use logging @@ -534,7 +534,6 @@ def sleep(self, response=None): path = args.path and os.path.normpath(args.path) or "", delay = args.delay, retries = int(args.retries), - hard_retries = int(args.hard_retries), ) @@ -552,6 +551,8 @@ def sleep(self, response=None): assert_max_images = args.assert_max_images, assert_max_images_bytes = args.assert_max_images_bytes, + hard_retries = int(args.hard_retries), + upload = args.upload, uploader_args = args.uploader_args, ) diff --git a/wikiteam3/dumpgenerator/config.py b/wikiteam3/dumpgenerator/config.py index 93f533e2..517502e3 100644 --- a/wikiteam3/dumpgenerator/config.py +++ b/wikiteam3/dumpgenerator/config.py @@ -26,8 +26,6 @@ def asdict(self): """ Delay between requests """ retries: int = 0 """ Number of retries """ - hard_retries: int = 0 - """ Number of hard retries """ path: str = '' """ Path to save the wikidump """ logs: bool = False @@ -115,5 +113,8 @@ class OtherConfig: assert_max_images: Optional[int] assert_max_images_bytes: Optional[int] + hard_retries: int + """ Number of hard retries """ + upload: bool uploader_args: List[str] \ No newline at end of file diff --git a/wikiteam3/dumpgenerator/dump/image/image.py b/wikiteam3/dumpgenerator/dump/image/image.py index de53fe42..9ecde193 100644 --- a/wikiteam3/dumpgenerator/dump/image/image.py +++ 
b/wikiteam3/dumpgenerator/dump/image/image.py @@ -107,7 +107,7 @@ def modify_headers(headers: Optional[Dict] = None) -> Dict: return headers - patch_sess = SessionMonkeyPatch(session=session, config=config, hard_retries=config.hard_retries) + patch_sess = SessionMonkeyPatch(session=session, config=config, hard_retries=other.hard_retries) patch_sess.hijack() ia_session = requests.Session()