Skip to content

Commit

Permalink
Modify get_async for better error handling
Browse files: browse the repository at this point in the history
  • Loading branch information
akneni committed Mar 5, 2024
1 parent 4d91ee6 commit d1fe939
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
8 changes: 5 additions & 3 deletions pygrab/pygrab.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,16 +124,18 @@ def get_async(
     if timeout is None:
         timeout = int( (25 if enable_js else 8) * (1.75 if Tor.tor_status() else 1) )

-    # remove repeats to prevent possible DoS attacks
-    urls = list(dict.fromkeys(urls))
-
     # Handle query params
     if params is not None:
         if isinstance(params, dict):
             urls = [__append_query_params(url, params) for url in urls]
         else:
+            if len(urls) != len(params):
+                raise ValueError("Arguments `urls` and `params` must be of the same length.")
             urls = [__append_query_params(url, param) for url, param in zip(urls, params)]

+    # remove repeats to prevent accidental DoS attacks
+    urls = list(dict.fromkeys(urls))
+
     # Handle async js enabled scraping
     if enable_js:
         # Don't increment the number of requests, but rotate connections if it's necessary
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

 setup(
     name='pygrab',
-    version='3.0.5',
+    version='3.0.6',
     description='A secure python library for fetching data with async, JS, and Tor support',
     long_description=long_description,
     long_description_content_type='text/markdown',
Expand Down

0 comments on commit d1fe939

Please sign in to comment.