Skip to content

Commit

Permalink
V3.9.6 (#135)
Browse files Browse the repository at this point in the history
1) BUGFIX text(): skip the response if resp.status_code == 202;
2) BUGFIX images(): add the "Accept-Language" header to the request headers.
  • Loading branch information
deedy5 authored Nov 18, 2023
1 parent 16d56a6 commit c6e70e1
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 15 deletions.
25 changes: 18 additions & 7 deletions duckduckgo_search/duckduckgo_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None:
headers = {
"User-Agent": choice(USERAGENTS),
"Accept": "application/json, text/javascript, */*; q=0.01",
+"Accept-Language": "en-US,en;q=0.5",
"Referer": "https://duckduckgo.com/",
}
self._client = httpx.Client(headers=headers, proxies=proxies, timeout=timeout, http2=True)
Expand All @@ -44,9 +45,11 @@ def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._models.Re
for i in range(3):
try:
resp = self._client.request(method, url, follow_redirects=True, **kwargs)
-if _is_500_in_url(str(resp.url)) or resp.status_code == 202:
+if _is_500_in_url(str(resp.url)):
raise httpx._exceptions.HTTPError("")
resp.raise_for_status()
+if resp.status_code == 202:
+return 202
if resp.status_code == 200:
return resp
except Exception as ex:
Expand Down Expand Up @@ -129,7 +132,7 @@ def _text_api(
"kl": region,
"l": region,
"bing_market": f"{region.split('-')[0]}-{region.split('-')[-1].upper()}",
-"s": 0,
+"s": "0",
"df": timelimit,
"vqd": vqd,
"o": "json",
Expand All @@ -144,10 +147,13 @@ def _text_api(
payload["p"] = "1"

cache = set()
-for _ in range(10):
+for _ in range(11):
resp = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
if resp is None:
return
+if resp == 202:
+payload["s"] = f"{int(payload['s']) + 50}"
+continue
try:
page_data = resp.json().get("results", None)
except Exception:
Expand Down Expand Up @@ -200,15 +206,19 @@ def _text_html(
safesearch_base = {"on": 1, "moderate": -1, "off": -2}
payload = {
"q": keywords,
+"s": "0",
"kl": region,
"p": safesearch_base[safesearch.lower()],
"df": timelimit,
}
cache: Set[str] = set()
-for _ in range(10):
+for _ in range(11):
resp = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
if resp is None:
return
+if resp == 202:
+payload["s"] = f"{int(payload['s']) + 50}"
+continue

tree = html.fromstring(resp.content)
if tree.xpath('//div[@class="no-results"]/text()'):
Expand Down Expand Up @@ -239,7 +249,6 @@ def _text_html(
names = next_page.xpath('.//input[@type="hidden"]/@name')
values = next_page.xpath('.//input[@type="hidden"]/@value')
payload = {n: v for n, v in zip(names, values)}
-# sleep(0.75)

def _text_lite(
self,
Expand Down Expand Up @@ -271,10 +280,13 @@ def _text_lite(
"df": timelimit,
}
cache: Set[str] = set()
-for _ in range(10):
+for _ in range(11):
resp = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
if resp is None:
return
+if resp == 202:
+payload["s"] = f"{int(payload['s']) + 50}"
+continue

if b"No more results." in resp.content:
return
Expand Down Expand Up @@ -309,7 +321,6 @@ def _text_lite(
return
payload["s"] = next_page_s[0]
payload["vqd"] = _extract_vqd(resp.content)
-# sleep(0.75)

def images(
self,
Expand Down
25 changes: 18 additions & 7 deletions duckduckgo_search/duckduckgo_search_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None:
headers = {
"User-Agent": choice(USERAGENTS),
"Accept": "application/json, text/javascript, */*; q=0.01",
+"Accept-Language": "en-US,en;q=0.5",
"Referer": "https://duckduckgo.com/",
}
self._client = httpx.AsyncClient(headers=headers, proxies=proxies, timeout=timeout, http2=True)
Expand All @@ -44,9 +45,11 @@ async def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._mod
for i in range(3):
try:
resp = await self._client.request(method, url, follow_redirects=True, **kwargs)
-if _is_500_in_url(str(resp.url)) or resp.status_code == 202:
+if _is_500_in_url(str(resp.url)):
raise httpx._exceptions.HTTPError("")
resp.raise_for_status()
+if resp.status_code == 202:
+return 202
if resp.status_code == 200:
return resp
except Exception as ex:
Expand Down Expand Up @@ -131,7 +134,7 @@ async def _text_api(
"kl": region,
"l": region,
"bing_market": region,
-"s": 0,
+"s": "0",
"df": timelimit,
"vqd": vqd,
"o": "json",
Expand All @@ -146,10 +149,13 @@ async def _text_api(
payload["p"] = "1"

cache = set()
-for _ in range(10):
+for _ in range(11):
resp = await self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
if resp is None:
return
+if resp == 202:
+payload["s"] = f"{int(payload['s']) + 50}"
+continue
try:
page_data = resp.json().get("results", None)
except Exception:
Expand Down Expand Up @@ -202,15 +208,19 @@ async def _text_html(
safesearch_base = {"on": 1, "moderate": -1, "off": -2}
payload = {
"q": keywords,
+"s": "0",
"kl": region,
"p": safesearch_base[safesearch.lower()],
"df": timelimit,
}
cache: Set[str] = set()
-for _ in range(10):
+for _ in range(11):
resp = await self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
if resp is None:
return
+if resp == 202:
+payload["s"] = f"{int(payload['s']) + 50}"
+continue

tree = html.fromstring(resp.content)
if tree.xpath('//div[@class="no-results"]/text()'):
Expand Down Expand Up @@ -241,7 +251,6 @@ async def _text_html(
names = next_page.xpath('.//input[@type="hidden"]/@name')
values = next_page.xpath('.//input[@type="hidden"]/@value')
payload = {n: v for n, v in zip(names, values)}
-# await asyncio.sleep(0.75)

async def _text_lite(
self,
Expand Down Expand Up @@ -273,10 +282,13 @@ async def _text_lite(
"df": timelimit,
}
cache: Set[str] = set()
-for _ in range(10):
+for _ in range(11):
resp = await self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
if resp is None:
return
+if resp == 202:
+payload["s"] = f"{int(payload['s']) + 50}"
+continue

if b"No more results." in resp.content:
return
Expand Down Expand Up @@ -311,7 +323,6 @@ async def _text_lite(
return
payload["s"] = next_page_s[0]
payload["vqd"] = _extract_vqd(resp.content)
-# await asyncio.sleep(0.75)

async def images(
self,
Expand Down
2 changes: 1 addition & 1 deletion duckduckgo_search/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-__version__ = "3.9.5"
+__version__ = "3.9.6"

0 comments on commit c6e70e1

Please sign in to comment.