V3.9.6 (#135)

1) BUGFIX text(): when a response has status code 202, skip it and continue with the next results offset; 2) BUGFIX images(): add the "Accept-Language" header to the request headers.
deedy5 · Nov 18, 2023 · c6e70e1 · c6e70e1
1 parent 16d56a6
commit c6e70e1
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 15 deletions.
diff --git a/duckduckgo_search/duckduckgo_search.py b/duckduckgo_search/duckduckgo_search.py
@@ -30,6 +30,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None:
             headers = {
                 "User-Agent": choice(USERAGENTS),
                 "Accept": "application/json, text/javascript, */*; q=0.01",
+                "Accept-Language": "en-US,en;q=0.5",
                 "Referer": "https://duckduckgo.com/",
             }
         self._client = httpx.Client(headers=headers, proxies=proxies, timeout=timeout, http2=True)
@@ -44,9 +45,11 @@ def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._models.Re
         for i in range(3):
             try:
                 resp = self._client.request(method, url, follow_redirects=True, **kwargs)
-                if _is_500_in_url(str(resp.url)) or resp.status_code == 202:
+                if _is_500_in_url(str(resp.url)):
                     raise httpx._exceptions.HTTPError("")
                 resp.raise_for_status()
+                if resp.status_code == 202:
+                    return 202
                 if resp.status_code == 200:
                     return resp
             except Exception as ex:
@@ -129,7 +132,7 @@ def _text_api(
             "kl": region,
             "l": region,
             "bing_market": f"{region.split('-')[0]}-{region.split('-')[-1].upper()}",
-            "s": 0,
+            "s": "0",
             "df": timelimit,
             "vqd": vqd,
             "o": "json",
@@ -144,10 +147,13 @@ def _text_api(
             payload["p"] = "1"
 
         cache = set()
-        for _ in range(10):
+        for _ in range(11):
             resp = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
             if resp is None:
                 return
+            if resp == 202:
+                payload["s"] = f"{int(payload['s']) + 50}"
+                continue
             try:
                 page_data = resp.json().get("results", None)
             except Exception:
@@ -200,15 +206,19 @@ def _text_html(
         safesearch_base = {"on": 1, "moderate": -1, "off": -2}
         payload = {
             "q": keywords,
+            "s": "0",
             "kl": region,
             "p": safesearch_base[safesearch.lower()],
             "df": timelimit,
         }
         cache: Set[str] = set()
-        for _ in range(10):
+        for _ in range(11):
             resp = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
             if resp is None:
                 return
+            if resp == 202:
+                payload["s"] = f"{int(payload['s']) + 50}"
+                continue
 
             tree = html.fromstring(resp.content)
             if tree.xpath('//div[@class="no-results"]/text()'):
@@ -239,7 +249,6 @@ def _text_html(
             names = next_page.xpath('.//input[@type="hidden"]/@name')
             values = next_page.xpath('.//input[@type="hidden"]/@value')
             payload = {n: v for n, v in zip(names, values)}
-            # sleep(0.75)
 
     def _text_lite(
         self,
@@ -271,10 +280,13 @@ def _text_lite(
             "df": timelimit,
         }
         cache: Set[str] = set()
-        for _ in range(10):
+        for _ in range(11):
             resp = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
             if resp is None:
                 return
+            if resp == 202:
+                payload["s"] = f"{int(payload['s']) + 50}"
+                continue
 
             if b"No more results." in resp.content:
                 return
@@ -309,7 +321,6 @@ def _text_lite(
                 return
             payload["s"] = next_page_s[0]
             payload["vqd"] = _extract_vqd(resp.content)
-            # sleep(0.75)
 
     def images(
         self,

diff --git a/duckduckgo_search/duckduckgo_search_async.py b/duckduckgo_search/duckduckgo_search_async.py
@@ -30,6 +30,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None:
             headers = {
                 "User-Agent": choice(USERAGENTS),
                 "Accept": "application/json, text/javascript, */*; q=0.01",
+                "Accept-Language": "en-US,en;q=0.5",
                 "Referer": "https://duckduckgo.com/",
             }
         self._client = httpx.AsyncClient(headers=headers, proxies=proxies, timeout=timeout, http2=True)
@@ -44,9 +45,11 @@ async def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._mod
         for i in range(3):
             try:
                 resp = await self._client.request(method, url, follow_redirects=True, **kwargs)
-                if _is_500_in_url(str(resp.url)) or resp.status_code == 202:
+                if _is_500_in_url(str(resp.url)):
                     raise httpx._exceptions.HTTPError("")
                 resp.raise_for_status()
+                if resp.status_code == 202:
+                    return 202
                 if resp.status_code == 200:
                     return resp
             except Exception as ex:
@@ -131,7 +134,7 @@ async def _text_api(
             "kl": region,
             "l": region,
             "bing_market": region,
-            "s": 0,
+            "s": "0",
             "df": timelimit,
             "vqd": vqd,
             "o": "json",
@@ -146,10 +149,13 @@ async def _text_api(
             payload["p"] = "1"
 
         cache = set()
-        for _ in range(10):
+        for _ in range(11):
             resp = await self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
             if resp is None:
                 return
+            if resp == 202:
+                payload["s"] = f"{int(payload['s']) + 50}"
+                continue
             try:
                 page_data = resp.json().get("results", None)
             except Exception:
@@ -202,15 +208,19 @@ async def _text_html(
         safesearch_base = {"on": 1, "moderate": -1, "off": -2}
         payload = {
             "q": keywords,
+            "s": "0",
             "kl": region,
             "p": safesearch_base[safesearch.lower()],
             "df": timelimit,
         }
         cache: Set[str] = set()
-        for _ in range(10):
+        for _ in range(11):
             resp = await self._get_url("POST", "https://html.duckduckgo.com/html", data=payload)
             if resp is None:
                 return
+            if resp == 202:
+                payload["s"] = f"{int(payload['s']) + 50}"
+                continue
 
             tree = html.fromstring(resp.content)
             if tree.xpath('//div[@class="no-results"]/text()'):
@@ -241,7 +251,6 @@ async def _text_html(
             names = next_page.xpath('.//input[@type="hidden"]/@name')
             values = next_page.xpath('.//input[@type="hidden"]/@value')
             payload = {n: v for n, v in zip(names, values)}
-            # await asyncio.sleep(0.75)
 
     async def _text_lite(
         self,
@@ -273,10 +282,13 @@ async def _text_lite(
             "df": timelimit,
         }
         cache: Set[str] = set()
-        for _ in range(10):
+        for _ in range(11):
             resp = await self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload)
             if resp is None:
                 return
+            if resp == 202:
+                payload["s"] = f"{int(payload['s']) + 50}"
+                continue
 
             if b"No more results." in resp.content:
                 return
@@ -311,7 +323,6 @@ async def _text_lite(
                 return
             payload["s"] = next_page_s[0]
             payload["vqd"] = _extract_vqd(resp.content)
-            # await asyncio.sleep(0.75)
 
     async def images(
         self,

diff --git a/duckduckgo_search/version.py b/duckduckgo_search/version.py
@@ -1 +1 @@
-__version__ = "3.9.5"
+__version__ = "3.9.6"