Merge pull request #15 from deedy5/v2.1
v2.1
1) code redesigned to remove lxml and brotli from the requirements,
2) added lru_cache for the function get_vqd() (see the sketch below),
3) time delays removed.
deedy5 authored Sep 11, 2022
2 parents 90c1734 + 5f14ad1 commit df14923
Showing 10 changed files with 69 additions and 175 deletions.
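
Item 2 of the commit message puts the vqd token lookup behind functools.lru_cache (renamed get_vqd -> _get_vqd in the diffs below). The utils.py side of the change is not reproduced on this page, so the following is only a minimal sketch of the idea, with the request and token-extraction details assumed: repeat searches for the same keywords reuse the cached token instead of re-fetching it, which is what makes dropping the time delays (item 3) practical.

from functools import lru_cache
import re

import requests

session = requests.Session()

@lru_cache(maxsize=None)  # assumption: the actual cache size is not shown in this diff
def _get_vqd(keywords):
    # one network round-trip per unique keywords value; later calls hit the cache
    resp = session.get("https://duckduckgo.com", params={"q": keywords})
    match = re.search(r"vqd='([^']+)'", resp.text)  # assumed token embedding format
    return match.group(1) if match else None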
27 changes: 6 additions & 21 deletions duckduckgo_search/ddg.py
@@ -1,11 +1,8 @@
-import json
 import logging
-from datetime import datetime
-from time import sleep
 
 from requests import ConnectionError
 
-from .utils import _normalize, _save_csv, _save_json, get_vqd, session
+from .utils import _do_output, _get_vqd, _normalize, session
 
 logger = logging.getLogger(__name__)
 
@@ -33,12 +30,11 @@ def ddg(
     """
 
     if not keywords:
-        return
+        return None
 
-    vqd = get_vqd(keywords)
+    vqd = _get_vqd(keywords)
     if not vqd:
-        return
-    sleep(0.75)
+        return None
 
     # search
     safesearch_base = {"On": 1, "Moderate": -1, "Off": -2}
@@ -96,7 +92,6 @@ def ddg(
         if not page_results:
             break
         results.extend(page_results)
-        sleep(0.75)
 
     """ using html method
     payload = {
@@ -126,16 +121,6 @@ def ddg(
         sleep(2)
     """
     results = results[:max_results]
-
-    # output
-    keywords = keywords.replace('"', "'")
-    if output == "csv":
-        _save_csv(f"ddg_{keywords}_{datetime.now():%Y%m%d_%H%M%S}.csv", results)
-    elif output == "json":
-        _save_json(f"ddg_{keywords}_{datetime.now():%Y%m%d_%H%M%S}.json", results)
-    elif output == "print":
-        for i, result in enumerate(results, start=1):
-            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=4))
-            input()
-
+    if output:
+        _do_output(__name__, keywords, output, results)
     return results
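
All six modules now funnel their csv/json/print handling through a single _do_output call in utils.py. That file's diff is not shown on this page, but the deleted blocks pin down the behavior; a minimal sketch, assuming _save_csv and _save_json are the existing utils helpers and that the old ddg_* filename prefix is derived from the caller's __name__:

import json
from datetime import datetime

def _do_output(module_name, keywords, output, results):
    # "duckduckgo_search.ddg" -> "ddg", reproducing the old filename prefixes
    prefix = module_name.split(".")[-1]
    keywords = keywords.replace('"', "'")
    if output == "csv":
        _save_csv(f"{prefix}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}.csv", results)
    elif output == "json":
        _save_json(f"{prefix}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}.json", results)
    elif output == "print":
        for i, result in enumerate(results, start=1):
            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=2))
            input()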
28 changes: 7 additions & 21 deletions duckduckgo_search/ddg_images.py
@@ -1,13 +1,11 @@
-import json
 import logging
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from datetime import datetime
-from time import sleep
 
 from requests import ConnectionError
 
-from .utils import _download_image, _save_csv, _save_json, get_vqd, session
+from .utils import _do_output, _download_image, _get_vqd, session
 
 logger = logging.getLogger(__name__)
 
@@ -57,10 +55,9 @@ def ddg_images(
     if not keywords:
         return None
 
-    vqd = get_vqd(keywords)
+    vqd = _get_vqd(keywords)
     if not vqd:
-        return
-    sleep(0.75)
+        return None
 
     # get images
     safesearch_base = {"On": 1, "Moderate": -1, "Off": -2}
@@ -119,24 +116,12 @@ def ddg_images(
         results.extend(page_results)
         # pagination
         payload["s"] += 100
-        sleep(0.2)
 
     results = results[:max_results]
+    if output:
+        _do_output(__name__, keywords, output, results)
 
-    # output
-    keywords = keywords.replace('"', "'")
-    if output == "csv":
-        _save_csv(f"ddg_images_{keywords}_{datetime.now():%Y%m%d_%H%M%S}.csv", results)
-    elif output == "json":
-        _save_json(
-            f"ddg_images_{keywords}_{datetime.now():%Y%m%d_%H%M%S}.json", results
-        )
-    elif output == "print":
-        for i, result in enumerate(results, start=1):
-            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=2))
-            input()
-
-    # download
+    # download images
     if download:
         print("Downloading images. Wait...")
         keywords = keywords.replace('"', "'")
@@ -145,6 +130,7 @@ def ddg_images(
         futures = []
         with ThreadPoolExecutor(30) as executor:
             for i, res in enumerate(results, start=1):
+                print(i)
                 filename = res["image"].split("/")[-1].split("?")[0]
                 future = executor.submit(
                     _download_image, res["image"], path, f"{i}_{filename}"
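
The hunk above is cut off before the submitted futures are consumed. Since as_completed is imported at the top of the module, the usual submit-then-drain pattern presumably follows; a generic sketch with hypothetical download_all/fetch names, not the repo's exact code:

from concurrent.futures import ThreadPoolExecutor, as_completed

def download_all(urls, fetch):
    # submit every download, then handle results as they finish
    with ThreadPoolExecutor(30) as executor:
        futures = [executor.submit(fetch, url) for url in urls]
        for future in as_completed(futures):
            try:
                future.result()  # re-raises any exception from fetch
            except Exception as ex:
                print(f"download failed: {ex}")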
32 changes: 7 additions & 25 deletions duckduckgo_search/ddg_maps.py
@@ -1,15 +1,12 @@
-import json
 import logging
 from collections import deque
 from dataclasses import dataclass
-from datetime import datetime
 from decimal import Decimal
-from time import sleep
 
 import requests
 from requests import ConnectionError
 
-from .utils import _normalize, _save_csv, _save_json, get_vqd, headers, session
+from .utils import _do_output, _get_vqd, _normalize, headers, session
 
 logger = logging.getLogger(__name__)
 
@@ -73,10 +70,9 @@ def ddg_maps(
         return None
 
     # get vqd
-    vqd = get_vqd(keywords)
+    vqd = _get_vqd(keywords)
     if not vqd:
-        return
-    sleep(0.75)
+        return None
 
     # if longitude and latitude are specified, skip the request about bbox to the nominatim api
     if latitude and longitude:
@@ -204,22 +200,8 @@ def ddg_maps(
             work_bboxes.extendleft([bbox1, bbox2, bbox3, bbox4])
 
         print(f"Found {len(results)}")
-        sleep(0.2)
 
-    # output
-    keywords = keywords.replace('"', "'")
-    if output == "csv":
-        _save_csv(
-            f"ddg_maps_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
-            results,
-        )
-    elif output == "json":
-        _save_json(
-            f"ddg_maps_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
-            results,
-        )
-    elif output == "print":
-        for i, result in enumerate(results, start=1):
-            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=2))
-            input()
+    results = results[:max_results]
+    if output:
+        _do_output(__name__, keywords, output, results)
     return results
33 changes: 6 additions & 27 deletions duckduckgo_search/ddg_news.py
@@ -1,11 +1,9 @@
-import json
 import logging
 from datetime import datetime
-from time import sleep
 
 from requests import ConnectionError
 
-from .utils import _normalize, _save_csv, _save_json, get_vqd, session
+from .utils import _do_output, _get_vqd, _normalize, session
 
 logger = logging.getLogger(__name__)
 
@@ -36,10 +34,9 @@ def ddg_news(
         return None
 
     # get vqd
-    vqd = get_vqd(keywords)
+    vqd = _get_vqd(keywords)
     if not vqd:
-        return
-    sleep(0.75)
+        return None
 
     # get news
     safesearch_base = {"On": 1, "Moderate": -1, "Off": -2}
@@ -92,26 +89,8 @@ def ddg_news(
         results.extend(page_results)
         # pagination
         params["s"] += 30
-        sleep(0.2)
 
-    results = results[:max_results]
-    # sort by datetime
-    results = sorted(results, key=lambda x: x["date"], reverse=True)
-
-    # output
-    keywords = keywords.replace('"', "'")
-    if output == "csv":
-        _save_csv(
-            f"ddg_news_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
-            results,
-        )
-    elif output == "json":
-        _save_json(
-            f"ddg_news_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
-            results,
-        )
-    elif output == "print":
-        for i, result in enumerate(results, start=1):
-            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=2))
-            input()
+    results = sorted(results[:max_results], key=lambda x: x["date"], reverse=True)
+    if output:
+        _do_output(__name__, keywords, output, results)
     return results
30 changes: 6 additions & 24 deletions duckduckgo_search/ddg_translate.py
@@ -1,11 +1,8 @@
-import json
 import logging
-from datetime import datetime
-from time import sleep
 
 from requests import ConnectionError
 
-from .utils import _save_csv, _save_json, get_vqd, session
+from .utils import _do_output, _get_vqd, session
 
 logger = logging.getLogger(__name__)
 
@@ -32,10 +29,9 @@ def ddg_translate(
         return None
 
     # get vqd
-    vqd = get_vqd("translate")
+    vqd = _get_vqd("translate")
     if not vqd:
-        return
-    sleep(0.75)
+        return None
 
     # translate
     params = {
@@ -66,22 +62,8 @@ def ddg_translate(
             logger.error("Connection Error.")
         except Exception:
             logger.exception("Exception.", exc_info=True)
-        sleep(0.2)
 
-    # output
-    keywords = keywords[0].replace('"', "'")
-    if output == "csv":
-        _save_csv(
-            f"ddg_translate_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
-            results,
-        )
-    elif output == "json":
-        _save_json(
-            f"ddg_translate_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
-            results,
-        )
-    elif output == "print":
-        for i, result in enumerate(results, start=1):
-            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=2))
-            input()
+    if output:
+        keywords = keywords[0]
+        _do_output(__name__, keywords, output, results)
     return results
33 changes: 6 additions & 27 deletions duckduckgo_search/ddg_videos.py
@@ -1,11 +1,8 @@
-import json
 import logging
-from datetime import datetime
-from time import sleep
 
 from requests import ConnectionError
 
-from .utils import _save_csv, _save_json, get_vqd, session
+from .utils import _do_output, _get_vqd, session
 
 logger = logging.getLogger(__name__)
 

Expand Down Expand Up @@ -39,13 +36,12 @@ def ddg_videos(
"""

if not keywords:
return
return None

# get vqd
vqd = get_vqd(keywords)
vqd = _get_vqd(keywords)
if not vqd:
return
sleep(0.75)
return None

# get videos
safesearch_base = {"On": 1, "Moderate": -1, "Off": -2}
@@ -93,25 +89,8 @@ def ddg_videos(
         results.extend(page_results)
         # for pagination
         payload["s"] += 62
-        sleep(0.2)
 
     results = results[:max_results]
-
-    # output
-    keywords = keywords.replace('"', "'")
-    if output == "csv":
-        _save_csv(
-            f"ddg_videos_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
-            results,
-        )
-    elif output == "json":
-        _save_json(
-            f"ddg_videos_{keywords}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
-            results,
-        )
-    elif output == "print":
-        for i, result in enumerate(results, start=1):
-            print(f"{i}.", json.dumps(result, ensure_ascii=False, indent=2))
-            input()
-
+    if output:
+        _do_output(__name__, keywords, output, results)
     return results
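
With the delays and the per-module output code gone, the public calls behave the same from the outside, just faster on repeat queries. A hypothetical smoke test follows: ddg and ddg_images are this package's entry points, and max_results and output appear in the diffs above, but treat the full signatures as assumptions.

from duckduckgo_search import ddg, ddg_images

results = ddg("duckduckgo_search v2.1", max_results=10, output="json")  # writes ddg_*.json
print(len(results))

images = ddg_images("sunset", max_results=50)  # pages through results without sleeping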