-
-
Notifications
You must be signed in to change notification settings - Fork 430
/
setup.py
68 lines (67 loc) · 2.89 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from setuptools import setup, find_packages
# Distribution metadata and install configuration for the news-please package.
setup(
    name="news-please",
    version="1.6.13",
    description="news-please is an open source easy-to-use news extractor that just works.",
    # The trailing backslash after the opening quotes suppresses the leading
    # newline, so the text must start in column 0 on the next line.
    long_description="""\
news-please is an open source, easy-to-use news crawler that extracts structured information from almost any news website. It can follow recursively internal hyperlinks and read RSS feeds to fetch both most recent and also old, archived articles. You only need to provide the root URL of the news website. Furthermore, its API allows developers to access the extraction functionality within their software. news-please also implements a workflow optimized for the news archive provided by commoncrawl.org, allowing users to efficiently crawl and extract news articles including various filter options.""",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: MacOS",
        "Operating System :: Microsoft",
        "Operating System :: POSIX :: Linux",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Topic :: Internet",
        "Topic :: Scientific/Engineering :: Information Analysis",
    ],
    keywords="news crawler news scraper news extractor crawler extractor scraper information retrieval",
    author="Felix Hamborg",
    author_email="[email protected]",
    url="https://github.com/fhamborg/news-please",
    download_url="https://github.com/fhamborg/news-please",
    license="Apache License 2.0",
    # Exclude patterns are matched against full dotted package names, so only
    # top-level "examples"/"tests" are dropped; "newsplease.examples" (used by
    # an entry point below) is still packaged.
    packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
    include_package_data=True,
    zip_safe=False,
    # Classifiers are informational only; this is what makes pip refuse to
    # install on interpreters older than the lowest version advertised above.
    python_requires=">=3.8",
    install_requires=[
        "Scrapy>=1.1.0",
        "PyMySQL>=0.7.9",
        "psycopg2-binary>=2.8.4",
        "hjson>=1.5.8",
        "elasticsearch>=2.4",
        "beautifulsoup4>=4.3.2",
        "readability-lxml>=0.6.2",
        "langdetect>=1.0.7",
        "python-dateutil>=2.4.0",
        "plac>=0.9.6",
        "dotmap>=1.2.17",
        "PyDispatcher>=2.0.5",
        "warcio>=1.3.3",
        "ago>=0.0.9",
        "six>=1.10.0",
        "lxml>=3.3.5",
        "hurry.filesize>=0.9",
        # NOTE(review): "bs4" looks redundant with beautifulsoup4 listed
        # above -- confirm nothing depends on it before removing.
        "bs4",
        "faust-cchardet>=2.1.18",
        "boto3",
        "redis",
        "newspaper4k>=0.9.3.1",
        "lxml-html-clean>=0.1.1",
        "typing-extensions>=4.7.0",
    ],
    # An extras key with an empty name and an environment marker makes the
    # requirement unconditional on matching platforms (here: Windows only).
    extras_require={':sys_platform == "win32"': ["pywin32>=220"]},
    # Command-line executables created on install, each mapped to a
    # "module:function" target.
    entry_points={
        "console_scripts": [
            "news-please = newsplease.__main__:main",
            "news-please-cc = newsplease.examples.commoncrawl:main",
        ]
    },
)