main.py
import asyncio
import aiohttp
from bs4 import BeautifulSoup
from textblob import TextBlob
from typing import List, Dict, Any


async def fetch_page(session: aiohttp.ClientSession, url: str) -> str:
    """Fetch the content of a page using aiohttp.

    :param session: The aiohttp session
    :param url: The URL to fetch
    :return: The content of the page
    """
    async with session.get(url) as response:
        return await response.text()
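

# A hardened variant (a sketch, not part of the original): raise on HTTP
# error statuses and bound the total request time, so one failing or slow
# page cannot silently stall the whole crawl. This uses aiohttp's documented
# raise_for_status() and ClientTimeout; the 30-second budget is an
# arbitrary example value.
async def fetch_page_checked(session: aiohttp.ClientSession, url: str) -> str:
    """Like fetch_page, but fail fast on HTTP errors and slow servers."""
    timeout = aiohttp.ClientTimeout(total=30)
    async with session.get(url, timeout=timeout) as response:
        response.raise_for_status()
        return await response.text()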


async def parse_content(html: str) -> List[str]:
    """Parse the content of a page using BeautifulSoup.

    :param html: The HTML page
    :return: The extracted review texts
    """
    soup = BeautifulSoup(html, "html.parser")
    # The tag and class below are site-specific; inspect the target page's
    # markup and adjust this selector to match its actual review elements.
    review_texts = [
        review.text
        for review in soup.find_all("div", class_="text show-more__control")
    ]
    return review_texts


async def analyze_text(texts: List[str]) -> Dict[str, Any]:
    """Analyze the sentiment of a list of texts using TextBlob.

    :param texts: The texts to pass to TextBlob
    :return: A dict with the average polarity, the average subjectivity,
        and the detailed sentiments
    """
    sentiments = [TextBlob(text).sentiment for text in texts]
    average_polarity = (
        sum(s.polarity for s in sentiments) / len(sentiments) if sentiments else 0
    )
    average_subjectivity = (
        sum(s.subjectivity for s in sentiments) / len(sentiments) if sentiments else 0
    )
    return {
        "average_polarity": average_polarity,
        "average_subjectivity": average_subjectivity,
        "detailed_sentiments": sentiments,
    }
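

# For interpreting the numbers above: TextBlob's .sentiment is a namedtuple
# whose polarity ranges from -1.0 (negative) to 1.0 (positive) and whose
# subjectivity ranges from 0.0 (objective) to 1.0 (subjective), so the
# averages returned by analyze_text fall in the same ranges.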


async def crawl(url: str) -> None:
    """Crawl a page, parse its content, and analyze the sentiment of the text.

    :param url: The URL to crawl
    """
    async with aiohttp.ClientSession() as session:
        html = await fetch_page(session, url)
        texts = await parse_content(html)
        analysis = await analyze_text(texts)
        print(f"Analysis results for {url}: {analysis}")


async def main() -> None:
    """Build a crawl task per URL and run them concurrently."""
    # Placeholder URLs; replace with the real pages to analyze.
    urls = ["URLS_1", "URLS_2", "URLS_3"]
    tasks = [crawl(url) for url in urls]
    await asyncio.gather(*tasks)
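

# A concurrency-limiting sketch (an addition, not in the original code): if
# the URL list grows, a semaphore caps the number of in-flight crawls so the
# target sites are not hammered. The limit of 5 is an arbitrary example.
async def crawl_bounded(urls: List[str], limit: int = 5) -> None:
    """Crawl urls concurrently, with at most `limit` crawls in flight."""
    semaphore = asyncio.Semaphore(limit)

    async def guarded(url: str) -> None:
        async with semaphore:
            await crawl(url)

    await asyncio.gather(*(guarded(url) for url in urls))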


if __name__ == "__main__":
    asyncio.run(main())