-
Notifications
You must be signed in to change notification settings - Fork 0
/
episode.py
70 lines (61 loc) · 2.72 KB
/
episode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from bs4 import BeautifulSoup
from page_constants import MP3_MARKERS
class Episode:
    """One episode page: its URL, cover image, title, description and MP3 link.

    Constructing an instance immediately fetches and parses the page at
    ``current_url`` (see ``add_episode_details``).

    Attributes:
        url: Address of the episode page, as passed to the constructor.
        cover: Cover image address; starts as ``current_pic`` and is replaced
            when the first paragraph of the page contains an ``<img>`` with a
            ``src``.
        title: Text of the first ``post-block-editorial-title`` div.
        description: Text of the first paragraph (or of the second one when
            the first is empty).
        mp3: Link to the audio file, or ``""`` when none was found.
        episode_num: Leading part of the title (before the en dash / colon).
    """

    def __init__(self, current_url: str, current_pic: str):
        """Store the page URL and fallback cover, then scrape the page.

        Args:
            current_url: Address of the episode page to scrape.
            current_pic: Cover image to use unless the page provides one.
        """
        self.url: str = current_url
        self.cover: str = current_pic
        self.title: str = ""
        self.description: str = ""
        self.mp3: str = ""
        self.episode_num: str = ""
        self.add_episode_details()

    def to_string(self) -> str:
        """Return a multi-line, human-readable dump of every field.

        Each field is rendered as ``"<label>: <value>"`` followed by a newline.
        """
        labelled_fields = (
            ("URL", self.url),
            ("Cover Image", self.cover),
            ("Title", self.title),
            ("Description", self.description),
            ("MP3", self.mp3),
            ("Episode", self.episode_num),
        )
        return "".join(f'{label}: {value}\n' for label, value in labelled_fields)

    @staticmethod
    def _parse_episode_num(title: str) -> str:
        """Return the part of ``title`` before the first en dash and colon.

        ``str.partition`` is used instead of ``find`` + slicing so that a title
        without an en dash is kept whole rather than losing its last character
        (``find`` returns -1, and ``title[:-1]`` drops one character).
        """
        before_dash = title.partition("–")[0]
        before_colon = before_dash.partition(":")[0]
        return before_colon.strip()

    @staticmethod
    def _find_mp3_link(paragraph) -> str:
        """Return the MP3 link found in one ``<p>`` tag, or ``""`` if none.

        Two strategies are tried, in order:
        1. an anchor whose visible text itself contains ``.mp3`` (its text is
           returned, since some pages have no "Download..." text);
        2. if the paragraph text contains any of ``MP3_MARKERS``, the ``href``
           of the paragraph's first anchor.
        """
        anchors = paragraph.find_all("a")
        if not anchors:
            return ""

        for anchor in anchors:
            anchor_text = anchor.text.strip()
            if ".mp3" in anchor_text.lower():
                return anchor_text

        paragraph_text = paragraph.text.strip().lower()
        if any(marker.lower() in paragraph_text for marker in MP3_MARKERS):
            # An anchor without an href yields None; treat it as "not found".
            href = anchors[0].get("href")
            if href:
                return href.strip()
        return ""

    def add_episode_details(self) -> None:
        """Download the episode page and fill in the instance fields.

        Populates ``title``, ``episode_num``, ``description``, ``cover`` and
        ``mp3`` from the page HTML. Missing page elements leave the
        corresponding field at its current value instead of raising.
        Diagnostics about missing or unusual MP3 links are printed to stdout.
        """
        # Kept as a function-scope import, exactly as in the original code.
        from html_utils import HTMLUtils

        current_html = HTMLUtils.get_html_from_url(self.url)
        soup = BeautifulSoup(current_html, 'html.parser')

        title_divs = soup.find_all("div", {"class": "post-block-editorial-title"})
        if title_divs:
            self.title = title_divs[0].text.strip()
            self.episode_num = self._parse_episode_num(self.title)

        paragraphs = soup.find_all("p")
        if paragraphs:
            first_paragraph = paragraphs[0]
            self.description = first_paragraph.text.strip()

            pics = first_paragraph.find_all("img")
            if pics:
                src = pics[0].get("src")
                if src:  # do not clobber the fallback cover with None
                    self.cover = src

            # Happens on later pages: the first paragraph only holds the image.
            if self.description == "" and len(paragraphs) > 1:
                self.description = paragraphs[1].text.strip()

        # Stop at the first paragraph that yields a link.
        for paragraph in paragraphs:
            self.mp3 = self._find_mp3_link(paragraph)
            if self.mp3:
                break

        if not self.mp3:
            print(f'\tNo link found:\n\t\tPage - {self.url}')
            if "mp3" in current_html.lower():
                print(f'!!! mp3 found in html for page:\n{current_html}')
        elif not self.mp3.endswith(("mp3", "mp3/")):
            print(f'\tWeird MP3 Link:\n\t\tMP3 Link - {self.mp3}\n\t\tPage - {self.url}')