-
Notifications
You must be signed in to change notification settings - Fork 0
/
funload.py
executable file
·159 lines (120 loc) · 4.87 KB
/
funload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env python3
import os
import re
import subprocess
import urllib
import urllib.request
import xml.etree.ElementTree as ElemT
from datetime import datetime
# Prefix-to-URI map passed to ElementTree searches so that the
# 'content:encoded' elements of feed items can be addressed by prefix.
namespaces = {'content': "http://purl.org/rss/1.0/modules/content/"}
def download1(config, project, address):
    """Fetch an RSS feed and download the media of its new items.

    The feed's ``lastBuildDate`` is compared with the build date stored for
    *project*. Only when the feed is newer is the stored date replaced and
    every ``<item>`` handed to ``item_node_parse``; each URL it returns is
    passed to ``download``.

    Args:
        config: object providing ``get_last_build(project)`` and
            ``write(project, text)``.
        project: name under which the last build date is stored.
        address: URL of the RSS feed.
    """
    last_build = config.get_last_build(project)
    print("Last read build was at " + last_build.isoformat())
    print("Fetching..")
    # urllib.urlopen only exists on Python 2; on Python 3 the function lives
    # in urllib.request. The context manager closes the connection once the
    # document has been parsed.
    with urllib.request.urlopen(address) as rss_text:
        print("Parsing..")
        root = ElemT.parse(rss_text).getroot()
    channel = root.find('channel')
    current_build_text = channel.find('lastBuildDate').text.strip()
    current_build = datetime.strptime(current_build_text, "%a, %d %b %Y %H:%M:%S +0000")
    print("Current build is from " + current_build.isoformat())
    if current_build > last_build:
        print("Storing current build time.")
        config.write(project, current_build_text)
        for item_node in channel.iter('item'):
            for url in item_node_parse(item_node, last_build):
                download(url)
    else:
        print("So no new version available..")
def item_node_parse(item_node, last_build):
    """Collect the media URLs referenced by one RSS ``<item>``.

    The item's link is printed, then its ``pubDate`` is compared with
    *last_build*. Items published before *last_build* yield no URLs.

    Args:
        item_node: ElementTree element of the ``<item>``.
        last_build: datetime of the previously processed feed build.

    Returns:
        A list with, in order: the ``url`` attribute of every ``<enclosure>``,
        then for each ``content:encoded`` child the YouTube links found in
        its text followed by the first argument of every
        ``SFYouTubePlayer.embedPlayer("...")`` call found in its text.
    """
    urls = []
    print(item_node.find('link').text)
    item_pub_date_text = item_node.find('pubDate').text
    item_pub_date = datetime.strptime(item_pub_date_text, "%a, %d %b %Y %H:%M:%S +0000")
    if item_pub_date < last_build:
        print("\tOlder than last build..")
        return urls

    print("\tMust be new..")
    urls.extend(enclosure.attrib['url'] for enclosure in item_node.iter('enclosure'))

    # Raw strings avoid invalid-escape warnings. The optional group now
    # covers "www." as a whole, so links without the www prefix match too
    # (the old "(www)?\." form always demanded a dot before "youtube").
    youtube_link = re.compile(r'''https?://(?:www\.)?youtube[^"']*''')
    embed_call = re.compile(r'''SFYouTubePlayer\.embedPlayer\("([^"']*)''')
    for content in item_node.iterfind('content:encoded', namespaces=namespaces):
        # An empty element has text None, which re cannot search.
        text = content.text or ""
        urls.extend(match.group(0) for match in youtube_link.finditer(text))
        urls.extend(match.group(1) for match in embed_call.finditer(text))
    return urls
def download(url):
    """Download one media URL into the local ``funload/`` directory.

    URLs containing "youtube" are handed to the external ``youtube-dl``
    program; its output is discarded. Any other URL is saved as
    ``funload/<basename of url>`` unless that file already exists.

    Args:
        url: address of the video or file to fetch.
    """
    if "youtube" in url:
        base, _, _ = url.partition("?")
        # Embed links only carry player options after "?", but a /watch link
        # keeps the video id there (?v=...), so its query must be preserved.
        link = url if base.rstrip("/").endswith("/watch") else base
        print("\tFound youtube video [" + link + "]")
        # The URL comes from a remote feed: pass it as a separate argument
        # (no shell) and after "--" so it can never be read as a command or
        # as an option.
        command = ["youtube-dl", "-q", "-o", "funload/%(id)s.%(ext)s", "--", link]
        try:
            subprocess.run(command, stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL, check=False)
        except OSError as error:
            # Keep the best-effort behaviour when youtube-dl is unavailable.
            print("\tcould not run youtube-dl: " + str(error))
        return

    filename = os.path.basename(url)
    file_path = 'funload/' + filename
    print("\tFound file '" + filename + "'")
    if os.path.exists(file_path):
        print("\talready exists.")
        return

    print("\tdownloading..")
    # urlretrieve does not create missing directories.
    os.makedirs('funload', exist_ok=True)
    # urllib.urlretrieve only exists on Python 2.
    urllib.request.urlretrieve(url, file_path)
    print("\tdone.")
def get_all_new_emok_video_pages(config, project, xml):
    """Return the page links of all feed items published since the last run.

    Args:
        config: object providing ``get_last_build(project)`` and
            ``write(project, text)``.
        project: name under which the last build date is stored.
        xml: parsed feed offering ``getroot()``.

    Returns:
        The ``<link>`` text of every item whose ``pubDate`` is not older than
        the stored build date, or an empty list when the feed's
        ``lastBuildDate`` is not newer than the stored one. When it is newer,
        the new build date text is stored via ``config.write``.
    """
    stamp_format = "%a, %d %b %Y %H:%M:%S +0000"

    previous = config.get_last_build(project)
    print("Last read build was at " + previous.isoformat())
    print("Parsing..")
    feed = xml.getroot().find('channel')
    build_stamp = feed.find('lastBuildDate').text.strip()
    latest = datetime.strptime(build_stamp, stamp_format)
    print("Current build is from " + latest.isoformat())

    if not (latest > previous):
        print("So no new version available..")
        return []

    print("Storing current build time.")
    config.write(project, build_stamp)

    fresh_pages = []
    for entry in feed.iter('item'):
        page = entry.find('link').text
        print(page)
        published = datetime.strptime(entry.find('pubDate').text, stamp_format)
        if published < previous:
            print("\tOlder than last build..")
            continue
        fresh_pages.append(page)
    return fresh_pages
def extract_emok_video_urls(file):
    """Return every double-quoted ``.mp4`` URL found in a page.

    Args:
        file: readable object whose ``read()`` returns the page as ``str``
            or ``bytes`` (HTTP responses return bytes on Python 3).

    Returns:
        List of the matched http/https URLs without the surrounding quotes,
        in order of appearance.
    """
    page = file.read()
    if isinstance(page, bytes):
        # A str pattern cannot be applied to bytes; undecodable bytes are
        # replaced rather than aborting the whole page.
        page = page.decode("utf-8", errors="replace")
    return re.findall(r'"(https?://[^"]+\.mp4)"', page)
def hornoxe(config):
    """Run ``download1`` for the hornoxe feed using *config*."""
    project = 'hornoxe'
    feed_address = "http://hornoxe.com/feed/"
    download1(config, project, feed_address)
def orschlurch(config):
    """Run ``download1`` for the orschlurch video feed using *config*."""
    project = 'orschlurch'
    feed_address = "http://www.orschlurch.net/kategorie/videos/feed/"
    download1(config, project, feed_address)
def emok(config):
    """Download the videos of all new entries in the emok feed.

    The feed is parsed, ``get_all_new_emok_video_pages`` selects the pages of
    new items, every page is scanned with ``extract_emok_video_urls`` and each
    URL found is passed to ``download``.

    Args:
        config: object providing ``get_last_build(project)`` and
            ``write(project, text)``.
    """
    # urllib.urlopen only exists on Python 2; the context managers make sure
    # each connection is closed as soon as its content has been consumed.
    with urllib.request.urlopen("http://www.emok.tv/category/own-content/feed") as response:
        feed = ElemT.parse(response)
    links = get_all_new_emok_video_pages(config, 'emok', feed)
    urls = []
    for link in links:
        with urllib.request.urlopen(link) as page:
            urls.extend(extract_emok_video_urls(page))
    for url in urls:
        download(url)
def main():
    """Create one Config and run the site handlers in order: hornoxe, orschlurch, emok."""
    config = Config()
    for handler in (hornoxe, orschlurch, emok):
        handler(config)
class Config:
    """Store of the last processed feed build date, one file per project.

    The state of a project lives in ``~/.funload/<project>``; its first line
    is a date in the form ``"%a, %d %b %Y %H:%M:%S +0000"``.
    """

    def config_file(self, project, mode):
        """Open the state file of *project* with *mode* and return it."""
        return open(os.path.expanduser("~/.funload/" + project), mode)

    def get_last_build(self, project):
        """Return the stored build date of *project* as a datetime.

        A project without a state file (first run) yields ``datetime.min`` so
        that every feed entry counts as new.

        Raises:
            ValueError: if the first line of the file is not a valid date.
        """
        try:
            # The with-block closes the file even if reading fails.
            with self.config_file(project, 'r') as f:
                first_line = f.readline().rstrip('\n')
        except FileNotFoundError:
            return datetime.min
        return datetime.strptime(first_line, "%a, %d %b %Y %H:%M:%S +0000")

    def write(self, project, content):
        """Replace the state file of *project* with *content*."""
        # Opening for writing fails if the directory is missing.
        os.makedirs(os.path.expanduser("~/.funload"), exist_ok=True)
        with self.config_file(project, 'w') as f:
            f.write(content)
# Run the downloads only when this file is executed as a script.
if __name__ == "__main__":
    main()