-
Notifications
You must be signed in to change notification settings - Fork 1
/
check_subtitles_on_amara.py
executable file
·119 lines (104 loc) · 3.96 KB
/
check_subtitles_on_amara.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
import argparse, sys, requests
from time import sleep
from pprint import pprint
from api.amara_api import Amara
def read_cmd(argv=None):
    """Parse command line options.

    argv - optional list of argument strings; when None (the default),
           argparse reads the arguments from sys.argv[1:]

    Returns the argparse namespace with these attributes:
    input_file - path to the text file containing YouTube IDs
    sleep_int  - sleep interval in seconds (-1 when not given)
    lang       - target language (None when not given)
    """
    desc = "Program for finding existing subtitles on Amara"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'input_file', metavar='INPUT_FILE',
        help='Text file containing YouTube IDs.')
    parser.add_argument(
        '-s', '--sleep', dest='sleep_int',
        type=float, default=-1,
        help='Sleep interval (seconds)')
    parser.add_argument(
        '-l', '--lang', dest='lang',
        default=None,
        help='What language?')
    return parser.parse_args(argv)
def find_subtitles(present_languages, lang, amara_id):
    """Check whether a video on Amara has subtitles for a given language.

    present_languages - part of the response from amara.check_video; each
                        entry is read through its 'code' and 'published' keys
    lang - target language code
    amara_id - Amara video ID, passed on to amara.check_language when the
               matching entry is not published

    Returns a tuple (subs_published, subs_incomplete).
    """
    subs_published = subs_incomplete = False
    for entry in present_languages:
        if entry['code'] != lang:
            continue
        if entry['published']:
            subs_published = True
        else:
            # Unpublished entry: ask Amara (module-level `amara` client)
            # whether at least one subtitle version exists for this language.
            is_present, sub_version = amara.check_language(amara_id, lang)
            if is_present and sub_version > 0:
                subs_incomplete = True
        # Only the first entry matching the target language is considered.
        break
    return subs_published, subs_incomplete
# `opts` and `amara` are kept as module-level names: find_subtitles() reads
# the Amara client from module scope.
opts = read_cmd()

AMARA_TEAM = 'khan-academy'

# Reading file with YT id's.
# Each entry holds the whitespace-separated tokens of one input line; the
# first token is the YouTube ID, the rest (e.g. filenames) is ignored.
with open(opts.input_file, "r") as f:
    ytids = [line.split() for line in f]

AMARA_USERNAME = 'dhbot'
amara = Amara(AMARA_USERNAME)

# Main loop
for tokens in ytids:
    # Echo an empty line for every empty input line.
    if not tokens:
        print("")
        continue
    ytid = tokens[0]

    # Trying to reduce E 429
    if opts.sleep_int > 0:
        sleep(opts.sleep_int)

    sys.stdout.flush()
    sys.stderr.flush()

    video_url = 'https://www.youtube.com/watch?v=%s' % ytid

    amara_team_id = amara_public_id = None
    team_title = public_title = None
    team_languages = public_languages = None

    # Check whether the video is already on Amara, either under the team
    # or as a public (team-less) video.
    amara_response = amara.check_video(video_url)
    for r in amara_response['objects']:
        if r['team'] == AMARA_TEAM:
            amara_team_id = r['id']
            team_title = r['title']
            team_languages = r['languages']
        elif r['team'] is None:
            amara_public_id = r['id']
            public_title = r['title']
            public_languages = r['languages']

    # The team video takes precedence over the public one.
    if amara_team_id is not None:
        site = "Team"
        amara_id = amara_team_id
        video_title = team_title
        languages = team_languages
        amara_url = "%s/%s/videos/%s/%s/?team=%s" % (
            amara.AMARA_BASE_URL,
            opts.lang,
            amara_team_id,
            opts.lang,
            AMARA_TEAM,
        )
    elif amara_public_id is not None:
        site = "Public"
        amara_id = amara_public_id
        video_title = public_title
        languages = public_languages
        amara_url = "%s/%s/videos/%s/%s/" % (
            amara.AMARA_BASE_URL, opts.lang, amara_public_id, opts.lang)
    else:
        # Video is not on Amara at all: nothing to report.
        continue

    subs_published, subs_incomplete = find_subtitles(
        languages,
        opts.lang,
        amara_id
    )

    if subs_published:
        status = "Subtitles published on %s Amara." % site
    elif subs_incomplete:
        status = "Incomplete subtitles found on %s Amara." % site
    else:
        continue

    # One tab-separated report line per video with subtitles.
    print("%s\t%s\t%s\t%s\t%s\t%s" % (
        status, ytid, video_url, video_title, amara_id, amara_url))