-
Notifications
You must be signed in to change notification settings - Fork 1
/
sync_subs_yt2amara.py
executable file
·172 lines (146 loc) · 5.84 KB
/
sync_subs_yt2amara.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python3
from subprocess import Popen, PIPE, call, check_call
import argparse, sys, os, requests
from pprint import pprint
from api.amara_api import Amara
from utils import eprint, epprint, download_yt_subtitles
# We suppose that the uploaded subtitles are complete (non-critical)
is_complete = True # do we upload complete subtitles?
# DH TODO automatically handle missing YT sub formats
# converting to srt can be done via
# ffmpeg -i foo.vtt foo.srt
# currently youtube-dl convert does not work when not downloading the video
sub_format = 'vtt'
sub_format2 = "srt"
def read_cmd():
"""Function for reading command line options."""
desc = "Program for syncing subtitles from YouTube to Amara.\n \
Optimized for larger number of subs, should be faster than script amara_upload.py."
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('input_file', metavar='INPUT_FILE', help='Text file containing YouTube IDs in the first column.')
parser.add_argument('-l','--lang', dest = 'lang', required = True, help='Which language do we copy?')
parser.add_argument('-r','--rewrite', dest='rewrite', default = False, action = "store_true", help = 'Rewrite subs on Amara.')
parser.add_argument('-v','--verbose', dest='verbose', default = False, action = "store_true", help = 'Verbose output.')
parser.add_argument('--skip-errors', dest = 'skip', default = False, action = "store_true",
help = 'Skip subtitles that could not be downloaded. \
The list of failed YTIDs will be printed to \"failed_yt.dat\".')
return parser.parse_args()
opts = read_cmd()
# We suppose that the original language is English
original_video_lang = "en"
if opts.lang == "en":
is_original = True # is lang the original language of the video?
else:
is_original = False
print("# Syncing subtitles for language %s" % opts.lang)
ytids = []
# Reading file with YT id's
with open(opts.input_file, "r") as f:
for line in f:
ytids.append(line.split())
try:
os.remove("youtubedl.out")
os.remove("youtubedl.err")
except:
pass
TEMP_DIR = "subs"
try:
os.mkdir(TEMP_DIR)
except:
pass
# Read file with existing videos to speed up the process
# Append every updated video
f_vids = open("videos_on_amara.%s.dat" % opts.lang, 'a+')
f_vids.seek(0)
ytid_exist = set()
for l in f_vids:
ytid_exist.add(l.split()[0])
print("Current number of videos on Amara with subtitles in %s is: %d" % (opts.lang, len(ytid_exist)))
# Skip certain videos
ytid_skip = set()
fname_skip = "sync_yt2amara_skip.%s.dat" % opts.lang
try:
with open(fname_skip,"r") as f:
for l in f:
ytid_skip.add(l.split()[0])
print("Current number of skipped videos on Amara:", len(ytid_skip))
except FileNotFoundError as e:
pass
uploaded = 0
missing = 0
AMARA_USERNAME = 'dhbot'
amara = Amara(AMARA_USERNAME)
# Main loop
for i in range(len(ytids)):
video_present = False
lang_present = False
lang_visible = False
if len(ytids[i]) == 0 or ytids[i][0][0] == "#":
print("")
continue
ytid = ytids[i][0]
video_url = "https://www.youtube.com/watch?v=%s" % ytid
# PART 1: Checking the existence of subtitles on Amara
if ytid in ytid_skip:
continue
if ytid in ytid_exist and not opts.rewrite:
continue
sys.stdout.flush()
sys.stderr.flush()
f_vids.flush()
# Check whether video is on Amara, if not create it
amara_response = amara.check_video(video_url)
if not amara_response or amara_response['meta']['total_count'] == 0:
video_present = False
lang_present = False
else:
video_present = True
amara_id = amara_response['objects'][0]['id']
amara_title = amara_response['objects'][0]['title']
for lg in amara_response['objects'][0]['languages']:
if lg["code"] == opts.lang:
lang_present = True
# lang could be there, but with no revisions
# this key was retired from API
lang_visible = True
#if lg["visible"] == True:
# lang_visible = True
break
lang_present, sub_version = amara.check_language(amara_id, opts.lang)
if lang_present and lang_visible and sub_version > 0:
f_vids.write(ytid + '\n')
if not opts.rewrite:
print("Subtitles already on Amara for video YTID=%s" % ytid)
print("Use \"-r\" option to overwrite them")
continue
# Moved this here due to large number of Amara errors...
if not video_present:
amara_response = amara.add_video(video_url, original_video_lang)
if "id" in amara_response:
amara_id = amara_response['id']
amara_title = amara_response['title']
else:
print("ERROR: During adding video to Amara. YTID=%s" % ytid)
continue
if opts.verbose:
print("Created video on Amara with AmaraId %s" % amara_id)
print("%s/cs/videos/%s" % (amara.AMARA_BASE_URL, amara_id))
# PART 2: GETTING THE SUBTITLES FROM YOUTUBE
# imported from utils.py
subs = download_yt_subtitles(opts.lang, sub_format, ytid, TEMP_DIR)
# PART 3: Creating language on Amara
if not lang_present:
r = amara.add_language(amara_id, opts.lang, is_original)
# PART 4: UPLOADING THE SUBTITLES
r = amara.upload_subs(amara_id, opts.lang, is_complete, subs, sub_format)
if not 'version_number' in r:
print("ERROR: Failed to upload subs to Amara for YTID=%s" % ytid)
epprint(r)
continue
else:
print("Succesfully uploaded subtitles for YTID=%s AmaraID=%s" % (ytid, amara_id))
uploaded += 1
f_vids.write(ytid + '\n')
print("(: And we are finished! :)")
print("Succesfuly uploaded %d video subtitles." % uploaded)
print("%d videos are missing subtitles on YT" % missing)