forked from lalinsky/musicbrainz-bot
-
Notifications
You must be signed in to change notification settings - Fork 13
/
isrc_spotify.py
executable file
·181 lines (153 loc) · 6.76 KB
/
isrc_spotify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/python
# -*- coding: utf-8 -*-
# (c) 2012 Ian Weller, Aurélien Mino
# This program is free software. It comes without any warranty, to the extent
# permitted by applicable law. You can redistribute it and/or modify it under
# the terms of the Do What The Fuck You Want To Public License, Version 2, as
# published by Sam Hocevar. See COPYING for more details.
import re
import mechanize
import sqlalchemy
import musicbrainzngs
from kitchen.text.converters import to_unicode
from picard.similarity import similarity2
from editing import MusicBrainzClient
from utils import out, colored_out, bcolors
from mbbot.source.spotify import SpotifyWebService
import config as cfg
import codecs
# Reference DDL for the bookkeeping table that save_processing() writes to
# (one row per release gid, with the time it was last processed).
# This is a bare string literal: it is evaluated and discarded at import time
# and is not executed against the database anywhere in this script.
'''
CREATE TABLE bot_isrc_spotify (
release uuid NOT NULL,
processed timestamp with time zone DEFAULT now(),
CONSTRAINT bot_isrc_spotify_pkey PRIMARY KEY (release)
)
'''
# Identify this bot to the MusicBrainz web service: application name, version,
# and a contact string built from the configured site URL and bot username.
musicbrainzngs.set_useragent(
"musicbrainz-bot",
"1.0",
"%s/user/%s" % (cfg.MB_SITE, cfg.MB_USERNAME)
)
# Work queue: up to 100 releases that have a non-empty barcode and at least
# one medium on which no track's recording carries an ISRC.
# The LEFT JOIN on bot_isrc_spotify exposes when (if ever) the bot last handled
# each release; "ORDER BY b.processed NULLS FIRST" puts never-processed
# releases first, then the least recently processed ones.
# The artist/country joins in the CTE only serve the commented-out country
# filter at the end of its WHERE clause.
query_releases_wo_isrcs = '''
WITH
releases_wo_isrcs AS (
SELECT DISTINCT r.id, r.gid, r.name, r.barcode, r_country.iso_code AS country, r.artist_credit
FROM s_release r
JOIN medium ON medium.release = r.id
LEFT JOIN country r_country ON r.country = r_country.id
JOIN artist_credit ac ON r.artist_credit = ac.id
JOIN artist_credit_name acn ON acn.artist_credit = ac.id
JOIN artist a ON a.id = acn.artist
LEFT JOIN country a_country ON a.country = a_country.id
WHERE r.barcode IS NOT NULL AND r.barcode != ''
/* Release has no ISRCs */
AND NOT EXISTS (SELECT 1 FROM track JOIN isrc ON isrc.recording = track.recording WHERE medium.tracklist = track.tracklist)
/* AND a_country.iso_code = 'FR' AND r_country.iso_code = 'FR' */
)
SELECT r.id, r.gid, r.name, r.barcode, ac.name AS artist, b.processed
FROM releases_wo_isrcs tr
JOIN s_release r ON tr.id = r.id
JOIN s_artist_credit ac ON r.artist_credit = ac.id
LEFT JOIN bot_isrc_spotify b ON b.release = r.gid
ORDER BY b.processed NULLS FIRST, r.artist_credit
LIMIT 100
'''
# Tracks of one release in medium/track order. Each row carries the recording
# gid, name and length, plus a "<medium position>.<track position>" label.
# The single %s is filled in with the release's numeric id via Python string
# formatting by the main loop before the query is executed.
query_tracks = '''
SELECT r.gid, r.name, r.length, m.position || '.' || t.position AS position
FROM medium m
JOIN track t ON m.tracklist = t.tracklist
JOIN s_recording r ON r.id = t.recording
WHERE m.release = %s
ORDER BY m.position, t.position
'''
# Open one module-level database connection (shared by every query below) using
# the connection URL from the bot configuration.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Resolve unqualified table names against the musicbrainz schema first, then
# the bot's own schema (where bot_isrc_spotify presumably lives - confirm).
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)
def similarity(a, b):
    """Return the similarity of two strings as an integer percentage.

    Both values are converted to unicode before being handed to Picard's
    ``similarity2``; its result is scaled by 100 and truncated to an int.
    (``similarity2`` presumably returns a ratio in [0, 1] - confirm against
    picard.similarity.)
    """
    left = to_unicode(a)
    right = to_unicode(b)
    ratio = similarity2(left, right)
    return int(ratio * 100)
def compare_data(mb_release, sp_release):
    """Score how closely a MusicBrainz release matches a Spotify album.

    The score is an integer built from three weighted parts:

    * 10% release-name similarity,
    * 10% artist-name similarity,
    * 80% mean per-track score, where each track score is half title
      similarity and half duration similarity.

    Duration similarity falls linearly from 100 (identical length) to 0
    (15 seconds or more apart). The MusicBrainz length is compared with the
    Spotify length multiplied by 1000, i.e. the former is presumably in
    milliseconds and the latter in seconds.

    :param mb_release: mapping with 'name', 'artist' and 'tracks'; each track
        exposes 'name' and 'length' (the length may be None).
    :param sp_release: mapping with 'name', 'artist' and 'tracks'; each track
        exposes 'name' and 'length'.
    :returns: the combined score, or 0 when the track lists are empty or
        differ in size (tracks are paired by index, so they cannot be matched).
    """
    mb_tracks = mb_release['tracks']
    sp_tracks = sp_release['tracks']
    # An empty track list gives nothing to compare and would otherwise divide
    # by zero when averaging the per-track scores.
    if not mb_tracks or len(mb_tracks) != len(sp_tracks):
        return 0

    name = similarity(mb_release['name'], sp_release['name'])
    artist = similarity(mb_release['artist'], sp_release['artist'])

    track_sim = []
    for mb_track, sp_track in zip(mb_tracks, sp_tracks):
        title_sim = similarity(mb_track['name'], sp_track['name'])
        mb_length = mb_track['length']
        if mb_length is None:
            # Unknown MusicBrainz length: it cannot confirm the match, so be
            # conservative and give the duration component no credit.
            time_sim = 0
        else:
            # Divide by a float so the result does not silently depend on
            # integer division when both lengths happen to be ints.
            diff = abs(mb_length - sp_track['length'] * 1000) / 1000.0
            if diff > 15:
                time_sim = 0
            else:
                time_sim = int((15 - diff) / 15 * 100)
        track_sim.append(int(title_sim * 0.50) + int(time_sim * 0.50))

    # Floor division keeps the integer mean the script has always used.
    mean_track_sim = sum(track_sim) // len(mb_tracks)
    return int(name * 0.10) + int(artist * 0.10) + int(mean_track_sim * 0.80)
def submit_isrcs(mb_release, sp_release):
    """Submit the Spotify ISRCs of an album to the matching MusicBrainz recordings.

    Tracks are paired by position: the n-th MusicBrainz recording gid is
    mapped to the upper-cased ISRCs found among the external ids of the n-th
    Spotify track. A track without ISRCs maps to an empty list. The resulting
    mapping is sent with ``musicbrainzngs.submit_isrcs``.
    """
    recording_gids = [mb_track['gid'] for mb_track in mb_release['tracks']]
    isrcs_by_track = [
        [ext['id'].upper()
         for ext in sp_track['external-ids']
         if ext['type'] == 'isrc']
        for sp_track in sp_release['tracks']
    ]
    payload = dict(zip(recording_gids, isrcs_by_track))
    musicbrainzngs.submit_isrcs(payload)
def make_html_comparison_page(mbrainz, spotify):
    """Write 'compare.html': a side-by-side view of both releases.

    The left column shows the MusicBrainz release (name, artist, then one line
    per track with its position, name and length; an unknown length is shown
    as 0). The right column shows the Spotify album the same way, with tracks
    labelled "<disc>-<track>" and the length multiplied by 1000. The file in
    the current directory is overwritten on every call.
    """
    heading = '<div style="font-weight:bold">%s</div>'
    with codecs.open('compare.html', mode='w', encoding='utf-8') as page:
        emit = page.write

        # Left column: MusicBrainz
        emit('<html><head><meta charset="utf-8"></head><body><div style="float:left;width:50%">')
        for key in ('name', 'artist'):
            emit(heading % mbrainz[key])
        for mb_track in mbrainz['tracks']:
            position = mb_track['position']
            title = mb_track['name']
            length = mb_track['length']
            if length is None:
                length = 0
            emit('<div>%s: %s (%s)</div>' % (position, title, int(length)))

        # Right column: Spotify
        emit('</div><div style="float:right;width:50%">')
        for key in ('name', 'artist'):
            emit(heading % spotify[key])
        for sp_track in spotify['tracks']:
            row = (sp_track['disc-number'], sp_track['track-number'],
                   sp_track['name'], int(sp_track['length'] * 1000))
            emit('<div>%s-%s: %s (%s)</div>' % row)

        emit('</div></body></html>')
def save_processing(mb_release):
    """Record in bot_isrc_spotify that this release has just been handled.

    A release that has never been processed (``mb_release['processed']`` is
    None, i.e. the work-queue query found no bookkeeping row) gets a new row,
    whose timestamp comes from the column default. Otherwise the existing
    row's ``processed`` timestamp is set to now().

    :param mb_release: mapping with at least 'gid' and 'processed'.
    """
    if mb_release['processed'] is None:
        statement = "INSERT INTO bot_isrc_spotify (release) VALUES (%s)"
    else:
        statement = "UPDATE bot_isrc_spotify SET processed = now() WHERE release = %s"
    # Pass the bind value as a real one-element tuple; the previous
    # "(value)" was only a parenthesised scalar.
    db.execute(statement, (mb_release['gid'],))
# Main script. For each queued release:
#   1. look the barcode up on Spotify as a UPC (exactly one hit is required),
#   2. skip albums whose ISRCs look like TuneCore song ids,
#   3. compare the track lists and submit the ISRCs when the match is >= 85%,
#   4. record the release as processed so it moves to the back of the queue.
sws = SpotifyWebService()
musicbrainzngs.auth(cfg.MB_USERNAME, cfg.MB_PASSWORD)

for release in db.execute(query_releases_wo_isrcs):
    mb_release = dict(release)
    colored_out(bcolors.OKBLUE, 'Looking up release "%s" http://musicbrainz.org/release/%s' % (mb_release['name'], mb_release['gid']))

    sp_albums = sws.search_albums('upc:%s' % mb_release['barcode'])
    if len(sp_albums) != 1:
        if len(sp_albums) == 0:
            out(' * no spotify release found')
        if len(sp_albums) > 1:
            out(' * multiple spotify releases found')
        save_processing(mb_release)
        continue
    sp_uri = sp_albums[0]['href']
    sp_release = sws.lookup(sp_uri, detail=2)

    # Decide about TuneCore ids *before* acting on them. The old check used
    # "continue" inside the nested track/extid loops, which only advanced the
    # inner loop: the release was still compared and submitted, and
    # save_processing() ran more than once for it.
    has_tunecore_ids = any(
        extid['type'] == 'isrc' and extid['id'].upper()[:2] == 'TC'
        for track in sp_release['tracks']
        for extid in track['external-ids']
    )
    if has_tunecore_ids:
        print('TuneCore song IDs detected! Bailing out')
        save_processing(mb_release)
        continue

    mb_release['tracks'] = list(db.execute(query_tracks % (mb_release['id'],)))

    make_html_comparison_page(mb_release, sp_release)
    sim = compare_data(mb_release, sp_release)
    out(' * comparing with %s: metadata matched %d%%' % (sp_uri, sim))
    if sim < 85:
        out(' * not enough similarity => skipping')
    else:
        out(' * submitting ISRCs')
        submit_isrcs(mb_release, sp_release)
    save_processing(mb_release)