-
Notifications
You must be signed in to change notification settings - Fork 6
/
beatport.py
100 lines (86 loc) · 3.14 KB
/
beatport.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import json
import re
import sys
import requests
from bs4 import BeautifulSoup
from config import genres
# Since genre list is calculated in javascript, we need to render the page to get it
def render_page(url, wait_seconds=3):
    """Load *url* in a Chrome WebDriver session and return the page source.

    After navigating, the function sleeps for ``wait_seconds`` before reading
    the source, giving client-side scripts time to fill in the DOM.

    Args:
        url: Address of the page to open.
        wait_seconds: Seconds to sleep between navigation and reading the
            page source. Defaults to 3, the previously hard-coded delay.

    Returns:
        The driver's ``page_source`` after the wait.
    """
    # Imported inside the function, so selenium is only loaded when a page
    # is actually rendered.
    import time

    from selenium import webdriver

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        time.sleep(wait_seconds)
        return driver.page_source
    finally:
        # Always end the browser session, even when navigation fails, so it
        # is not left open behind us.
        driver.quit()
def get_chart_genres():
    """Scrape the genre links listed on Beatport's charts page.

    Returns:
        A dict mapping each link's text to its ``href`` value, taken from the
        ``<li>`` items of the ``bucket-items`` list inside the
        ``bucket genre-list`` container. Items without an ``<a>`` element
        are skipped.

    Raises:
        AttributeError: If the expected container or list is not in the page.
    """
    response = requests.get("https://www.beatport.com/charts")
    # Name the parser explicitly, as the other scrapers in this module do;
    # otherwise bs4 picks whichever parser happens to be installed and warns.
    soup = BeautifulSoup(response.text, "html.parser")
    genre_list_items = (
        soup.find("div", {"class": "bucket genre-list"})
        .find("ul", {"class": "bucket-items"})
        .find_all("li")
    )
    available_genres = {}
    for item in genre_list_items:
        # Look the anchor up once and tolerate list items that have none.
        link = item.find("a")
        if link is None:
            continue
        available_genres[link.text] = link.get("href")
    return available_genres
def get_genres():
    """Collect the header genre links from the rendered Beatport home page.

    Returns:
        A dict that starts with ``"All Genres"`` mapped to an empty string,
        followed by one entry per matching header link: the link text mapped
        to its ``href`` with ``"/genre/"`` removed.
    """
    html = render_page("https://www.beatport.com/")
    genre_link_id = re.compile(r"header-subnav-link-genre\d*")
    anchors = BeautifulSoup(html, "html.parser").find_all(
        "a", {"data-testid": genre_link_id}
    )

    genre_slugs = {"All Genres": ""}
    genre_slugs.update(
        (anchor.text, anchor.get("href").replace("/genre/", ""))
        for anchor in anchors
    )
    return genre_slugs
def get_top_100_playables(genre):
    """Fetch the raw Top 100 track entries for *genre* from Beatport.

    Args:
        genre: Key into the ``genres`` mapping imported from ``config``. The
            mapped value is used as the URL path after ``/genre/``; a falsy
            value selects the site-wide Top 100 page instead.

    Returns:
        The ``results`` value of the first query found in the page's embedded
        ``__NEXT_DATA__`` JSON.

    Raises:
        SystemExit: With status 1 when Beatport answers with HTTP 403.
    """
    # Realistic headers to circumvent lockout for too many requests
    headers = {
        "User-Agent": "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
    }

    slug = genres[genre]
    # Only add the "genre/<slug>" segment when there is a slug; formatting
    # two empty strings into the path would yield ".com///top-100".
    if slug:
        url = "https://www.beatport.com/genre/{}/top-100".format(slug)
    else:
        url = "https://www.beatport.com/top-100"

    r = requests.get(url, headers=headers)
    if r.status_code == 403:
        print("Forbidden error:")
        print(r.text)
        # Exit with a failure status so the caller's shell sees the error.
        sys.exit(1)

    soup = BeautifulSoup(r.text, "html.parser")
    next_data = soup.find("script", {"id": "__NEXT_DATA__"})
    page_state = json.loads(next_data.contents[0])
    queries = page_state["props"]["pageProps"]["dehydratedState"]["queries"]
    return queries[0]["state"]["data"]["results"]
def parse_tracks(tracks_json):
    """Flatten raw track entries into simple per-track dicts.

    Args:
        tracks_json: Iterable of track mappings, each providing ``name``,
            ``mix_name``, ``artists``, ``remixers``, ``release``,
            ``publish_date``, ``length``, ``length_ms``, ``genre``, ``bpm``
            and ``key``.

    Returns:
        A list with one dict per input track. ``title`` is the release name
        when that is truthy, otherwise the track name.
    """

    def _summarise(entry):
        # The release name doubles as the title unless it is empty.
        release_name = entry["release"]["name"]
        return {
            "title": release_name or entry["name"],
            "name": entry["name"],
            "mix": entry["mix_name"],
            "artists": [person["name"] for person in entry["artists"]],
            "remixers": [person["name"] for person in entry["remixers"]],
            "release": entry["release"]["name"],
            "published_date": entry["publish_date"],
            "duration": entry["length"],
            "duration_ms": entry["length_ms"],
            "genres": entry["genre"]["name"],
            "bpm": entry["bpm"],
            "key": entry["key"]["name"],
        }

    return [_summarise(entry) for entry in tracks_json]
def get_top_100_tracks(genre):
    """Fetch and parse the Top 100 tracks for *genre*.

    Prints a progress line, passes *genre* to ``get_top_100_playables`` and
    returns that result run through ``parse_tracks``.
    """
    banner = "[+] Fetching Top 100 {} Tracks".format(genre)
    print(banner)
    return parse_tracks(get_top_100_playables(genre))