-
Notifications
You must be signed in to change notification settings - Fork 0
/
05_ Recommendation_System.py
230 lines (205 loc) · 9.54 KB
/
05_ Recommendation_System.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import torchaudio
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio.transforms as transforms
from src.audio_folder_dataset import AudioFolder
from src.audio_folder_collate_fn import collate_audio_folder_batch
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import preprocessing
import pandas as pd
class M5(nn.Module):
    """1-D convolutional classifier that works directly on audio waveforms.

    Three Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages are followed by an
    average over the whole remaining time axis and a single linear layer.

    Args:
        n_input: number of channels of the input waveform.
        n_output: number of scores produced per example.
        stride: stride of the first convolution.
        n_channel: feature maps of the first two stages; the third stage and
            the linear layer input use twice as many.
    """

    def __init__(self, n_input=2, n_output=11, stride=16, n_channel=32):
        super().__init__()
        wide = 2 * n_channel
        #attribute names and their order are kept unchanged because the keys of
        #the saved state_dict are derived from them
        #stage 1: wide kernel (80 samples) with a large stride
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=80, stride=stride)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.pool1 = nn.MaxPool1d(4, 4)
        #stage 2: same width, kernel of 3
        self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channel)
        self.pool2 = nn.MaxPool1d(4)
        #stage 3: doubles the number of feature maps
        self.conv3 = nn.Conv1d(n_channel, wide, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(wide)
        self.pool3 = nn.MaxPool1d(4)
        #linear layer mapping the pooled features to the output scores
        self.fc1 = nn.Linear(wide, n_output)

    def forward(self, x):
        """Map a (batch, n_input, time) waveform batch to (batch, n_output) scores."""
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        for conv, norm, pool in stages:
            #convolution, then ReLU on the batch-normalised result, then max pooling
            x = pool(F.relu(norm(conv(x))))
        #average over every remaining time step -> (batch, features, 1)
        x = F.avg_pool1d(x, x.shape[-1])
        #move the features last so the linear layer acts on them -> (batch, 1, n_output)
        x = self.fc1(x.permute(0, 2, 1))
        #drop the singleton dimension left over from the pooling
        return torch.squeeze(x, 1)
#load the trained model once (the original repeated this whole sequence twice,
#reading the checkpoint from disk a second time for no effect)
model = M5()
#map_location='cpu' lets a checkpoint that was saved from a GPU be restored on a
#machine without CUDA; the model itself is created on the CPU above
model.load_state_dict(torch.load('best_audio_classifier.pt', map_location='cpu'))
#set the model to evaluation mode
model.eval()
#audio preprocessing function
def preprocess_audio(file_path, new_sample_rate=8000):
    """Load an audio file and turn it into a two-channel, resampled waveform.

    Args:
        file_path: path of the audio file, handed to ``torchaudio.load``.
        new_sample_rate: sample rate in Hz to resample to. Defaults to 8000,
            the value that used to be hard-coded here.

    Returns:
        The resampled waveform tensor with exactly two channels in its first
        dimension.
    """
    #load the audio file and get waveform and sample rate using torchaudio,ref:https://stackoverflow.com/questions/71108331/torchaudio-load-audio-with-specific-sampling-rate
    waveform, sample_rate = torchaudio.load(file_path)
    #resample from the file's own rate to the requested one
    transform = transforms.Resample(orig_freq=sample_rate, new_freq=new_sample_rate)
    waveform = transform(waveform)
    #ensure the waveform has two channels
    channels = waveform.shape[0]
    if channels == 1:
        #mono: duplicate the single channel
        waveform = torch.cat([waveform, waveform], dim=0)
    elif channels > 2:
        #more than two channels: keep the first two, otherwise the waveform
        #cannot be fed to a model built with two input channels
        waveform = waveform[:2]
    return waveform
def classify_audio(file_path):
    """Run the loaded model on one audio file and return the winning class index.

    Args:
        file_path: path of the audio file to classify.

    Returns:
        The index (a Python int) of the highest model output for this file.
    """
    #disables gradient calculation to save memory and computations,ref:https://discuss.pytorch.org/t/question-about-use-of-torch-max-function-to-calculate-accuracy/187500
    with torch.no_grad():
        #load and preprocess the audio file
        waveform = preprocess_audio(file_path)
        #add a batch dimension to match model input
        waveform = waveform.unsqueeze(0)
        #pass the processed waveform through the model
        outputs = model(waveform)
        #pick the class with the highest score (outputs are raw scores, no
        #softmax is applied; the ordering is what matters here)
        _, predicted = torch.max(outputs, 1)
    #return the predicted class index as an integer
    return predicted.item()
#define GUI functions,Ref:https://stackoverflow.com/questions/54785138/how-to-access-a-desired-path-with-filedialog-askopenfilename-in-tkinter
def audio_file():
    """Ask the user for an audio file and show its path in ``genre_label``.

    If the dialog is cancelled the label is left untouched, so a previously
    chosen file stays selected instead of being replaced by an empty value.
    """
    file_path = filedialog.askopenfilename()
    #a cancelled dialog yields an empty (falsy) value; nothing to record then
    if not file_path:
        return
    #set the text of the genre_label to display the selected file's path,ref:https://www.geeksforgeeks.org/how-to-change-the-tkinter-label-text/
    genre_label.config(text=f"Selected File: {file_path}")
#---- genre labels and KNN classifier (3-second feature table) ----
#read the per-segment feature table
data = pd.read_csv('data/features_3_sec.csv')
#replace the label column by integer codes,ref:https://stackoverflow.com/questions/61467312/application-function-labelencoder-fit-transform-in-python?rq=3
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])
#targets are the encoded labels,ref:https://www.kaggle.com/code/andradaolteanu/work-w-audio-data-visualise-classify-recommend/notebook#Introduction
y = data['label']
#features are every remaining column except the file name
X = data.drop(columns=['label', 'filename'])
#hold out 30% of the rows for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
#KNN classifier with 19 neighbours, fitted on the training part
knn = KNeighborsClassifier(n_neighbors=19).fit(X_train, y_train)

#---- track-to-track similarity (30-second feature table) ----
#read the per-track feature table, indexed by file name
data = pd.read_csv('data/features_30_sec.csv', index_col='filename')
#keep the labels of each track in their own DataFrame
labels = data[['label']]
#'length' and 'label' are not used as features
data = data.drop(columns=['length', 'label'])
#standardize the feature values
data_scaled = preprocessing.scale(data)
#pairwise cosine similarity between all tracks, as a 2D numpy array
similarity = cosine_similarity(data_scaled)
#same matrix as a DataFrame with default integer labels
sim_df_labels = pd.DataFrame(similarity)
#copy of it whose rows and columns are both labelled by the song file names,
#so a song can be looked up by name
sim_df_names = sim_df_labels.copy()
sim_df_names.index = labels.index
sim_df_names.columns = labels.index
def find_similar_songs(name, top_n=5):
    """Return the songs most similar to ``name``, best match first.

    Args:
        name: label of the song in ``sim_df_names`` (used for both the column
            lookup and the row that is dropped).
        top_n: maximum number of matches to return. Defaults to 5, the number
            that used to be hard-coded.

    Returns:
        A list with up to ``top_n`` labels from the index of ``sim_df_names``.

    Raises:
        KeyError: if ``name`` is not a label of ``sim_df_names``.
    """
    # Similarities of every song to the requested one, highest first
    series = sim_df_names[name].sort_values(ascending=False)
    # Remove the song itself (a song always matches itself best)
    series = series.drop(name)
    # Return the top matches as a list
    return series.head(top_n).index.tolist()
def classify_and_recommend():
    """Classify the selected file, show its genre and open the recommendations.

    The path is read back from the text of ``genre_label``, which has the form
    ``"<caption>: <path>"``. The genre label is updated first; the similar-song
    window is opened only when the file name is found in the similarity table.
    """
    #retrieve the file path selected by the user in the GUI,ref:https://stackoverflow.com/questions/63871376/tkinter-widget-cget-variable
    #partition splits at the FIRST ": " only, so a path that itself contains
    #": " is kept whole (split(": ")[1] would have cut it short)
    _, _, file_path = genre_label.cget("text").partition(": ")
    #check if a file has been selected
    if not file_path:
        messagebox.showwarning("No file selected", "Please choose an audio file first.")
        return
    #classify the selected audio file's genre using the pre-trained model
    predicted_genre_index = classify_audio(file_path)
    #use LabelEncoder to reverse transform the predicted index to get the genre name,ref:https://stackoverflow.com/questions/52870022/inverse-transform-method-labelencoder
    #NOTE(review): the model's output size and the number of encoded classes come
    #from different sources - confirm every index the model can emit is known to the encoder
    predicted_genre_label = label_encoder.inverse_transform([predicted_genre_index])[0]
    #update the GUI to display the predicted genre
    predicted_genre.config(text=f"Predicted Genre: {predicted_genre_label}")
    #extract the song name from the file path for finding similar songs
    song_name = file_path.split('/')[-1]
    #find a list of similar songs based on the song name; a file that is not
    #part of the similarity table raises KeyError, which is reported to the
    #user instead of being lost inside the Tk callback
    try:
        similar_songs = find_similar_songs(song_name)
    except KeyError:
        messagebox.showinfo(
            "No recommendations",
            f"'{song_name}' is not in the song library, so no similar songs can be listed.",
        )
        return
    #display the list of recommended similar songs in a new window
    show(root, similar_songs)
## Class for displaying recommendations dialog,ref:https://stackoverflow.com/questions/37219191/how-to-return-the-selected-items-of-a-listbox-when-using-wait-window-in-tkinte
class MyDialog:
    """Separate window that lists the recommended songs.

    Attributes set on the instance:
        toplevel: the ``tk.Toplevel`` window created for the list.
        listbox: the ``tk.Listbox`` holding one entry per song.
    """

    def __init__(self, parent, similar_songs):
        #new top-level window owned by the parent, with its title
        window = tk.Toplevel(parent)
        window.title("Similar Song Recommendations")
        #listbox placed at the top of the window, stretched horizontally
        song_list = tk.Listbox(window)
        song_list.pack(side="top", fill="x")
        #append the songs one by one, keeping their order
        for entry in similar_songs:
            song_list.insert(tk.END, entry)
        #expose both widgets on the instance
        self.toplevel = window
        self.listbox = song_list
#function to display recommendations
def show(parent, similar_songs):
    """Open a MyDialog window on ``parent`` listing ``similar_songs``."""
    #building the dialog is all that is needed; the instance is not kept
    MyDialog(parent=parent, similar_songs=similar_songs)
if __name__ == "__main__":
    #main GUI window and its title
    root = tk.Tk()
    root.title("Music Genre Classifier & Recommender")

    #widgets: file chooser button, selected-file label, classify button and
    #predicted-genre label (the two labels are read and updated by the callbacks)
    browse_button = tk.Button(root, text="Browse", command=audio_file)
    genre_label = tk.Label(root, text="Selected Audio: ")
    classify_button = tk.Button(root, text="Classify & Recommend", command=classify_and_recommend)
    predicted_genre = tk.Label(root, text="Predicted Genre: ")

    #pack them into the main window in display order
    for widget in (browse_button, genre_label, classify_button, predicted_genre):
        widget.pack()

    #run the GUI
    root.mainloop()