import os

import librosa
import numpy as np
import torch
import torch.utils.data as utils
from tqdm import tqdm

import preprocess

# Expected clip length in samples (1 s at 16 kHz).
SAMPLING_DURATION = 16000
# Keywords treated as positives; every other folder yields negatives.
COMMANDS = ["one", "two", "three"]

def build_dataset():
    """Build a list of [mfcc, label] pairs from the speech_commands folders."""
    dataset = []
    audio_preproc = preprocess.AudioPreprocessor()
    librosa_load = librosa.load    # Local aliases to avoid repeated
    os_path_join = os.path.join    # attribute lookups in the hot loops.
    for folder in tqdm(next(os.walk('../speech_commands/'))[1]):
        if folder == "_background_noise_":
            continue
        tmp = []
        tmp_append = tmp.append
        # Label: 1 if the folder is one of the target commands, 0 otherwise.
        commande = int(folder in COMMANDS)
        for filename in tqdm(os.listdir(os_path_join('../speech_commands', folder))):
            # Keep only ~20% of negative examples to limit class imbalance.
            if not commande and np.random.rand() > 0.2:
                continue
            y, _ = librosa_load(os_path_join('../speech_commands', folder, filename), sr=None)
            if len(y) < SAMPLING_DURATION:
                y = audio_preproc.pad_short_data(y, up_to=SAMPLING_DURATION)
            S = audio_preproc.compute_mfccs(y)
            tmp_append([S, np.array([commande])])
        dataset += tmp
    return dataset
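
# Sketch: inspect one element to confirm the [mfcc, label] layout. The exact
# MFCC shape depends on preprocess.AudioPreprocessor, which is not shown here.
# sample = build_dataset()[0]
# print(sample[0].shape, sample[1])  # e.g. (n_mfcc, n_frames), array([0]) or array([1])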

def dataset_augment():
    """Augment the command classes by mixing each clip with background noise."""
    dataset = []
    audio_preproc = preprocess.AudioPreprocessor()
    librosa_load = librosa.load    # Local aliases to avoid repeated
    os_path_join = os.path.join    # attribute lookups in the hot loops.
    # Only the target command folders are visited, so every label is 1.
    for folder in tqdm(COMMANDS):
        tmp = []
        tmp_append = tmp.append
        commande = 1
        for filename in tqdm(os.listdir(os_path_join('../speech_commands', folder))):
            y, _ = librosa_load(os_path_join('../speech_commands', folder, filename), sr=None)
            if len(y) < SAMPLING_DURATION:
                y = audio_preproc.pad_short_data(y, up_to=SAMPLING_DURATION)
            S = audio_preproc.compute_mfccs(y)
            tmp_append([S, np.array([commande])])
            # One extra example per background-noise recording.
            for padding in ['doing_the_dishes', 'dude_miaowing', 'exercise_bike',
                            'pink_noise', 'running_tap', 'white_noise']:
                y_aug = audio_preproc.noise(y, up_to=SAMPLING_DURATION, padding=padding)
                S = audio_preproc.compute_mfccs(y_aug)
                tmp_append([S, np.array([commande])])
        dataset += tmp
    return dataset
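
# Sketch, mirroring the commented usage below: augmented examples can be
# concatenated with the base dataset before saving. The file name here is an
# assumption following the '../dataset_40mfcc_16k' convention below.
# ds_aug = dataset_augment()
# np.save('../dataset_40mfcc_16k_aug', ds + ds_aug)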
# ds = build_dataset()
# np.save('../dataset_40mfcc_16k', ds)
# ds = np.load('../dataset_40mfcc_16k.npy')

def split(dataset, train_prop=0.8, val_prop=0.1, test_prop=0.1):
    """Shuffle the dataset and split it into train/val/test TensorDatasets.

    test_prop is implied by the other two proportions and is kept only for
    readability of the signature.
    """
    N = len(dataset)
    shuffled_indices = list(range(N))
    np.random.shuffle(shuffled_indices)
    split_1, split_2 = int(train_prop * N), int((train_prop + val_prop) * N)
    train_indices = shuffled_indices[:split_1]
    val_indices = shuffled_indices[split_1:split_2]
    test_indices = shuffled_indices[split_2:]

    def to_tensor_dataset(indices):
        # Stack the MFCC matrices and labels of the selected examples.
        tensor_x = torch.stack([torch.Tensor(dataset[i][0]) for i in indices])
        tensor_y = torch.stack([torch.Tensor(dataset[i][1]) for i in indices])
        return utils.TensorDataset(tensor_x, tensor_y)

    train_set = to_tensor_dataset(train_indices)
    val_set = to_tensor_dataset(val_indices)
    test_set = to_tensor_dataset(test_indices)
    return train_set, val_set, test_set
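
# Example (sketch): wiring the splits into DataLoaders for training. Batch
# size and shuffle flags are illustrative assumptions, not values taken from
# this project.
if __name__ == "__main__":
    ds = np.load('../dataset_40mfcc_16k.npy', allow_pickle=True)
    train_set, val_set, test_set = split(ds)
    train_loader = utils.DataLoader(train_set, batch_size=32, shuffle=True)
    val_loader = utils.DataLoader(val_set, batch_size=32)
    test_loader = utils.DataLoader(test_set, batch_size=32)
    print(len(train_set), len(val_set), len(test_set))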