forked from nobody132/masr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature.py
36 lines (28 loc) · 870 Bytes
/
feature.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import librosa
import wave
import numpy as np
import torch
# Front-end parameters used by the feature-extraction helpers in this module.
# NOTE(review): nothing in this file resamples or checks the input audio, so
# these values only hold if callers supply audio at `sample_rate` - confirm.
sample_rate = 16000  # presumably samples per second (Hz) of the input audio
window_size = 0.02  # analysis window length; multiplied by sample_rate, so presumably seconds
window_stride = 0.01  # step between successive windows, same unit as window_size
n_fft = int(sample_rate * window_size)  # FFT size in samples (320 with the values above)
win_length = n_fft  # window length is kept equal to the FFT size
hop_length = int(sample_rate * window_stride)  # samples between frames (160 with the values above)
window = "hamming"  # window name passed to librosa.stft
def load_audio(wav_path, normalize=True) -> np.ndarray:
    """Read the sample data of a WAV file into a float NumPy array.

    The frame bytes are decoded as little-endian signed 16-bit integers
    regardless of what the file header declares, and channels are not
    separated: a multi-channel file comes back as one flat, interleaved array.

    Args:
        wav_path: Passed straight to ``wave.open``.
        normalize: When true, shift the samples to zero mean and scale them
            to unit standard deviation.

    Returns:
        A 1-D ``float64`` array with one entry per 16-bit sample. Empty input
        is returned as an empty array, and constant input (e.g. silence) is
        only mean-centred, so the result never contains NaN from a 0/0.
    """
    with wave.open(wav_path) as wav_file:
        raw = wav_file.readframes(wav_file.getnframes())
    # WAV PCM is little-endian on disk; "<i2" keeps the decode correct on
    # big-endian hosts, where a bare "int16" would byte-swap every sample.
    samples = np.frombuffer(raw, dtype="<i2").astype("float")
    if normalize and samples.size:
        spread = samples.std()
        samples = samples - samples.mean()
        # A zero spread means every sample is identical; dividing would turn
        # the whole signal into NaN, so leave it centred at zero instead.
        if spread > 0:
            samples = samples / spread
    return samples
def spectrogram(wav, normalize=True) -> torch.Tensor:
    """Compute a log-magnitude STFT spectrogram as a float32 torch tensor.

    The short-time Fourier transform is taken with the module-level
    ``n_fft``, ``hop_length``, ``win_length`` and ``window`` settings; its
    magnitude is compressed with ``log(1 + x)`` and converted to a
    ``torch.FloatTensor``.

    Args:
        wav: Audio samples handed to ``librosa.stft`` (presumably the 1-D
            float array produced by ``load_audio`` - confirm against callers).
        normalize: When true, shift the spectrogram to zero mean and scale it
            to unit standard deviation over all of its elements.

    Returns:
        The spectrogram tensor. NOTE(review): per librosa's documentation the
        layout should be (frequency bins, frames) - confirm.
    """
    stft_matrix = librosa.stft(
        wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window
    )
    # Only the magnitude is used; the phase component is discarded.
    magnitude, _ = librosa.magphase(stft_matrix)
    features = torch.FloatTensor(np.log1p(magnitude))
    if normalize:
        spread = features.std()
        features = features - features.mean()
        # A zero (or undefined) spread, e.g. for all-silent input, would turn
        # every value into NaN on division; keep the centred values instead.
        if spread > 0:
            features = features / spread
    return features