-
Notifications
You must be signed in to change notification settings - Fork 0
/
WAD_self.py
60 lines (48 loc) · 2.58 KB
/
WAD_self.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os

import torch
from jiwer import wer
from speechbrain.pretrained import EncoderDecoderASR
# Prefer the GPU when one is usable; otherwise fall back to CPU so the script
# still runs (more slowly) on machines without CUDA instead of erroring out
# while loading the model.
asr_device = "cuda" if torch.cuda.is_available() else "cpu"

# Pretrained SpeechBrain encoder-decoder ASR model identified by ``source``;
# ``savedir`` is the local directory handed to SpeechBrain for the model files.
asr_model = EncoderDecoderASR.from_hparams(
    source="speechbrain/asr-transformer-transformerlm-librispeech",
    savedir="pretrained_models/asr-transformer-transformerlm-librispeech",
    run_opts={"device": asr_device},
)
def transcribe_audio(audio_path):
    """Transcribe one audio file with the module-level ASR model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        Whatever ``asr_model.transcribe_file`` returns for that file (the
        recognized text).
    """
    return asr_model.transcribe_file(audio_path)
def parse_transcripts(text):
    """Map utterance IDs to their reference transcripts.

    Every non-blank line of ``text`` is read as
    ``<utterance_id> <transcript words...>``: the ID is everything before the
    first space and the transcript is everything after it.

    Args:
        text: Full contents of a ``*.trans.txt`` file.

    Returns:
        A dict from utterance ID to transcript string. A line that holds only
        an ID maps to the empty string.
    """
    transcripts = {}
    for line in text.strip().split('\n'):
        if not line.strip():
            # A blank line would otherwise register a bogus "" utterance ID.
            continue
        utterance_id, _, transcript = line.partition(' ')
        transcripts[utterance_id] = transcript
    return transcripts


# Input folders, resolved against the current working directory. The trailing
# empty component keeps the trailing path separator the paths always carried.
original_audio_folder = os.path.join(os.getcwd(), "selfSampling", "")
processed_audio_folder = os.path.join(os.getcwd(), "processedSelfSampling", "")

# Walk every directory below the original-audio folder. A directory that holds
# .wav files is expected to also hold "<parent dir>-<this dir>.trans.txt" with
# one reference transcript per utterance.
for root, _dirs, files in os.walk(original_audio_folder):
    wav_files = [name for name in files if name.endswith(".wav")]
    if not wav_files:
        continue

    parts = root.split(os.sep)
    if len(parts) < 3:
        continue
    speaker_id, recording_id = parts[-2], parts[-1]
    txt_path = os.path.join(root, f"{speaker_id}-{recording_id}.trans.txt")

    if not os.path.exists(txt_path):
        print(f"Text file not found for audio in {root}")
        continue
    with open(txt_path, 'r', encoding='utf-8') as f:
        text_dict = parse_transcripts(f.read())

    for file in wav_files:
        # The utterance ID is the file name up to its first dot.
        utterance_id = file.split('.')[0]
        if utterance_id not in text_dict:
            print(f"Transcription not found for {file}")
            continue
        original_text = text_dict[utterance_id]

        original_audio_path = os.path.join(root, file)
        # Processed counterparts all live flat in one folder, prefixed
        # with "processed_".
        processed_audio_path = os.path.join(processed_audio_folder,
                                            'processed_' + file)
        if not os.path.isfile(processed_audio_path):
            # Skip instead of letting one missing file abort the whole run;
            # checked before transcribing so no work is wasted on the original.
            print(f"Processed audio not found for {file}")
            continue

        original_transcribed_text = transcribe_audio(original_audio_path)
        processed_transcribed_text = transcribe_audio(processed_audio_path)

        # Both hypotheses are scored against the same reference transcript.
        original_wer = wer(original_text, original_transcribed_text)
        processed_wer = wer(original_text, processed_transcribed_text)
        # WAD here is the processed WER minus the original WER; a positive
        # value means the processed audio was transcribed with more errors.
        WAD = processed_wer - original_wer

        # Append after every utterance so partial results survive a crash.
        with open('WAD_results_self.txt', 'a', encoding='utf-8') as f:
            print(f"Original audio: {file}", file=f)
            print(f"Original word error rate: {original_wer}", file=f)
            print(f"Processed word error rate: {processed_wer}", file=f)
            print("WAD: ", WAD, file=f)
            print("", file=f)