-
Notifications
You must be signed in to change notification settings - Fork 0
/
deepspeechSTT.py
106 lines (85 loc) · 2.01 KB
/
deepspeechSTT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import deepspeech, numpy as np, sys, os, pyaudio, time
from playsound import playsound
# Refuse to run on interpreters older than Python 3.
pv = sys.version_info[0]
if pv <= 2:
    raise Exception("Too old python")
def play(filep):
    """Play the audio file at *filep*.

    ``playsound`` is tried first. If it raises, the ``paplay`` command-line
    player is used as a fallback. The fallback is best-effort: if ``paplay``
    cannot be launched, a message is written to stderr and the function
    returns normally.

    Args:
        filep: Path of the audio file to play.
    """
    # Local import: only the fallback path needs it.
    import subprocess

    try:
        playsound(filep)
    except Exception:
        # ``except Exception`` (not a bare ``except``) lets Ctrl+C and
        # SystemExit propagate instead of triggering the fallback player.
        try:
            # Pass the path as a separate argv entry so no shell is involved:
            # names with quotes, spaces or ``$`` are handled verbatim.
            subprocess.run(["paplay", filep], check=False)
        except OSError as err:
            sys.stderr.write("paplay failed: {}\n".format(err))
def stt(lang, verbose=False, silence_timeout=1.5):
    """Record from the microphone and return the DeepSpeech transcript.

    Loads ``deepspeechModels/<lang>/<lang>.pbmm`` together with the matching
    ``.scorer`` file, plays ``beep.ogg`` as a ready cue, then streams audio
    from the input device into the recognizer. Recording ends when some text
    has been recognized and the intermediate transcript has stayed unchanged
    for ``silence_timeout`` seconds, or when the user presses Ctrl+C.

    Args:
        lang: Model directory name and file stem under ``deepspeechModels``.
        verbose: When true, print how long start-up took.
        silence_timeout: Seconds the intermediate transcript must remain
            unchanged before recording stops.

    Returns:
        The text returned by the recognizer stream's ``finishStream()``.
    """
    started = time.time()
    model_dir = "deepspeechModels/" + lang + "/"
    model = deepspeech.Model(model_dir + lang + ".pbmm")
    model.enableExternalScorer(model_dir + lang + ".scorer")
    ds_stream = model.createStream()

    # Per-call state shared with the audio callback through the closure.
    text_so_far = ""
    silence_start = None  # None means "the transcript changed on the last chunk"

    def process_audio(in_data, frame_count, time_info, status):
        """Feed one captured buffer to the recognizer and track silence."""
        nonlocal text_so_far, silence_start
        ds_stream.feedAudioContent(np.frombuffer(in_data, dtype=np.int16))
        text = ds_stream.intermediateDecode()
        if text != text_so_far:
            # New words were recognized: reset the silence timer.
            text_so_far = text
            silence_start = None
        elif silence_start is None:
            silence_start = time.time()
        elif (text_so_far != ""
              and time.time() - silence_start >= silence_timeout):
            # Signal completion through the callback's return flag rather
            # than closing the stream from inside its own callback; the
            # waiting loop below then sees is_active() turn false.
            return (None, pyaudio.paComplete)
        return (in_data, pyaudio.paContinue)

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=model.sampleRate(),
            input=True,
            frames_per_buffer=1024,
            # Do not capture until start_stream(), so the ready beep is not
            # fed to the recognizer.
            start=False,
            stream_callback=process_audio,
        )
        try:
            if verbose:
                print("Ready in {}s".format(time.time() - started))
            play("beep.ogg")
            stream.start_stream()
            try:
                while stream.is_active():
                    time.sleep(.1)
            except KeyboardInterrupt:
                # Ctrl+C ends the recording; whatever was heard is returned.
                pass
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()
    return ds_stream.finishStream()
if __name__ == "__main__":
    # Command line: [lang] [verbose]
    #   lang    -- model name, "eng" when omitted
    #   verbose -- the literal word "verbose" turns on timing output
    lang = sys.argv[1] if len(sys.argv) > 1 else "eng"
    verbose = len(sys.argv) > 2 and sys.argv[2] == "verbose"
    text = stt(lang, verbose)
    print("----\n" + text)