-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathpersian_speech_to_text.py
47 lines (40 loc) · 1.5 KB
/
persian_speech_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import importlib
import json
import os
import subprocess
import sys
import wave
class SpeechToText:
    """Offline speech-to-text transcription of WAV files using Vosk.

    The Vosk model is expected to be unpacked in a directory named
    ``model`` inside the current working directory.
    """

    # Number of audio frames handed to the recognizer per iteration.
    _CHUNK_FRAMES = 4000

    def __init__(self):
        """Make sure ``vosk`` is importable and the model directory exists.

        If ``vosk`` is not installed it is installed with pip (pinned to
        0.3.30) and then imported. The process exits with status 1 when the
        ``model`` directory is missing.

        Raises:
            subprocess.CalledProcessError: if the pip installation fails.
            SystemExit: if the ``model`` directory does not exist.
        """
        try:
            from vosk import SetLogLevel
        except ModuleNotFoundError:
            print("module 'vosk' is not installed")
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", "vosk==0.3.30"]
            )
            # The package was installed while the interpreter is running:
            # refresh the import system's caches, then import it for real so
            # that SetLogLevel is actually bound below.
            importlib.invalidate_caches()
            from vosk import SetLogLevel
        else:
            print("module 'vosk' is installed")

        # Same log level the original code requested from Vosk.
        SetLogLevel(-1)

        if not os.path.exists("model"):
            print("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
            # sys.exit is always available; the bare exit() helper is only
            # injected by the site module.
            sys.exit(1)

    @staticmethod
    def _extract_text(result_json):
        """Return the ``"text"`` field of a recognizer JSON result string."""
        return json.loads(result_json).get("text", "")

    def get_text_vosk(self, filename):
        """Transcribe a mono 16-bit PCM WAV file and return the text.

        Args:
            filename: Path to the WAV file to transcribe.

        Returns:
            The recognized text. Every intermediate utterance is followed by
            a single space, and the final utterance is appended as-is.

        Raises:
            SystemExit: if the file is not mono, 16-bit, uncompressed PCM.
            wave.Error: if the file cannot be parsed as WAV.
        """
        # The context manager guarantees the file handle is released on every
        # path, including the early exit below.
        with wave.open(filename, "rb") as wf:
            if (
                wf.getnchannels() != 1
                or wf.getsampwidth() != 2
                or wf.getcomptype() != "NONE"
            ):
                print("Audio file must be WAV format mono PCM.")
                sys.exit(1)

            # Imported lazily so that unsupported audio is rejected before
            # the recognizer library and model are loaded.
            from vosk import Model, KaldiRecognizer

            model = Model("model")
            rec = KaldiRecognizer(model, wf.getframerate())
            rec.SetWords(True)

            pieces = []
            while True:
                data = wf.readframes(self._CHUNK_FRAMES)
                if not data:
                    break
                # AcceptWaveform returning true means a finished utterance is
                # available via Result().
                if rec.AcceptWaveform(data):
                    pieces.append(self._extract_text(rec.Result()) + " ")

            # Whatever is still buffered in the recognizer at end of file.
            pieces.append(self._extract_text(rec.FinalResult()))

        return "".join(pieces)