forked from hhhaaahhhaa/TTS-systems
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluation.py
More file actions
76 lines (65 loc) · 2.16 KB
/
Copy pathevaluation.py
File metadata and controls
76 lines (65 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import glob
import re
import jiwer
import speech_recognition as sr
from tqdm import tqdm
import Define
# Maps this project's language tags (the inner keys) to the language
# identifier each speech-recognition backend expects.
TAG_MAPPING = {
    # Google Web Speech API takes BCP-47 style tags, optionally with a region:
    # https://stackoverflow.com/questions/14257598/what-are-language-codes-in-chromes-implementation-of-the-html5-speech-recogniti/14302134#14302134
    "google": {
        "en": "en-US",
        "zh": "zh-CN",
        "ko": "ko",
        "jp": "ja",
        "fr": "fr-FR",
        "de": "de-DE",
        "es": "es-ES",
        "ru": "ru",
    },
    # Whisper only accepts the bare language codes listed in its tokenizer;
    # a region-qualified tag such as "en-US" is rejected as unsupported:
    # https://github.com/openai/whisper/blob/main/whisper/tokenizer.py
    "whisper": {
        "en": "en",
        "zh": "zh",
        "ko": "ko",
        "jp": "ja",
        "fr": "fr",
        "de": "de",
        "es": "es",
        "ru": "ru",
    },
}
# Module-level recognizer instance shared by the whisper() and google() helpers.
r = sr.Recognizer()
def whisper(wav_path, lang: str):
    """Transcribe an audio file with the Whisper recognizer.

    Args:
        wav_path: Audio file location, passed straight to ``sr.AudioFile``.
        lang: Project language tag; must be a key of ``TAG_MAPPING["whisper"]``.

    Returns:
        The recognized text. If recognition raises ``sr.UnknownValueError``
        or ``sr.RequestError``, a message is printed and ``""`` is returned.

    Raises:
        KeyError: If ``lang`` has no entry in ``TAG_MAPPING["whisper"]``.
    """
    # Pull the whole recording into memory before running recognition.
    with sr.AudioFile(wav_path) as audio_file:
        recording = r.record(audio_file)

    transcript = ""
    try:
        transcript = r.recognize_whisper(
            recording,
            model="large",
            language=TAG_MAPPING["whisper"][lang],
        )
    except sr.UnknownValueError:
        print("Whisper could not understand audio")
    except sr.RequestError as err:
        print("Whisper error; {0}".format(err))
    return transcript
def google(wav_path, lang):
    """Transcribe an audio file with the Google speech recognizer.

    Args:
        wav_path: Audio file location, passed straight to ``sr.AudioFile``.
        lang: Project language tag; must be a key of ``TAG_MAPPING["google"]``.

    Returns:
        The recognized text. If recognition raises ``sr.UnknownValueError``
        or ``sr.RequestError``, a message is printed and ``""`` is returned.

    Raises:
        KeyError: If ``lang`` has no entry in ``TAG_MAPPING["google"]``.
    """
    # Pull the whole recording into memory before running recognition.
    with sr.AudioFile(wav_path) as audio_file:
        recording = r.record(audio_file)

    transcript = ""
    try:
        # key=None is passed explicitly, exactly as the call has always done.
        transcript = r.recognize_google(
            recording,
            key=None,
            language=TAG_MAPPING["google"][lang],
        )
    except sr.UnknownValueError:
        print("Google could not understand audio")
    except sr.RequestError as err:
        print("Google error; {0}".format(err))
    return transcript
def cer(raw_text, pred_text, remove_whitespace=False):
    """Compute the character error rate between two normalized strings.

    Both inputs get the same normalization before scoring: every character
    that is neither a word character nor whitespace is removed, space
    characters are optionally removed, and the result is upper-cased.

    Args:
        raw_text: Ground-truth text; passed to ``jiwer.cer`` as first argument.
        pred_text: Recognized text; passed to ``jiwer.cer`` as second argument.
        remove_whitespace: When true, strip ``' '`` characters from both
            strings (other whitespace such as tabs is left in place).

    Returns:
        Whatever ``jiwer.cer`` returns for the two normalized strings.
    """

    def _normalize(text):
        # Strip punctuation/symbols; word characters and whitespace survive.
        cleaned = re.sub(r'[^\w\s]', '', text)
        if remove_whitespace:
            cleaned = cleaned.replace(' ', '')
        return cleaned.upper()

    reference = _normalize(raw_text)
    hypothesis = _normalize(pred_text)
    return jiwer.cer(reference, hypothesis)