-
Notifications
You must be signed in to change notification settings - Fork 7
/
index.js
executable file
·246 lines (229 loc) · 10.1 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#!/usr/bin/env node
const {calculateFromFile, printResult} = require('./lib/benchmark');
const commandLineArgs = require('command-line-args');
const commandLineUsage = require('command-line-usage');
const fs = require('fs');
/**
 * Print the command-line usage guide to stdout, optionally preceded by an
 * "Errors detected" section.
 *
 * The option list shown here mirrors the parser's option definitions: the
 * short aliases the parser accepts are displayed, and `--help` is shown as a
 * plain Boolean flag rather than a `[true|false]` string option.
 *
 * @param {Array<{message?: string}>} [errors=[]] - Errors to report. Entries
 *   that are falsy or have no `message` are skipped.
 * @returns {void}
 */
const printUsage = (errors = []) => {
  const errorMessages = errors
    .filter((err) => err && err.message)
    .map((err) => err.message);

  const sections = [
    {
      header: 'Speech Recognition Evaluation Utility',
      content: 'A simple utility to quickly evaluate any {italic Speech-to-Text (STT)} or {italic Automated Speech Recognition (ASR)} system.'
    },
    {
      header: 'Synopsis',
      content: [
        '$ asr-eval {bold --original} {underline file} {bold --generated} {underline file}',
        '$ asr-eval [{underline options}] {bold --original} {underline file} {bold --generated} {underline file}',
        '$ asr-eval {bold --help}'
      ],
    },
    {
      header: 'Options',
      optionList: [
        {
          name: 'original',
          alias: 'o',
          type: String,
          typeLabel: '{underline file}',
          description: 'Original File to be used as reference. Usually, this should be the transcribed file by a Human being.'
        },
        {
          name: 'generated',
          alias: 'g',
          type: String,
          typeLabel: '{underline file}',
          description: 'File with the output generated by Speech Recognition System.'
        },
        {
          name: 'wer',
          alias: 'e',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Print Word Error Rate (WER).'
        },
        {
          name: 'wil',
          alias: 'i',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Print Word Information Loss (WIL).'
        },
        {
          name: 'distance',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: false. Print total word distance after comparison.'
        },
        {
          name: 'stats',
          alias: 't',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: false. Print statistics about original and generate files, before and after pre-processing. ' +
            'Also prints statistics about word level and phrase level differences.'
        },
        {
          name: 'pairs',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: false. Print all the difference pairs with type of difference.'
        },
        {
          name: 'textcomparison',
          alias: 'c',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: false. Print the text comparison between two files with highlighting.'
        },
        {
          name: 'removespeakers',
          alias: 's',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Remove the speaker at the start of each line in files before calculations. ' +
            'The speaker should be separated by colon ":" i.e. {italic speaker_name}: {italic text} ' +
            'For e.g. "John Doe: Hello, I am John." would get converted to simply "Hello, I am John."'
        },
        {
          name: 'removeannotations',
          alias: 'a',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Remove any custom annotations in the transcript before calculations. This is useful when removing custom annotations done by human transcribers. ' +
            ' Anything in square brackets {italic []} are detected as annotations.' +
            ' For e.g. "Hello, I am [inaudible 00:12] because of few reasons." would get converted to "Hello, I am because of few reasons." '
        },
        {
          name: 'removewhitespaces',
          alias: 'w',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Remove any extra white spaces before calculations.'
        },
        {
          name: 'removequotes',
          alias: 'q',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Remove any double quotes \'"\' from the files before calculations.'
        },
        {
          name: 'removedashes',
          alias: 'd',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Remove any dashes (hyphens) "-" from the files before calculations.'
        },
        {
          name: 'removepunctuations',
          alias: 'p',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Remove any punctuations ".,?!" from the files before calculations.'
        },
        {
          name: 'lowercase',
          alias: 'l',
          type: String,
          typeLabel: '[true|false]',
          description: 'Default: true. Convert both files to lower case before calculations. ' +
            'This is useful if evaluation needs to be done in case-insensitive way.'
        },
        {
          // The parser treats --help as a Boolean flag, so no value label is shown.
          name: 'help',
          alias: 'h',
          type: Boolean,
          description: 'Print this usage guide.'
        }
      ]
    },
    {
      content: 'Project home: {underline https://github.com/symblai/speech-recognition-evaluation}'
    }
  ];

  // Only show the error section when there is at least one message to report.
  const sectionsToPrint = errorMessages.length > 0
    ? [{ header: 'Errors detected', content: errorMessages }, ...sections]
    : sections;

  console.log(commandLineUsage(sectionsToPrint));
};
/**
 * Option definitions handed to the command-line parser.
 *
 * The two file options and `--help` are written out explicitly; every other
 * option is a String-typed flag with a textual default, generated from the
 * compact `[name, alias, defaultValue]` table below. An `alias` of `null`
 * means the option has no short form, and the key is omitted entirely.
 */
const optionDefinitions = [
  { name: 'original', alias: 'o', type: String, multiple: false },
  { name: 'generated', alias: 'g', type: String, multiple: false },
  ...[
    ['removespeakers', 's', 'true'],
    ['removeannotations', 'a', 'true'],
    ['removewhitespaces', 'w', 'true'],
    ['removequotes', 'q', 'true'],
    ['removedashes', 'd', 'true'],
    ['removepunctuations', 'p', 'true'],
    ['lowercase', 'l', 'true'],
    ['wer', 'e', 'true'],
    ['wil', 'i', 'true'],
    ['distance', null, 'false'],
    ['stats', 't', 'false'],
    ['pairs', null, 'false'],
    ['textcomparison', 'c', 'false'],
  ].map(([name, alias, defaultValue]) => (
    alias === null
      ? { name, type: String, defaultValue }
      : { name, alias, type: String, defaultValue }
  )),
  { name: 'help', alias: 'h', type: Boolean },
];
/**
 * Parse a textual boolean.
 *
 * Booleans are returned unchanged. Strings are trimmed and compared
 * case-insensitively against `'true'` and `'false'`. Anything else,
 * including `null`, numbers, objects and unrecognised strings, yields
 * `undefined` instead of throwing.
 *
 * @param {string|boolean} [string=''] - Value to interpret.
 * @returns {boolean|undefined} The parsed boolean, or `undefined` when the
 *   value is not a recognisable boolean.
 */
const parseBoolean = (string = '') => {
  if (typeof string === 'boolean') {
    return string;
  }
  // The default parameter only covers `undefined`; guard against `null` and
  // other non-string values that have no `toLowerCase`.
  if (typeof string !== 'string') {
    return undefined;
  }
  const normalized = string.toLowerCase().trim();
  if (normalized === 'true') {
    return true;
  }
  if (normalized === 'false') {
    return false;
  }
  return undefined;
};
/**
 * Convert a raw command-line option value into a boolean.
 *
 * A missing value (`null` or `undefined`) counts as `true`; every other value
 * is handed to `parseBoolean`, whose result is returned unchanged.
 * NOTE(review): a `null` value presumably comes from a flag passed without an
 * explicit value — confirm against the argument parser's behaviour.
 *
 * @param {string|boolean|null|undefined} string - Raw option value.
 * @returns {boolean|undefined} Whatever `parseBoolean` returns, or `true` for
 *   a missing value.
 */
const convertStringArgToBoolean = (string) => (
  // `== null` matches exactly `null` and `undefined`.
  string == null ? true : parseBoolean(string)
);
// CLI entry point: parse arguments, validate the two input files, run the
// evaluation and print the requested metrics. Any failure prints the usage
// guide with an error section and marks the process as failed (exit code 1)
// so that shell scripts and CI jobs can detect it.
try {
  const options = commandLineArgs(optionDefinitions);

  /**
   * Check that both input file paths were supplied and exist on disk.
   *
   * @param {{original?: string, generated?: string}} opts - Parsed options.
   * @returns {Array<{message: string}>} One entry per invalid path; empty
   *   when both paths are usable.
   */
  const validateOptions = (opts) => {
    const errors = [];
    if (!opts.original || !fs.existsSync(opts.original)) {
      errors.push({
        message: `File path provided in --original '${opts.original}' is invalid or doesn't exist.`
      });
    }
    if (!opts.generated || !fs.existsSync(opts.generated)) {
      errors.push({
        message: `File path provided in --generated '${opts.generated}' is invalid or doesn't exist.`
      });
    }
    return errors;
  };

  const {original, generated, help} = options;
  if (help) {
    printUsage();
  } else {
    const errors = validateOptions(options);
    if (errors.length > 0) {
      printUsage(errors);
      // Signal failure without cutting off pending stdout writes.
      process.exitCode = 1;
    } else {
      // Pre-processing switches applied to both files before comparison.
      const result = calculateFromFile(original, generated, {
        removeSpeakerStarters: convertStringArgToBoolean(options.removespeakers),
        removeAnnotations: convertStringArgToBoolean(options.removeannotations),
        removeExtraWhiteSpaces: convertStringArgToBoolean(options.removewhitespaces),
        removeQuotes: convertStringArgToBoolean(options.removequotes),
        removeDashes: convertStringArgToBoolean(options.removedashes),
        removePunctuations: convertStringArgToBoolean(options.removepunctuations),
        lowerCase: convertStringArgToBoolean(options.lowercase)
      });
      // Output switches selecting which parts of the result are printed.
      printResult(result, {
        wer: convertStringArgToBoolean(options.wer),
        wil: convertStringArgToBoolean(options.wil),
        distance: convertStringArgToBoolean(options.distance),
        stats: convertStringArgToBoolean(options.stats),
        diffPairs: convertStringArgToBoolean(options.pairs),
        textComparison: convertStringArgToBoolean(options.textcomparison)
      });
    }
  }
} catch (e) {
  // Covers argument-parsing errors as well as failures during evaluation.
  printUsage([e]);
  process.exitCode = 1;
}