-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathspeecht-cli
executable file
·221 lines (190 loc) · 10.7 KB
/
speecht-cli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python3
# Copyright 2017 Louis Kirsch. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import argparse
import os
from lazy import lazy
class CLI:
    """Command line front end of the ``speecht-cli`` script.

    Builds an ``argparse`` parser with one sub-command per task (``train``,
    ``evaluate``, ``record``, ``search``, ``preprocess`` and ``export``),
    parses the command line on first access of :attr:`parsed`, and dispatches
    to the executor object belonging to the selected sub-command.
    """

    def __init__(self):
        """Create the top-level parser and register every sub-command."""
        self.parser = argparse.ArgumentParser()
        # ``dest='command'`` stores the chosen sub-command name on the parsed namespace.
        self.subparsers = self.parser.add_subparsers(help='sub-command help', dest='command')
        # Options shared by all sub-commands; handed to each one via ``parents=``.
        self.base_parser = self._create_base_parser()
        self._add_training_parser()
        self._add_evaluation_parser()
        self._add_recording_parser()
        self._add_parameter_search_parser()
        self._add_preprocess_parser()
        self._add_export_parser()

    def _create_base_parser(self) -> argparse.ArgumentParser:
        """Return a help-less parser holding the options common to all sub-commands.

        ``add_help=False`` is required because the parser is only used as a
        parent; each sub-command parser contributes its own ``-h/--help``.
        """
        base_parser = argparse.ArgumentParser(add_help=False)
        # ``--mfcc`` and ``--power`` both write to ``feature_type``; the last one given wins.
        base_parser.add_argument('--mfcc', dest='feature_type', action='store_const', const='mfcc',
                                 help='Use a mfccs as input.')
        base_parser.add_argument('--power', dest='feature_type', action='store_const', const='power',
                                 help='Use a power spectrogram as input.')
        base_parser.add_argument('--batch-size', dest='batch_size', type=int, default=64,
                                 help='Batch size to use.')
        base_parser.add_argument('--run-name', dest='run_name', type=str, default='noname',
                                 help='Give this training a name to appear in tensorboard.')
        base_parser.add_argument('--data-dir', dest='data_dir', type=str, default='data',
                                 help='Data directory.')
        base_parser.add_argument('--train-dir', dest='train_dir', type=str, default='train',
                                 help='Training directory to store the runs in.')
        base_parser.add_argument('--log-dir', dest='log_dir', type=str, default='log',
                                 help='Log directory to log the runs in.')
        base_parser.set_defaults(feature_type='power')
        return base_parser

    def _add_input_size_argument(self, parser: argparse.ArgumentParser):
        """Add the ``--input-size`` option (default 128) to ``parser``."""
        parser.add_argument('--input-size', dest='input_size', type=int, default=128,
                            help='The input size of each sample, depending on what preprocessing was used')

    def _add_export_parser(self):
        """Register the ``export`` sub-command."""
        export_parser = self.subparsers.add_parser('export', help='Export network details',
                                                   parents=[self.base_parser])
        export_parser.add_argument('--weights', dest='export_weights_dir', type=str,
                                   help='Store the weights in numpy arrays')
        self._add_input_size_argument(export_parser)

    def _add_training_parser(self):
        """Register the ``train`` sub-command and its optimizer/checkpoint options."""
        training_parser = self.subparsers.add_parser('train', help='Train the wav2letter weights.',
                                                     parents=[self.base_parser])
        training_parser.add_argument('--learning-rate', dest='learning_rate', type=float, default=1e-4,
                                     help='The initial learning rate.')
        training_parser.add_argument('--reset-learning-rate', dest='reset_learning_rate', action='store_true',
                                     help='Reset the learning rate to the default or provided value.')
        training_parser.add_argument('--learning-rate-decay-factor', dest='learning_rate_decay_factor',
                                     type=float, default=0,
                                     help='Enable learning rate decay, decays by the given factor.')
        training_parser.add_argument('--momentum', dest='momentum', type=float, default=0.9,
                                     help='Optimizer momentum.')
        training_parser.add_argument('--max-gradient-norm', dest='max_gradient_norm', type=float, default=5.0,
                                     help='Clip gradients to this norm.')
        training_parser.add_argument('--limit-training-set', dest='limit_training_set', type=int, default=0,
                                     help='Train on a smaller training set, limited to the specified size')
        training_parser.add_argument('--steps-per-checkpoint', dest='steps_per_checkpoint', type=int, default=1000,
                                     help='How many training steps to do per checkpoint.')

    def _add_language_model_argument(self, parser: argparse.ArgumentParser):
        """Add the language model options (model directory and score weights) to ``parser``."""
        parser.add_argument('--language-model', dest='language_model', type=str,
                            help='Use beam search with given language model. '
                                 'Specify a directory containing `kenlm-model.binary`, '
                                 '`vocabulary` and `trie`. '
                                 'Language model must be binary format with probing hash table.')
        parser.add_argument('--lm-weight', dest='lm_weight', type=float, default=0.8,
                            help='The weight multiplied with the language model score')
        parser.add_argument('--word-count-weight', dest='word_count_weight', type=float, default=0.0,
                            help='The weight added for each word')
        parser.add_argument('--valid-word-count-weight', dest='valid_word_count_weight', type=float, default=2.3,
                            help='The weight added for each in vocabulary word')

    def _add_evaluation_parser(self):
        """Register the ``evaluate`` sub-command; the dataset defaults to ``test``."""
        evaluation_parser = self.subparsers.add_parser('evaluate', help='Evaluate the development or test set.',
                                                       parents=[self.base_parser])
        evaluation_parser.add_argument('--dev', dest='dataset', action='store_const', const='dev',
                                       help='Use the development dataset.')
        evaluation_parser.add_argument('--test', dest='dataset', action='store_const', const='test',
                                       help='Use the test dataset.')
        # ``store_false`` makes ``should_save`` default to True.
        evaluation_parser.add_argument('--no-save', dest='should_save', action='store_false',
                                       help='Do not save evaluation')
        evaluation_parser.add_argument('--step-count', dest='step_count', type=int, default=0,
                                       help='Number of steps to evaluate')
        self._add_language_model_argument(evaluation_parser)
        evaluation_parser.set_defaults(dataset='test')

    def _add_recording_parser(self):
        """Register the ``record`` sub-command."""
        recording_parser = self.subparsers.add_parser('record', help='Record using your microphone and inspect '
                                                                     'the transcription.',
                                                      parents=[self.base_parser])
        self._add_input_size_argument(recording_parser)
        self._add_language_model_argument(recording_parser)

    def _add_preprocess_parser(self):
        """Register the ``preprocess`` sub-command."""
        preprocess_parser = self.subparsers.add_parser('preprocess', help='Preprocess and cache all audio.',
                                                       parents=[self.base_parser])
        preprocess_parser.add_argument('--train-only', dest='train_only', action='store_true',
                                       help='Preprocess only training data')
        preprocess_parser.add_argument('--test-only', dest='test_only', action='store_true',
                                       help='Preprocess only test data')
        preprocess_parser.add_argument('--dev-only', dest='dev_only', action='store_true',
                                       help='Preprocess only development data')

    def _add_parameter_search_parser(self):
        """Register the ``search`` sub-command."""
        # The first literal ends with a space so the two concatenated parts do not
        # run together ("parametersusing") in the rendered help text.
        parameter_search_parser = self.subparsers.add_parser('search',
                                                             help='Search for language model hyper parameters '
                                                                  'using local search.',
                                                             parents=[self.base_parser])
        parameter_search_parser.add_argument('--population-size', dest='population_size', type=int, default=10,
                                             help='The size of the population for the local search.')
        parameter_search_parser.add_argument('--noise-std', dest='noise_std', type=float, default=0.5,
                                             help='The standard deviation of the normal noise for mutation.')
        parameter_search_parser.add_argument('--ui', dest='use_ui', action='store_true',
                                             help='Whether to use an UI to print results.')
        self._add_language_model_argument(parameter_search_parser)

    @lazy
    def parsed(self):
        """Parse ``sys.argv`` once and return the resulting namespace.

        When a sub-command was given, two derived attributes are added:

        * ``run_type`` -- ``'train'`` or ``'record'`` for those commands, the
          selected dataset (``'dev'``/``'test'``) for ``evaluate``, and
          ``'other'`` for every remaining command.
        * ``run_train_dir`` -- ``train_dir`` and ``run_name`` joined by ``'/'``.

        Without a sub-command the namespace is returned unmodified.
        """
        parsed = self.parser.parse_args()
        if not parsed.command:
            return parsed

        if parsed.command == 'evaluate':
            parsed.run_type = parsed.dataset
        elif parsed.command in ('train', 'record'):
            parsed.run_type = parsed.command
        else:
            parsed.run_type = 'other'

        parsed.run_train_dir = parsed.train_dir + '/' + parsed.run_name
        return parsed

    # Each factory below imports its module at call time, so only the module
    # of the sub-command that was actually selected gets imported.

    @staticmethod
    def _get_training_executor(flags):
        """Return the executor for the ``train`` sub-command."""
        import speecht.training
        return speecht.training.Training(flags)

    @staticmethod
    def _get_evaluation_executor(flags):
        """Return the executor for the ``evaluate`` sub-command."""
        import speecht.evaluation
        return speecht.evaluation.Evaluation(flags)

    @staticmethod
    def _get_recording_executor(flags):
        """Return the executor for the ``record`` sub-command."""
        import speecht.recording
        return speecht.recording.Recording(flags)

    @staticmethod
    def _get_search_executor(flags):
        """Return the executor for the ``search`` sub-command."""
        import speecht.parameter_search
        return speecht.parameter_search.LanguageModelParameterSearch(flags)

    @staticmethod
    def _get_preprocessing_executor(flags):
        """Return the executor for the ``preprocess`` sub-command."""
        import speecht.preprocessing
        return speecht.preprocessing.Preprocessing(flags)

    @staticmethod
    def _get_export_executor(flags):
        """Return the executor for the ``export`` sub-command."""
        import speecht.exporting
        return speecht.exporting.Exporting(flags)

    @lazy
    def command_executor(self):
        """Build (once) the executor object for the parsed sub-command.

        The executor is constructed with the parsed namespace as its only
        argument. Must only be accessed when a sub-command was given.
        """
        factories = {
            'train': self._get_training_executor,
            'evaluate': self._get_evaluation_executor,
            'record': self._get_recording_executor,
            'search': self._get_search_executor,
            'preprocess': self._get_preprocessing_executor,
            'export': self._get_export_executor,
        }
        return factories[self.parsed.command](self.parsed)

    def run(self):
        """Run the selected sub-command, or print the help text if none was given."""
        if not self.parsed.command:
            self.parser.print_help()
            return
        self._ensure_directories()
        self.command_executor.run()

    def _ensure_directories(self):
        """Create the train, data, log and per-run train directories if missing."""
        directories = (self.parsed.train_dir,
                       self.parsed.data_dir,
                       self.parsed.log_dir,
                       self.parsed.run_train_dir)
        for directory in directories:
            # ``exist_ok=True`` avoids the check-then-create race of
            # ``os.path.exists`` + ``os.makedirs`` when runs start concurrently.
            os.makedirs(directory, exist_ok=True)
if __name__ == "__main__":
    # Script entry point: build the command line interface and execute the
    # sub-command given on the command line (prints help when there is none).
    CLI().run()