-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathVagrantfile
334 lines (275 loc) · 12 KB
/
Vagrantfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# -*- mode: ruby -*-
# vi: set ft=ruby :
# you might want to install:
#vagrant-aws (0.3.0)
#vagrant-azure (2.0.0)
#vagrant-google (2.2.0)
#vagrant-sshfs (1.3.1)
#vagrant-vbguest (0.15.2)
# Main machine definition: selects the box, shares the project directory as
# /vagrant, configures the VirtualBox / AWS / Google providers and registers
# the one-time shell provisioner that builds the speech toolkits and sets up
# the transcriber, the web server and SLURM.
Vagrant.configure("2") do |config|
  # Default provider is VirtualBox!
  #   vagrant plugin install vagrant-vbguest
  # If you want AWS, you need to populate and run e.g.
  #   . aws.sh; vagrant up --provider aws
  # Make sure you don't check in aws.sh (maybe make a copy with your "secret" data)
  # Before that, do
  #   vagrant plugin install vagrant-aws; vagrant plugin install vagrant-sshfs
  # Similarly for vagrant-azure and vagrant-google

  #config.vm.box = "ubuntu/trusty64"
  config.vm.box = "bento/ubuntu-16.04"
  #config.vm.box = "google/gce"

  config.vm.synced_folder ".", "/vagrant", :mount_options => ["dmode=777", "fmode=777"]

  # Non-provider settings that should only apply to VirtualBox go through the
  # block's second parameter (`override`), which is the documented mechanism,
  # rather than through the outer `config` object.
  config.vm.provider "virtualbox" do |vbox, override|
    override.ssh.forward_x11 = true

    # enable (uncomment) this for debugging output
    #vbox.gui = true

    # turn off annoying auto update guest additions
    # (only when the optional vagrant-vbguest plugin is installed, so a missing
    # plugin does not produce an unknown-configuration-section error)
    override.vbguest.auto_update = false if Vagrant.has_plugin?("vagrant-vbguest")

    # host-only network on which web browser serves files
    override.vm.network "private_network", ip: "192.168.56.101"

    vbox.cpus = 4
    vbox.memory = 8192
  end

  config.vm.provider "aws" do |aws, override|
    aws.tags["Name"] = "Eesen Transcriber"
    #aws.ami = "ami-663a6e0c" # Ubuntu ("Trusty") Server 14.04 LTS AMI - US-East region
    aws.ami = "ami-e5d9439a" # Ubuntu ("Xenial") Server 16.04 LTS AMI - US-East region
    aws.instance_type = "m3.xlarge"

    override.vm.synced_folder ".", "/vagrant", type: "sshfs", ssh_username: ENV['USER'], ssh_port: "22", prompt_for_password: "true"
    override.vm.box = "http://speech-kitchen.org/dummy.box"

    # it is assumed these environment variables were set by ". aws.sh"
    aws.access_key_id = ENV['AWS_KEY']
    aws.secret_access_key = ENV['AWS_SECRETKEY']
    aws.keypair_name = ENV['AWS_KEYPAIR']
    override.ssh.username = "ubuntu"
    override.ssh.private_key_path = ENV['AWS_PEM']

    aws.terminate_on_shutdown = "true"
    aws.region = ENV['AWS_REGION']

    # https://console.aws.amazon.com/ec2/v2/home?region=us-east-1#SecurityGroups
    # Edit the security group on AWS Console; Inbound tab, add the HTTP rule
    aws.security_groups = "launch-wizard-1"

    #aws.subnet_id = "vpc-666c9a02"
    aws.region_config "us-east-1" do |region|
      #region.spot_instance = true
      region.spot_max_price = "0.1"
    end

    # this works around the error from AWS AMI vm on 'vagrant up':
    # No host IP was given to the Vagrant core NFS helper. This is
    # an internal error that should be reported as a bug.
    #override.nfs.functional = false
  end

  config.vm.provider "google" do |google, override|
    #google.tags["Name"] = "Eesen Transcriber"
    #google.google_project_id = "YOUR_GOOGLE_CLOUD_PROJECT_ID"
    #google.google_client_email = "YOUR_SERVICE_ACCOUNT_EMAIL_ADDRESS"
    #google.google_json_key_location = "/path/to/your/private-key.json"
    google.image_family = 'ubuntu-1604-lts'

    override.ssh.username = "ubuntu"
    #override.ssh.private_key_path = "~/.ssh/id_rsa"
    #override.ssh.private_key_path = "~/.ssh/google_compute_engine"
    #override.vm.synced_folder ".", "/vagrant", type: "sshfs", ssh_username: ENV['USER'], ssh_port: "22", prompt_for_password: "true"
    #google.terminate_on_shutdown = "true"
  end

  # One-time provisioning script (bash).
  # The heredoc identifier is single-quoted on purpose: Ruby then hands the
  # text to the guest verbatim. In an unquoted heredoc Ruby consumes
  # backslashes, so `find ... -exec rm {} \;` would reach the shell as
  # `-exec rm {} ;` and fail. `${user}` and backticks/`$(...)` are shell
  # syntax, not Ruby interpolation.
  config.vm.provision "shell", inline: <<-'SHELL'
    # change to 'kaldi' for Aspire models
    # TOOLKIT='eesen'
    TOOLKIT='kaldi'

    # there are a few things we can do to reduce size, set to false if you experience problems
    SHRINK=true

    # turn off debconf prompting (annoying grub prompt)
    # exported before the first apt-get call so that it actually takes effect
    export DEBIAN_FRONTEND=noninteractive

    # get the name of the default user
    # (done first: ${user} is used by almost every step below)
    if id -u vagrant >/dev/null 2>&1
    then
      user="vagrant"
    else
      user="ubuntu"
    fi

    # if you experience fails during build, could add virtual memory
    apt-get update
    apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" upgrade
    # sudo apt install swapspace -y

    # these are the main packages we need
    # (no sudo: provisioning already runs as root, and sudo may drop
    # DEBIAN_FRONTEND from the environment; note the space before each
    # continuation backslash so package names are not glued together)
    apt-get install -y git make automake libtool libtool-bin autoconf patch subversion fuse \
      libatlas3-base libatlas-base-dev libatlas-dev liblapack-dev sox openjdk-8-jre libav-tools g++ \
      zlib1g-dev libsox-fmt-all apache2 sshfs

    # turn off release upgrade messages
    sed -i s/Prompt=lts/Prompt=never/ /etc/update-manager/release-upgrades
    rm -f /var/lib/ubuntu-release-upgrader/*
    /usr/lib/ubuntu-release-upgrader/release-upgrade-motd

    # silence error message from missing file
    touch /home/${user}/.Xauthority

    # fix dash-as-bash
    # (-f replaces the existing link in one step, so /bin/sh never goes missing)
    ln -sf /bin/bash /bin/sh

    # If you wish to train EESEN with a GPU machine, uncomment this section to install CUDA
    # also uncomment the line that mentions cudatk-dir in the EESEN install section below
    if false
    then
      cd /home/${user}
      wget -nv http://speech-kitchen.org/vms/Data/cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb
      dpkg -i cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb
      rm cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb
      apt-get update
      apt-get remove --purge xserver-xorg-video-nouveau
      apt-get install -y cuda
    fi

    if [ "$TOOLKIT" == 'kaldi' ]
    then
      # install Kaldi
      cd /home/${user}
      git clone https://github.com/kaldi-asr/kaldi
      cd kaldi/tools
      git reset --hard 70748308810f
      extras/check_dependencies.sh
      sed -i 's/openfst.cs.nyu.edu/www.openfst.org/' Makefile
      sed -i 's/wget /wget -nv /' Makefile
      make -kj `nproc` || make || exit 1;
      cd ../src
      ./configure --shared
      make depend
      make -kj `nproc` || make || exit 1;
      # we are inside kaldi/src here, so the binary directories are ./*bin
      $SHRINK && find ../tools/sctk-2.4.10/src ../tools/openfst-1.6.2/src . -name '*.o' -exec rm {} \;
      $SHRINK && find ../tools/openfst-1.6.2/src ./*bin -executable -type f | xargs -n 1 -P `nproc` gzexe
      $SHRINK && find ../tools/openfst-1.6.2/src ./*bin -executable -type f -name '*~' -exec rm {} \;
    fi

    if true
    then
      # install srvk EESEN (does not require CUDA)
      # start from the home directory: after the Kaldi build the current
      # directory is kaldi/src, but later steps expect /home/${user}/eesen
      cd /home/${user}
      git clone https://github.com/srvk/eesen
      cd eesen/tools
      git reset --hard 581d80f # 2016/12/09 support OpenFST 1.5.1
      make -kj `nproc` || make || exit 1;
      # remove a parameter from scoring script
      sed -i 's/ lur//g' sctk/bin/hubscr.pl
      cd ../src
      ./configure --shared #--cudatk-dir=/opt/nvidia/cuda
      make -kj `nproc` || make || exit 1;
      $SHRINK && find ../tools/sctk-2.4.9/src ../tools/openfst-1.4.1/src . -name '*.o' -exec rm {} \;
      $SHRINK && find ../tools/openfst-1.4.1/src ./*bin -executable -type f | xargs -n 1 -P `nproc` gzexe
      $SHRINK && find ../tools/openfst-1.4.1/src ./*bin -executable -type f -name '*~' -exec rm {} \;

      # install language model building toolkit
      cd ../asr_egs/tedlium/v2-30ms
      git clone https://github.com/srvk/lm_build
    fi

    # get eesen-offline-transcriber
    mkdir -p /home/${user}/tools
    cd /home/${user}/tools
    git clone https://github.com/srvk/srvk-eesen-offline-transcriber eesen-offline-transcriber
    # make links to EESEN
    cd /home/${user}/tools/eesen-offline-transcriber
    git reset --hard 8f63f31
    #
    if [ "$TOOLKIT" == 'kaldi' ]
    then
      ln -s /home/${user}/kaldi/egs/swbd/s5/steps .
      ln -s /home/${user}/kaldi/egs/swbd/s5/utils .

      # aspire models - 8khz online/nnet3 chained
      cd /home/${user}/kaldi/egs
      wget -nv http://speech-kitchen.org/vms/Data/aspire-chain-model-srvk.tgz
      tar zxvf aspire-chain-model-srvk.tgz --dereference
      rm aspire-chain-model-srvk.tgz
      # fix a hard coded pathname in model config files
      sed -i s/er1k/${user}/g aspire/s5/exp/tdnn_7b_chain_online/conf/online.conf
      sed -i s/er1k/${user}/g aspire/s5/exp/tdnn_7b_chain_online/conf/ivector_extractor.conf
    fi

    if true
    then
      cd /home/${user}/tools/eesen-offline-transcriber
      #ln -s /home/${user}/eesen/asr_egs/tedlium/v2-30ms/steps .
      #ln -s /home/${user}/eesen/asr_egs/tedlium/v2-30ms/utils .
      cd steps && ln -s /home/${user}/eesen/asr_egs/tedlium/v2-30ms/steps/* .
      cd ../utils && ln -s /home/${user}/eesen/asr_egs/tedlium/v2-30ms/utils/* .
      cd ..

      # get models
      cd /home/${user}/eesen/asr_egs/tedlium
      wget -nv http://speech-kitchen.org/vms/Data/v2-30ms.tgz
      tar zxvf v2-30ms.tgz --dereference
      rm v2-30ms.tgz

      # optionally get 8khz models
      if [ -f /vagrant/swbd-v1-pitch.tgz ]
      then
        cd /home/${user}/eesen/asr_egs/swbd
        tar zxvf /vagrant/swbd-v1-pitch.tgz
      fi
    fi

    # Uncomment for optional large language model rescoring
    # produces generally 2% better Word Error Rates at the expense of longer
    # decoding time and memory requirements (Requires guest VM setting
    # of at least vbox.memory = 15360, just barely fitting in a 16GB host
    # computer - with warnings) Substitute "make -f Makefile.rescore"
    # for "make" in run scripts (speech2text.sh and friends) to use this.
    #
    # cd /home/${user}
    # wget http://speech-kitchen.org/vms/Data/rescore-eesen.tgz
    # tar zxvf rescore-eesen.tgz
    # rm rescore-eesen.tgz

    # get XFCE, xterm if we want guest VM to open windows /menus on host
    #sudo apt-get install -y xfce4-panel xterm

    # Results (and intermediate files) are placed on the shared host folder
    mkdir -p /vagrant/{build,log,transcribe_me,src-audio}
    ln -s /vagrant/build /home/${user}/tools/eesen-offline-transcriber/build
    ln -s /vagrant/src-audio /home/${user}/tools/eesen-offline-transcriber/src-audio

    # Apache: set up web content
    cd /vagrant
    git clone https://github.com/srvk/www
    # set the shared folder to be (mounted as a shared folder in the VM) "www"
    sed -i 's|/var/www/html|/vagrant/www|g' /etc/apache2/sites-enabled/000-default.conf
    sed -i 's|/var/www/|/vagrant/www/|g' /etc/apache2/apache2.conf
    service apache2 restart

    # shorten paths used by vagrant ssh -c <command> commands
    # by symlinking ~/bin to here
    ln -s /home/${user}/tools/eesen-offline-transcriber /home/${user}/bin

    # get SLURM stuff
    # (pipe the output of `yes`; redirecting stdin from /usr/bin/yes would
    # feed the program's binary contents instead of "y" answers)
    yes | apt-get install -y --no-install-recommends slurm-llnl
    /usr/sbin/create-munge-key -f
    mkdir -p /var/run/munge /var/run/slurm-llnl
    chown munge:root /var/run/munge
    chown slurm:slurm /var/run/slurm-llnl
    echo 'OPTIONS="--syslog"' >> /etc/default/munge
    cp /vagrant/conf/slurm.conf /etc/slurm-llnl/slurm.conf
    cp /vagrant/conf/reconf-slurm.sh /root/
    #
    # supervisor stuff needed by slurm
    # copy config first so it gets picked up
    cp /vagrant/conf/supervisor.conf /etc/supervisor.conf
    mkdir -p /etc/supervisor/conf.d
    cp /vagrant/conf/slurm.sv.conf /etc/supervisor/conf.d/
    # now start service
    apt-get install -y supervisor

    # provisioning runs as root; we want files to belong to '${user}'
    chown -R ${user}:${user} /home/${user}

    # Handy info
    echo ""
    echo "------------------------------------------------------------"
    echo ""
    echo " Watching folder [...]/eesen-transcriber/transcribe_me/"
    echo " for new files to transcribe. Output *.ctm files"
    echo " will appear alongside the original audio files"
    echo " logs are in [...]/eesen-transcriber/log/"
    echo ""
    echo " Point your Chrome or Safari browser to "
    if [ "${user}" == vagrant ]
    then
      echo " http://192.168.56.101/ to view transcription results"
    else
      publicIP=$(curl -s http://169.254.169.254/latest/meta-data/public-ipv4)
      echo " http://${publicIP}/ to view transcription results"
    fi
    echo ""
    echo "------------------------------------------------------------"
  SHELL
end
# always monitor watched folder
# Registers a shell provisioner with run: "always": it removes the dynamic
# MOTD file and launches watch.sh from the transcriber directory in the
# background as the default user, logging to /vagrant/log/watched.log.
Vagrant.configure("2") do |config|
  # Single-quoted heredoc identifier: the script is passed to the guest
  # verbatim; ${user} is shell syntax, not Ruby interpolation.
  config.vm.provision "shell", run: "always", inline: <<-'SHELL'
    # pick the default account by an exact user lookup rather than a
    # substring match against /etc/passwd
    if id -u vagrant >/dev/null 2>&1
    then
      user="vagrant"
    else
      user="ubuntu"
    fi

    rm -rf /var/run/motd.dynamic

    # the redirect below fails (and watch.sh never starts) if the log
    # directory is missing on the shared folder
    mkdir -p /vagrant/log

    # su -c runs the user's login shell, so use the portable
    # "> file 2>&1" form instead of the bash-only ">&"
    su ${user} -c "cd /home/${user}/tools/eesen-offline-transcriber && ./watch.sh > /vagrant/log/watched.log 2>&1 &"
  SHELL
end