Closed
Changes from all commits
Commits
242 commits
4d2cde9
chain-smbr: Adding chain-smbr denominator
vimalmanohar Mar 25, 2016
58fcd61
long_utts: Minor fix
vimalmanohar May 18, 2017
d97edb6
added Transfer learning setup using nnet3+chain+tdnn for WSJ->RM.
pegahgh May 18, 2017
1c25c88
Merge branch 'master' of https://github.com/kaldi-asr/kaldi into tran…
pegahgh May 18, 2017
df90bba
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 19, 2017
ff4ac04
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 20, 2017
29ced2a
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 21, 2017
0f69bbd
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 22, 2017
b5fe795
fixed some issues w.r.t comments.
pegahgh May 22, 2017
143877b
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 23, 2017
947f05c
merged with kaldi_52 and fixed some incompatibility issues.
pegahgh May 23, 2017
acf4c14
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 24, 2017
f887582
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 25, 2017
a5d4ca8
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 26, 2017
ea930eb
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 27, 2017
8f05cdf
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 28, 2017
55cc6f9
[WIP] Add chain semi-supervised script + src changes
hhadian May 29, 2017
b91276b
Minor fixes
hhadian May 29, 2017
ba26120
Add more options to run_semisupervised.sh
hhadian May 30, 2017
1eb41ee
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 30, 2017
79e15ad
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar May 31, 2017
477bdf3
Add a check in supervision code
hhadian May 31, 2017
61e94b2
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 1, 2017
d45ecb9
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 2, 2017
f55f686
Some fixes + new options
hhadian Jun 2, 2017
c6ffb15
Merge branch 'master' into semi_supervised
hhadian Jun 3, 2017
7b01bb0
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 3, 2017
403e3e2
Add nnet3, chain, and semi_supervised scripts for fisher english
hhadian Jun 6, 2017
0c8974e
Merge remote-tracking branch 'origin/semi_supervised' into semi_super…
hhadian Jun 6, 2017
e1de4e4
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 8, 2017
41952cd
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 9, 2017
2e2b3d1
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 11, 2017
51c32f7
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 14, 2017
232397e
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 15, 2017
1414f6f
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 16, 2017
c65ef65
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 17, 2017
9677175
Merge branch 'master' of github.com:vimalmanohar/kaldi into chain-smbr
vimalmanohar Jun 20, 2017
20cf238
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 20, 2017
ae1cfe1
Merge branch 'master' of github.com:vimalmanohar/kaldi into chain-smbr
vimalmanohar Jun 21, 2017
bf56938
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 21, 2017
0bacc83
SMBR chain
vimalmanohar Jun 22, 2017
2c43456
chain-smbr: Bug fixes
vimalmanohar Jun 22, 2017
6adc948
Chain SMBR fixes
vimalmanohar Jun 22, 2017
2959279
chain-smbr: Bug fixes
vimalmanohar Jun 22, 2017
51ec051
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 22, 2017
758e9a4
chain-smbr: Bug fix
vimalmanohar Jun 22, 2017
d364040
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 23, 2017
2f15292
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 24, 2017
d8db02d
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 25, 2017
9d97243
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 27, 2017
57d1016
temp
vimalmanohar Jun 22, 2017
a03b401
smbr-dash
vimalmanohar Jun 22, 2017
0682618
smbr without leaky
vimalmanohar Jun 24, 2017
62da39a
chain-smbr: Fix bugs in chain smbr
vimalmanohar Jun 27, 2017
5b7879d
smbr training
vimalmanohar Jun 27, 2017
378267b
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 28, 2017
a973632
Adding missing chain-smbr-kernels.cu
vimalmanohar Jun 29, 2017
e7d9d52
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 29, 2017
55d3321
Add phone-insertion-penalty + minor updates
hhadian Jun 29, 2017
0a19c27
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jun 30, 2017
f776b3a
Minor bug fixes
vimalmanohar Jun 30, 2017
d1b872c
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 1, 2017
c11756d
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 4, 2017
8fd9f19
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 7, 2017
f37c374
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 8, 2017
a89d02d
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 9, 2017
4c86384
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 10, 2017
774d78e
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 12, 2017
845f27b
chain-smbr: Adding smbr
vimalmanohar Jul 12, 2017
545154a
added scripts for new weight transfer method for transferring all lay…
pegahgh Jul 14, 2017
5248c1a
merged with master
pegahgh Jul 14, 2017
40c85dc
updated PR w.r.t comments.
pegahgh Jul 14, 2017
39a731f
small fix to parser.py.
pegahgh Jul 14, 2017
970842e
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 15, 2017
c1996ff
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 17, 2017
72480ec
fixed issues w.r.t. comments (except prepare_wsj_rm_lang.sh).
pegahgh Jul 17, 2017
7559d3a
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 18, 2017
e0d43a6
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 20, 2017
4a217ea
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Jul 22, 2017
48d8161
chain: Fixes for silence
vimalmanohar Jul 23, 2017
9fedda9
chain: Updating chain script
vimalmanohar Jul 23, 2017
de34ec4
Merging master
vimalmanohar Jul 23, 2017
e51826a
fixed small issue with language-model.*.
pegahgh Jul 29, 2017
d64d017
semisup: Updating semisupervised scripts
vimalmanohar Aug 4, 2017
0a6b824
added new Xconfig layer to parse existing model and modified run_tdnn…
pegahgh Aug 6, 2017
e830a04
modified scripts to accept --trainer.input-model and prepare *.fst ou…
pegahgh Aug 9, 2017
49bcf2e
removed changes to language-model.* and generated weighted phone lm u…
pegahgh Aug 10, 2017
d25e63a
optimized alignment processing stage in weighted phone lm generation.
pegahgh Aug 10, 2017
f2d01ae
added check to have positive int as phone lm weights.
pegahgh Aug 10, 2017
293c531
fixed small issue with train_dnn.py.
pegahgh Aug 10, 2017
2462cf5
merged with kaldi/master.
pegahgh Aug 10, 2017
5b510f9
fixed some issues.
pegahgh Aug 11, 2017
ac95720
fixed some issues.
pegahgh Aug 15, 2017
ed8b952
fixed some comments and removed some options.
pegahgh Aug 17, 2017
b92a63a
semisup: Adding some extra script for semi-supervised recipes
vimalmanohar Aug 17, 2017
7a9ef54
fixed src dirs options for transfer learning scripts 1{a,b,c} and mod…
pegahgh Aug 17, 2017
4d8ec90
semisup: Merging from master
vimalmanohar Aug 18, 2017
775b34d
minor change to prepare for tf learning
vimalmanohar Aug 23, 2017
a2d5e62
semisup: Merging transfer learning
vimalmanohar Aug 23, 2017
e0fd23e
semisup: Separate tolerance for silence
vimalmanohar Aug 23, 2017
405af6c
Merge branch 'chain-smbr' of github.com:vimalmanohar/kaldi into semis…
vimalmanohar Aug 23, 2017
89e574b
modified comments in xconfig and train.py and modified scripts to gen…
pegahgh Aug 24, 2017
eb00983
small fix.
pegahgh Aug 24, 2017
ef7275b
fixed old comments and added new comments.
pegahgh Aug 24, 2017
82fa510
fixed some issues in python codes using pylint package.
pegahgh Aug 24, 2017
40dc5e4
smbr: Fix aux objf
vimalmanohar Aug 24, 2017
bd20bdf
semisup: Merge chain-smbr
vimalmanohar Aug 24, 2017
1a74866
semisup: Merge chain-smbr
vimalmanohar Aug 24, 2017
a856dea
Update parser.py
pegahgh Aug 26, 2017
55a64ff
Update run_tdnn_wsj_rm_1c.sh
pegahgh Aug 30, 2017
c2593d8
Update basic_layers.py
pegahgh Aug 30, 2017
26b4ddd
Update parser.py
pegahgh Aug 30, 2017
90fc04a
chain: objective function fixes
vimalmanohar Sep 1, 2017
d811e15
semisup: Minor fixes to chain semisup
vimalmanohar Sep 1, 2017
af050b6
semisup: Add more recipes
vimalmanohar Sep 1, 2017
80db322
[egs] Fix default for egs.cmd
vimalmanohar Sep 1, 2017
ea3f34a
semisup-clean: Removing some recipes
vimalmanohar Sep 1, 2017
3c7780d
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Sep 2, 2017
82daf84
Update xconfig_to_configs.py
vimalmanohar Sep 2, 2017
f6bea67
semisup: Merging transfer learning
vimalmanohar Sep 2, 2017
24bd794
semisup: Merging from transfer-learning
vimalmanohar Sep 2, 2017
417b3cf
semisup: Merging from transfer-learning
vimalmanohar Sep 2, 2017
9c45f34
semisup: Removing some recipes
vimalmanohar Sep 2, 2017
f88a115
Merge pull request #12 from vimalmanohar/patch-4
pegahgh Sep 2, 2017
ed63b19
Update make_weighted_den_fst.sh
vimalmanohar Sep 3, 2017
43d1fe2
Merge pull request #13 from vimalmanohar/patch-5
pegahgh Sep 4, 2017
d397d3f
semisup: Removing unrequired codes
vimalmanohar Sep 4, 2017
2de6266
semisup: Removing more unrequired codes
vimalmanohar Sep 4, 2017
e025ee2
semisup: Remove build_tree_from_lats
vimalmanohar Sep 4, 2017
6fecd2b
Remove unrelated codes
vimalmanohar Sep 4, 2017
f6f4e29
semisup: Cleaning up scripts not used
vimalmanohar Sep 4, 2017
125abf0
fixed small issues.
pegahgh Sep 6, 2017
f51492b
fixed small issue.
pegahgh Sep 6, 2017
ba308ea
modified make_weighted_den_fst.sh
pegahgh Sep 10, 2017
8fae871
modified weighted_den_fst.sh
pegahgh Sep 10, 2017
6f5e8eb
fixed some issues.
pegahgh Sep 12, 2017
3985924
fixed some small issues.
pegahgh Sep 12, 2017
17bb56f
Merge branch 'master' into transfer-learning-wsj-rm
danpovey Sep 13, 2017
fe07c0b
[scripts] Cosmetic and other improvements to make_weighted_den_fst.sh…
danpovey Sep 13, 2017
b5ce647
smbr: Logging bug fix
vimalmanohar Sep 13, 2017
967531d
semisup: Extend trivial output layer
vimalmanohar Sep 13, 2017
e5e57ee
temp fix
vimalmanohar Sep 13, 2017
9ff681a
Merging from transfer learning
vimalmanohar Sep 13, 2017
a34655c
Merge branch 'transfer_learning' of github.com:danpovey/kaldi into se…
vimalmanohar Sep 13, 2017
088aad3
semisup: Merging the finalized transfer-learning
vimalmanohar Sep 15, 2017
d61cb4b
semisup: Adding lattice splitting chain code
vimalmanohar Sep 25, 2017
8772dba
semisup: Adding tolerances to lattices
vimalmanohar Oct 3, 2017
339c435
Old tolerance approach
vimalmanohar Oct 11, 2017
e90ca23
semisup: adding mbr supervision
vimalmanohar Oct 16, 2017
ea6ed69
semisup: Adding semisup recipes
vimalmanohar Oct 16, 2017
bacca8b
Minor bug fix in get_egs.sh
vimalmanohar Oct 17, 2017
417ecfd
Best path system recipe
vimalmanohar Oct 17, 2017
6f0de80
Add some minor check
vimalmanohar Oct 18, 2017
c6aa0e4
Updates to work with RNNLM
vimalmanohar Oct 19, 2017
c22bd48
Fix tolerance fst
vimalmanohar Oct 20, 2017
0d8af58
Minor fix to _m
Oct 20, 2017
f0c9fe1
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
Oct 20, 2017
5bfdd39
Tolerance fst fixed
vimalmanohar Oct 22, 2017
37cafe8
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
Oct 22, 2017
479e769
semisup: Fixing some bugs and making cleaner scripts
Oct 27, 2017
a3c3703
minor changes
vimalmanohar Oct 27, 2017
90e88ba
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
vimalmanohar Oct 27, 2017
bf10730
semisup: Changes to get_egs
Oct 27, 2017
18093ae
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
vimalmanohar Oct 27, 2017
0bbd2ce
semisup: Adding 100k experiments
Oct 29, 2017
99b8fc1
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
vimalmanohar Oct 29, 2017
f392d74
Changed permissions
vimalmanohar Oct 30, 2017
ebe5e8d
[egs] Bug fix in train_raw_dnn.py
vimalmanohar Sep 26, 2017
fbedee0
steps/cleanup: Fixed corner case in resolve_ctm_edits_overlaps.py
vimalmanohar Nov 1, 2017
fe7d835
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Nov 2, 2017
05ba2d9
Binaries for undeterminized lattices
Nov 2, 2017
fcefeaa
semisup: Adding tfrnnlm scripts
vimalmanohar Nov 2, 2017
ada93ca
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Nov 3, 2017
a40461c
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Nov 4, 2017
3be8143
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Nov 5, 2017
a0572b5
semisup: Undeterminized lattices recipes
Nov 6, 2017
8a035ab
semisup-smbr: Bug fix in 15k_s
vimalmanohar Nov 6, 2017
155b90a
Undo _s changes
vimalmanohar Nov 6, 2017
34f780a
semisup-smbr: Adding undeterminized version of rescoring
Nov 6, 2017
62b0f3b
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
vimalmanohar Nov 6, 2017
0651075
semisup-smbr: Fix undeterminized lattice rescoring
Nov 6, 2017
35afc06
Merge branch 'semisup-smbr' of github.com:vimalmanohar/kaldi into sem…
vimalmanohar Nov 6, 2017
eadc843
semisup: 50 hours recipe
vimalmanohar Nov 12, 2017
c5acc17
semisup: Pocolm for fisher english
vimalmanohar Nov 14, 2017
f71741a
semisup: Fix lattice rescoring
Nov 17, 2017
5103952
semisup: Code changes for undeterminized lattices
Nov 17, 2017
fc472c3
semisup: Adding more recipes
Nov 17, 2017
010bc4e
semisup: Unk model on Fisher
vimalmanohar Nov 17, 2017
d43125b
semisup: Bug fix in ivectors in semi-supervised scenario
vimalmanohar Nov 17, 2017
82efedb
semisup: Minor fixes to scripts
vimalmanohar Nov 20, 2017
df9f480
semisup-clean: Temporary merge
vimalmanohar Nov 20, 2017
b1805bf
semisup-clean: Merging semisup-smbr
vimalmanohar Nov 20, 2017
65fbcd7
semisup-clean: Removing some unused scripts
vimalmanohar Nov 21, 2017
5cda53b
semisup-clean: Removing experimental scripts
vimalmanohar Nov 21, 2017
bd2b2d7
semisup-clean: Removing smart splitting code
vimalmanohar Nov 21, 2017
2380264
semisup-clean: Remove support for non-compact lattices
vimalmanohar Nov 21, 2017
995bf24
semisup-clean: Remove smart splitting recipes
vimalmanohar Nov 21, 2017
5ca1012
semisup-clean: Removing experimental codes and cleanup
vimalmanohar Nov 21, 2017
ceed512
semisup-clean: Remove no-chunking stuff
vimalmanohar Nov 21, 2017
7b0c1a5
semisup-clean: UNK model fisher
vimalmanohar Nov 21, 2017
69495d0
Merge branch 'master' of github.com:vimalmanohar/kaldi into semisup-c…
vimalmanohar Nov 21, 2017
13d78fc
semisup-clean: Keep only the important recipes 100 hours sup
vimalmanohar Nov 21, 2017
e0ff557
semisup-clean: Add write-compact=false option
vimalmanohar Nov 23, 2017
2237087
semisup: Removing some unnecessary parts
vimalmanohar Nov 28, 2017
e762533
semisup-smbr: Minor updates
vimalmanohar Nov 28, 2017
d5c7edf
semisup-clean: Removing some tuning scripts
vimalmanohar Nov 28, 2017
a627cd7
semisup-clean: Remove rnnlm stuff for now
vimalmanohar Nov 28, 2017
17f9165
semisup: Removing some old modifications
vimalmanohar Nov 28, 2017
e3b7d72
semisup-smbr: Re-organizing stuff
vimalmanohar Nov 28, 2017
76cc0a0
semisup-smbr: Adding more recipes
vimalmanohar Nov 28, 2017
47ab45a
semisup-smbr: Add stages to scoring scripts
vimalmanohar Nov 28, 2017
c4488ba
semisup-clean: Merging latest changes
vimalmanohar Nov 28, 2017
fe72721
semisup-clean: Keep only changes to be committed now
vimalmanohar Nov 29, 2017
1dc7e27
semisup-clean: Remove smart splitting recipes
vimalmanohar Dec 1, 2017
9ba5c34
semisup-clean: Remove and cleanup some recipe
vimalmanohar Dec 1, 2017
b99764c
semisup-clean: cleaned up ivector extractor script
vimalmanohar Dec 1, 2017
37bb897
semisup: unk model script
vimalmanohar Dec 1, 2017
ec15e64
temp changes
vimalmanohar Dec 1, 2017
b3e1142
semisup-clean: Merging latest changes semisup
vimalmanohar Dec 1, 2017
c3e32f1
semisup-clean: Cleaning up recipes
vimalmanohar Dec 3, 2017
75fbde4
Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar Dec 8, 2017
e62dac0
semisup: Making changes based on comments
vimalmanohar Dec 8, 2017
ac5da45
Merging from kaldi master
vimalmanohar Dec 8, 2017
5d1f4c9
semisup: Minor fixes
vimalmanohar Dec 11, 2017
f3fd4a9
semisup: Minor fixes
vimalmanohar Dec 11, 2017
0a69689
semisup: Re-organizing some scripts
vimalmanohar Dec 11, 2017
cce099f
Reverting discriminative changes for now
vimalmanohar Dec 28, 2017
ed5efd6
Revert some changes not required now
vimalmanohar Dec 28, 2017
5cafdc5
Merging from golden
vimalmanohar Dec 28, 2017
7a1ff5c
Minor fix
vimalmanohar Dec 28, 2017
e2c6603
Removing a few files from the PR
vimalmanohar Dec 29, 2017
2c06bf5
Clean the recipe
vimalmanohar Jan 8, 2018
9933ea7
Added some checks
vimalmanohar Jan 9, 2018
db0bc54
Remove truncate-deriv-weights
vimalmanohar Jan 9, 2018
c71cf88
Remove some unused binaries in chainbin get-egs
vimalmanohar Jan 9, 2018
6d8350e
Remove mkgraph.sh changes
vimalmanohar Jan 9, 2018
ef4750a
Merge branch 'master' of github.com:kaldi-asr/kaldi into semisup-clean
vimalmanohar Jan 9, 2018
0ee0075
Remove some tuning scripts
vimalmanohar Jan 9, 2018
4908983
Add recipe for build tree multiple sources
vimalmanohar Jan 9, 2018
85780b1
Remove some lattice function changes
vimalmanohar Jan 9, 2018
842dce9
Rename some scripts
vimalmanohar Jan 9, 2018
926dc3a
semisup: Reduce the number of scripts
vimalmanohar Jan 10, 2018
27 changes: 14 additions & 13 deletions egs/fisher_english/s5/local/fisher_create_test_lang.sh
@@ -1,23 +1,25 @@
#!/bin/bash
#

if [ -f path.sh ]; then . ./path.sh; fi

mkdir -p data/lang_test
# This script formats ARPA LM into G.fst.

arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
dir=data/lang_test

if [ -f ./path.sh ]; then . ./path.sh; fi
. utils/parse_options.sh

[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;

mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
mkdir -p $dir
cp -r data/lang/* $dir

gunzip -c "$arpa_lm" | \
arpa2fst --disambig-symbol=#0 \
--read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst
--read-symbol-table=$dir/words.txt - $dir/G.fst


echo "Checking how stochastic G is (the first of these numbers should be small):"
fstisstochastic data/lang_test/G.fst
fstisstochastic $dir/G.fst

## Check lexicon.
## just have a look and make sure it seems sane.
@@ -27,22 +29,21 @@ fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/l
echo Performing further checks

# Checking that G.fst is determinizable.
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.
fstdeterminize $dir/G.fst /dev/null || echo Error determinizing G.

# Checking that L_disambig.fst is determinizable.
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.
fstdeterminize $dir/L_disambig.fst /dev/null || echo Error determinizing L.

# Checking that disambiguated lexicon times G is determinizable
# Note: we do this with fstdeterminizestar not fstdeterminize, as
# fstdeterminize was taking forever (presumably relates to a bug
# in this version of OpenFst that makes determinization slow for
# some cases).
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
fsttablecompose $dir/L_disambig.fst $dir/G.fst | \
fstdeterminizestar >/dev/null || echo Error

# Checking that LG is stochastic:
fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \
fsttablecompose data/lang/L_disambig.fst $dir/G.fst | \
fstisstochastic || echo "[log:] LG is not stochastic"


echo "$0 succeeded"
170 changes: 170 additions & 0 deletions egs/fisher_english/s5/local/fisher_train_lms_pocolm.sh
@@ -0,0 +1,170 @@
#!/bin/bash

# Copyright 2016 Vincent Nguyen
# 2016 Johns Hopkins University (author: Daniel Povey)
# 2017 Vimal Manohar
# Apache 2.0
#
# This script is based on the example scripts distributed with PocoLM.

set -e
stage=0

text=data/train/text
lexicon=data/local/dict/lexicon.txt
dir=data/local/pocolm

num_ngrams_large=5000000
num_ngrams_small=2500000

echo "$0 $@" # Print the command line for logging
. utils/parse_options.sh || exit 1;

lm_dir=${dir}/data

mkdir -p $dir
. ./path.sh || exit 1; # for KALDI_ROOT
export PATH=$KALDI_ROOT/tools/pocolm/scripts:$PATH
( # First make sure the pocolm toolkit is installed.
cd $KALDI_ROOT/tools || exit 1;
if [ -d pocolm ]; then
echo Not installing the pocolm toolkit since it is already there.
else
echo "$0: Please install the PocoLM toolkit with: "
echo " cd ../../../tools; extras/install_pocolm.sh; cd -"
exit 1;
fi
) || exit 1;

for f in "$text" "$lexicon"; do
[ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

num_dev_sentences=10000

#bypass_metaparam_optim_opt=
# If you want to bypass the metaparameter optimization steps with specific metaparameters
# un-comment the following line, and change the numbers to some appropriate values.
# You can find the values from output log of train_lm.py.
# These example metaparameter values are for a 4-gram model (with min-counts)
# obtained by running train_lm.py.
# The dev perplexity should be close to the non-bypassed model.
#bypass_metaparam_optim_opt="--bypass-metaparameter-optimization=0.854,0.0722,0.5808,0.338,0.166,0.015,0.999,0.6228,0.340,0.172,0.999,0.788,0.501,0.406"
# Note: to use these example parameters, you may need to remove the .done files
# to make sure make_lm_dir.py is called and trains only the 3-gram model:
#for order in 3; do
#rm -f ${lm_dir}/${num_word}_${order}.pocolm/.done

if [ $stage -le 0 ]; then
mkdir -p ${dir}/data
mkdir -p ${dir}/data/text

echo "$0: Getting the Data sources"

rm ${dir}/data/text/* 2>/dev/null || true

cleantext=$dir/text_all.gz

cut -d ' ' -f 2- $text | awk -v lex=$lexicon '
BEGIN{
while((getline<lex) >0) { seen[$1]=1; }
}
{
for(n=1; n<=NF;n++) {
if (seen[$n]) {
printf("%s ", $n);
} else {
printf("<unk> ");
}
}
printf("\n");
}' | gzip -c > $cleantext || exit 1;

# This is for reporting perplexities
gunzip -c $dir/text_all.gz | head -n $num_dev_sentences > \
${dir}/data/test.txt

# Use a subset of the annotated training data as the dev set.
# Note: the name 'dev' is treated specially by pocolm; it automatically
# becomes the dev set.
gunzip -c $dir/text_all.gz | tail -n +$[num_dev_sentences+1] | \
head -n $num_dev_sentences > ${dir}/data/text/dev.txt

gunzip -c $dir/text_all.gz | tail -n +$[2*num_dev_sentences+1] > \
${dir}/data/text/train.txt

# For reporting perplexities, we'll use the "real" dev set.
# (A subset of the training data is used as ${dir}/data/text/dev.txt to work
# out the interpolation weights.)
# Note: we can't put it in ${dir}/data/text/, because then pocolm would use
# it as one of the data sources.
cut -d " " -f 2- < data/dev_and_test/text > ${dir}/data/real_dev_set.txt

cat $lexicon | awk '{print $1}' | sort | uniq | awk '
{
if ($1 == "<s>") {
print "<s> is in the vocabulary!" | "cat 1>&2"
exit 1;
}
if ($1 == "</s>") {
print "</s> is in the vocabulary!" | "cat 1>&2"
exit 1;
}
printf("%s\n", $1);
}' > $dir/data/wordlist || exit 1;
fi

order=4
wordlist=${dir}/data/wordlist

lm_name="`basename ${wordlist}`_${order}"
min_counts='train=1'
if [ -n "${min_counts}" ]; then
lm_name+="_`echo ${min_counts} | tr -s "[:blank:]" "_" | tr "=" "-"`"
fi

unpruned_lm_dir=${lm_dir}/${lm_name}.pocolm

if [ $stage -le 1 ]; then
# decide on the vocabulary.
# Note: you'd use --wordlist if you had a previously determined word-list
# that you wanted to use.
# Note: if you have more than one order, use a certain amount of words as the
# vocab and want to restrict max memory for 'sort',
echo "$0: training the unpruned LM"
train_lm.py --wordlist=${wordlist} --num-splits=10 --warm-start-ratio=20 \
--limit-unk-history=true \
--fold-dev-into=train ${bypass_metaparam_optim_opt} \
--min-counts="${min_counts}" \
${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir}

get_data_prob.py ${dir}/data/test.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' | tee ${unpruned_lm_dir}/perplexity_test.log

get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity' | tee ${unpruned_lm_dir}/perplexity_real_dev_set.log
fi

if [ $stage -le 2 ]; then
echo "$0: pruning the LM (to larger size)"
# Using 5 million n-grams for a big LM for rescoring purposes.
prune_lm_dir.py --target-num-ngrams=$num_ngrams_large --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big

get_data_prob.py ${dir}/data/test.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' | tee ${dir}/data/lm_${order}_prune_big/perplexity_test.log

get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity' | tee ${dir}/data/lm_${order}_prune_big/perplexity_real_dev_set.log

mkdir -p ${dir}/data/arpa
format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz
fi

if [ $stage -le 3 ]; then
echo "$0: pruning the LM (to smaller size)"
# Using 2.5 million n-grams for a smaller LM for graph building.
# Prune from the bigger pruned LM; it'll be faster.
prune_lm_dir.py --target-num-ngrams=$num_ngrams_small ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small

get_data_prob.py ${dir}/data/test.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' | tee ${dir}/data/lm_${order}_prune_small/perplexity_test.log

get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity' | tee ${dir}/data/lm_${order}_prune_small/perplexity_real_dev_set.log

format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz
fi
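
The script writes its pruned ARPA LMs under ${dir}/data/arpa/ (4gram_big.arpa.gz for rescoring, 4gram_small.arpa.gz for graph building). A minimal sketch of how it might be run, assuming the default paths shown in the script:

# Sketch: train the pocolm 4-gram LMs on the Fisher training text.
# The pruned ARPA LMs end up under data/local/pocolm/data/arpa/.
local/fisher_train_lms_pocolm.sh \
  --text data/train/text \
  --lexicon data/local/dict/lexicon.txt \
  --dir data/local/pocolm
# The resulting 4gram_small.arpa.gz can then be passed to the modified
# fisher_create_test_lang.sh shown earlier via its --arpa-lm option.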