Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions egs/swbd/s5c/local/swbd1_prepare_dict.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text
[ ! -f "$srcdict" ] && echo "$0: No such file $srcdict" && exit 1;

cp $srcdict $dir/lexicon0.txt || exit 1;
chmod +r $dir/lexicon0.txt # fix a strange permission in the source.
patch <local/dict.patch $dir/lexicon0.txt || exit 1;

#(2a) Dictionary preparation:
Expand Down
10 changes: 4 additions & 6 deletions src/doc/mainpage.dox
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,11 @@
/**
\mainpage Kaldi

Now <a href=kaldi.sourceforge.net>kaldi.sourceforge.net</a> is only a backup
location. <a href=http://kaldi-asr.org>kaldi-asr.org/doc</a> is the
definitive location of this documentation. Kaldi's code repository is
now located at <a href="https://github.com/kaldi-asr/kaldi">https://github.com/kaldi-asr/kaldi</a>
Kaldi is a toolkit for speech recognition, intended for use by speech recognition researchers and
professionals.

Find the code repository at <a href="https://github.com/kaldi-asr/kaldi">https://github.com/kaldi-asr/kaldi</a>.

See also the top level of <a href="http://kaldi-asr.org">kaldi-asr.org</a>, where
you can download pre-built models.

<p>
- \subpage about
Expand Down
17 changes: 8 additions & 9 deletions src/ivectorbin/select-voiced-frames.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,30 @@ int main(int argc, char *argv[]) {
const char *usage =
"Select a subset of frames of the input files, based on the output of\n"
"compute-vad or a similar program (a vector of length num-frames,\n"
"containing 1.0 for voiced, 0.0 for unvoiced).\n"
"containing 1.0 for voiced, 0.0 for unvoiced). Caution: this is\n"
"mostly useful only in speaker identification applications.\n"
"Usage: select-voiced-frames [options] <feats-rspecifier> "
" <vad-rspecifier> <feats-wspecifier>\n"
"E.g.: select-voiced-frames [options] scp:feats.scp scp:vad.scp ark:-\n";

ParseOptions po(usage);
po.Read(argc, argv);

if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}

std::string feat_rspecifier = po.GetArg(1),
vad_rspecifier = po.GetArg(2),
feat_wspecifier = po.GetArg(3);

SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
RandomAccessBaseFloatVectorReader vad_reader(vad_rspecifier);
BaseFloatMatrixWriter feat_writer(feat_wspecifier);

int32 num_done = 0, num_err = 0;

for (;!feat_reader.Done(); feat_reader.Next()) {
std::string utt = feat_reader.Key();
const Matrix<BaseFloat> &feat = feat_reader.Value();
Expand All @@ -72,8 +73,8 @@ int main(int argc, char *argv[]) {
const Vector<BaseFloat> &voiced = vad_reader.Value(utt);

if (feat.NumRows() != voiced.Dim()) {
KALDI_WARN << "Mismatch in number for frames " << feat.NumRows()
<< " for features and VAD " << voiced.Dim()
KALDI_WARN << "Mismatch in number for frames " << feat.NumRows()
<< " for features and VAD " << voiced.Dim()
<< ", for utterance " << utt;
num_err++;
continue;
Expand Down Expand Up @@ -111,5 +112,3 @@ int main(int argc, char *argv[]) {
return -1;
}
}


59 changes: 38 additions & 21 deletions src/lat/compose-lattice-pruned.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class PrunedCompactLatticeComposer {

// 'arc_delta_costs' is an array, one for each arc (and the final-prob, if
// present), showing how much the cost to the final-state for the best path
// starting in this state and exiting through each arc (or final-state),
// starting in this state and exiting through each arc (or final-prob),
// differs from 'backward_cost'. Specifically, it contains pairs
// (delta_cost, arc_index), where delta_cost >= 0 and arc_index is
// either the index into this state's array of arcs (for arcs), or -1
Expand Down Expand Up @@ -201,8 +201,8 @@ class PrunedCompactLatticeComposer {
// - If backward_cost is finite (this state in the composed result can
// reach the final state via currently expanded states), then
// delta_backward_cost is this->backward_cost minus
// lat_state_info_[lat_state].backward_cost. (It will mostly,
// not always, be >= 0, reflecting that the new LM is better than
// lat_state_info_[lat_state].backward_cost. (It will mostly, but
// not always, be <= 0, reflecting that the new LM is better than
// the old LM).
// - On the other hand, if backward_cost is infinite: delta_backward_cost
// is set to the delta_backward_cost of the previous state on the best
Expand All @@ -216,7 +216,10 @@ class PrunedCompactLatticeComposer {
// - For other states, delta_backward_cost will be unchanged since
// RecomputePruningInfo() was last called.
// The above rules may make the delta_backward_cost a less accurate, but
// still probably reasonable, heuristic.
// still probably reasonable, heuristic. What it is a heuristic for,
// is: if we were to successfully reach an end-state of the composed output
// from this state, what would be the resulting backward_cost
// minus lat_state_info_[lat_state].backward_cost.
BaseFloat delta_backward_cost;

// 'prev_composed_state' is the previous state on the best path from
Expand All @@ -226,21 +229,26 @@ class PrunedCompactLatticeComposer {
// used.
int32 prev_composed_state;

// 'sorted_arc_index' is an index into the 'arc_delta_costs' array of the
// LatticeStateInfo corresponding to 'lat_state'. It corresponds to the
// next arc (or final-prob) out of the input lattice that we have yet to
// expand in the composition; or -1 if we have expanded all of them. When
// we first reach a composed state, 'sorted_arc_index' will be zero; then it
// will increase one at a time as we expand arcs until either the
// composition terminates or we have expanded all the arcs and it becomes
// -1.
// 'sorted_arc_index' is an index into the 'arc_delta_costs' array which is
// a member of the LatticeStateInfo object corresponding to the lattice
// state 'lat_state'. It corresponds to the next arc (or final-prob) out of
// the input lattice that we have yet to expand in the composition; or -1 if
// we have expanded all of them. When we first reach a composed state,
// 'sorted_arc_index' will be zero; then it will increase one at a time as
// we expand arcs until either the composition terminates or we have
// expanded all the arcs and it becomes -1.
int32 sorted_arc_index;

// 'arc_delta_cost' is a derived quantity that we store here for easier
// access. Suppose this_lat_info is lat_state_info_[lat_state]; then
// if sorted_arc_index >= 0, then:
// arc_delta_cost == this_lat_info.arc_delta_costs[sorted_arc_index].first
// else: arc_delta_cost == +infinity.
//
// What 'arc_delta_cost' represents (or is a heuristic for), is the expected
// cost of a path to the final-state leaving through the arc we're about to
// expand, minus the expected cost of any path to the final-state starting
// from this state.
BaseFloat arc_delta_cost;

// view 'expected_cost_offset' as a phantom field of this struct, that has
Expand All @@ -249,16 +257,20 @@ class PrunedCompactLatticeComposer {
//
// 'expected_cost_offset' is a derived quantity that reflects the expected
// cost (according to our heuristic) of the best path we might encounter
// when expanding the next previously unseen arc (or final-prob), corresponding
// to 'sorted_arc_index'.
// when expanding the next previously unseen arc (or final-prob),
// corresponding to 'sorted_arc_index'. (This is the expected cost of a
// successful path, from the beginning to the end of the lattice, but
// constrained to be a path that contains the arc we're about to expand).
//
// The 'offset' part is about subtracting the best cost of the lattice, so we
// can cast to float without too much loss of accuracy:
// expected_cost_offset = expected_cost - lat_best_cost_.
//
// We define expected_cost_offset by defining the 'expected_cost' part;
// for clarity:
// First, let lat_backward_cost equal the backward_cost of the LatticeStateInfo
// corresponding to 'lat_state'. Then:
// corresponding to 'lat_state', i.e.
// lat_backward_cost = lat_state_info_[lat_state].backward_cost. Then:
// expected_cost = forward_cost + lat_backward_cost +
// delta_backward_cost + arc_delta_cost.
// expected_cost_offset will always equal the above minus lat_best_cost_.
Expand Down Expand Up @@ -298,7 +310,7 @@ class PrunedCompactLatticeComposer {

// current_cutoff_ is a value used in deciding which composed states
// need to be included in the queue. Each time RecomputePruningInfo() is
// called, it is set to
// called, current_cutoff_ is set to
// (output_best_cost_ - lat_best_cost_ + opts_.lattice_compose_beam).
// It will be +infinity if the output lattice doesn't yet have any
// successful paths. It decreases with time. You can compare the
Expand Down Expand Up @@ -755,8 +767,9 @@ void PrunedCompactLatticeComposer::ProcessTransition(int32 src_composed_state,
std::pair<MapType::iterator, bool> ret =
pair_to_state_.insert(value);
if (ret.second) {
// Successfully inserted. Most of the rest of this block deals with the
// consequences of adding a new state.
// Successfully inserted: this dest-state did not already exist. Most of
// the rest of this block deals with the consequences of adding a new
// state.
int32 ans = clat_out_->AddState();
KALDI_ASSERT(ans == new_composed_state);
dest_composed_state = new_composed_state;
Expand All @@ -777,7 +790,8 @@ void PrunedCompactLatticeComposer::ProcessTransition(int32 src_composed_state,
dest_info->delta_backward_cost =
src_info->delta_backward_cost;
// The 'prev_composed_state' field will not be read again until after it's
// overwritten; we set it as below only for debugging purposes.
// overwritten; we set it as below only for debugging purposes (the
// negation is also for debugging purposes).
dest_info->prev_composed_state = -src_composed_state;
dest_info->sorted_arc_index = 0;
dest_info->arc_delta_cost = 0.0;
Expand All @@ -791,8 +805,11 @@ void PrunedCompactLatticeComposer::ProcessTransition(int32 src_composed_state,
dest_info->delta_backward_cost -
lat_best_cost_);
if (expected_cost_offset < current_cutoff_) {
composed_state_queue_.push(std::pair<BaseFloat, int32>(
expected_cost_offset, dest_composed_state));
// the following call should be equivalent to
// composed_state_queue_.push(std::pair<BaseFloat,int32>(...)) with
// the same pair of args.
composed_state_queue_.emplace(expected_cost_offset,
dest_composed_state);
}
} else { // the destination composed state already existed.
dest_composed_state = ret.first->second;
Expand Down
25 changes: 20 additions & 5 deletions tools/extras/install_sequitur.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,19 @@ if [ ! -d ./sequitur-g2p ] ; then
echo >&2 " manually and re-run the script"
fi
}
else
echo >&2 "$0: Updating the repository -- we will try to merge with local changes (if you have any)"
(
cd sequitur-g2p/
git pull
# this would work also, but would drop all local modifications
#git fetch
#git reset --hard origin/master
) || {
echo >&2 "Failed to do git pull, delete the sequitur dir and run again";
exit 1
}
fi
#just to retain backward compatibility for a while. Can be removed
#in a couple of months.
ln -sf sequitur-g2p sequitur

(
cd sequitur-g2p
Expand All @@ -75,15 +84,21 @@ cd sequitur-g2p
#the primary issue is that real GNU GCC does not accept that switch
#in addition, Apple fake g++ based on LLVM version 8.1 prints a warning that
#libstdc++ should no longer be used.
if (g++ --version 2>/dev/null | grep -s "LLVM version 8.0" >/dev/null) ; then
if (g++ --version 2>/dev/null | grep -s "LLVM version 8.0" >/dev/null) ; then
#Apple fake-g++
make CXX=g++ CC=gcc CPPFLAGS="-stdlib=libstdc++"
else
make CXX=g++ CC=gcc
fi

python setup.py install --prefix `pwd`
# the next two lines deal with the issue that the new setup tools
# expect the directory in which we will be installing to be visible
# as a module directory to python
site_packages_dir=$(python -m site --user-site | grep -oE "lib.*")
SEQUITUR=$(pwd)/$site_packages_dir
PYTHONPATH=${PYTHONPATH:-}:$SEQUITUR python setup.py install --prefix `pwd`
)

site_packages_dir=$(cd sequitur-g2p; find ./lib{,64} -type d -name site-packages | head -n 1)
(
set +u
Expand Down