Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 17 additions & 20 deletions src/chainbin/nnet3-chain-get-egs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace nnet3 {
*/

static bool ProcessFile(const fst::StdVectorFst &normalization_fst,
const MatrixBase<BaseFloat> &feats,
const GeneralMatrix &feats,
const MatrixBase<BaseFloat> *ivector_feats,
int32 ivector_period,
const chain::Supervision &supervision,
Expand Down Expand Up @@ -108,21 +108,13 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst,
nnet_chain_eg.inputs.resize(ivector_feats != NULL ? 2 : 1);

int32 tot_input_frames = chunk.left_context + chunk.num_frames +
chunk.right_context;

Matrix<BaseFloat> input_frames(tot_input_frames, feats.NumCols(),
kUndefined);

int32 start_frame = chunk.first_frame - chunk.left_context;
for (int32 t = start_frame; t < start_frame + tot_input_frames; t++) {
int32 t2 = t;
if (t2 < 0) t2 = 0;
if (t2 >= num_input_frames) t2 = num_input_frames - 1;
int32 j = t - start_frame;
SubVector<BaseFloat> src(feats, t2),
dest(input_frames, j);
dest.CopyFromVec(src);
}
chunk.right_context,
start_frame = chunk.first_frame - chunk.left_context;

GeneralMatrix input_frames;
ExtractRowRangeWithPadding(feats, start_frame, tot_input_frames,
&input_frames);

NnetIo input_io("input", -chunk.left_context, input_frames);
nnet_chain_eg.inputs[0].Swap(&input_io);

Expand Down Expand Up @@ -193,8 +185,11 @@ int main(int argc, char *argv[]) {
std::string online_ivector_rspecifier;

ParseOptions po(usage);
po.Register("compress", &compress, "If true, write egs in "
"compressed format.");
po.Register("compress", &compress, "If true, write egs with input features "
"in compressed format (recommended). Update: this is now "
"only relevant if the features being read are un-compressed; "
"if already compressed, we keep we same compressed format when "
"dumping-egs.");
po.Register("ivectors", &online_ivector_rspecifier, "Alias for "
"--online-ivectors option, for back compatibility");
po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of "
Expand Down Expand Up @@ -242,7 +237,9 @@ int main(int argc, char *argv[]) {
KALDI_ASSERT(normalization_fst.NumStates() > 0);
}

SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);
// Read as GeneralMatrix so we don't need to un-compress and re-compress
// when selecting parts of matrices.
SequentialGeneralMatrixReader feat_reader(feature_rspecifier);
chain::RandomAccessSupervisionReader supervision_reader(
supervision_rspecifier);
NnetChainExampleWriter example_writer(examples_wspecifier);
Expand All @@ -253,7 +250,7 @@ int main(int argc, char *argv[]) {

for (; !feat_reader.Done(); feat_reader.Next()) {
std::string key = feat_reader.Key();
const Matrix<BaseFloat> &feats = feat_reader.Value();
const GeneralMatrix &feats = feat_reader.Value();
if (!supervision_reader.HasKey(key)) {
KALDI_WARN << "No pdf-level posterior for key " << key;
num_err++;
Expand Down
47 changes: 31 additions & 16 deletions src/featbin/apply-cmvn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ int main(int argc, char *argv[]) {
"Usage: apply-cmvn [options] (<cmvn-stats-rspecifier>|<cmvn-stats-rxfilename>) <feats-rspecifier> <feats-wspecifier>\n"
"e.g.: apply-cmvn --utt2spk=ark:data/train/utt2spk scp:data/train/cmvn.scp scp:data/train/feats.scp ark:-\n"
"See also: modify-cmvn-stats, matrix-sum, compute-cmvn-stats\n";

ParseOptions po(usage);
std::string utt2spk_rspecifier;
bool norm_vars = false;
bool norm_means = true;
bool reverse = false;
std::string skip_dims_str;

po.Register("utt2spk", &utt2spk_rspecifier,
"rspecifier for utterance to speaker map");
po.Register("norm-vars", &norm_vars, "If true, normalize variances.");
Expand All @@ -53,7 +53,7 @@ int main(int argc, char *argv[]) {
po.Register("reverse", &reverse, "If true, apply CMVN in a reverse sense, "
"so as to transform zero-mean, unit-variance input into data "
"with the given mean and variance.");

po.Read(argc, argv);

if (po.NumArgs() != 3) {
Expand All @@ -63,31 +63,47 @@ int main(int argc, char *argv[]) {
if (norm_vars && !norm_means)
KALDI_ERR << "You cannot normalize the variance but not the mean.";


std::string cmvn_rspecifier_or_rxfilename = po.GetArg(1);
std::string feat_rspecifier = po.GetArg(2);
std::string feat_wspecifier = po.GetArg(3);

if (!norm_means) {
// With --norm-means=false, CMVN is a no-op (variance normalization without
// mean normalization was already rejected above), so just echo the input.
// We don't even uncompress it if it was a CompressedMatrix.
SequentialGeneralMatrixReader reader(feat_rspecifier);
GeneralMatrixWriter writer(feat_wspecifier);
kaldi::int32 num_done = 0;
for (;!reader.Done(); reader.Next()) {
writer.Write(reader.Key(), reader.Value());
num_done++;
}
KALDI_LOG << "Copied " << num_done << " utterances.";
return (num_done != 0 ? 0 : 1);
}


std::vector<int32> skip_dims; // optionally use "fake"
// (zero-mean/unit-variance) stats for some
// dims to disable normalization.
if (!SplitStringToIntegers(skip_dims_str, ":", false, &skip_dims)) {
KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
<< "integers)";
}


kaldi::int32 num_done = 0, num_err = 0;

std::string cmvn_rspecifier_or_rxfilename = po.GetArg(1);
std::string feat_rspecifier = po.GetArg(2);
std::string feat_wspecifier = po.GetArg(3);


SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
BaseFloatMatrixWriter feat_writer(feat_wspecifier);

if (ClassifyRspecifier(cmvn_rspecifier_or_rxfilename, NULL, NULL)
!= kNoRspecifier) { // reading from a Table: per-speaker or per-utt CMN/CVN.
std::string cmvn_rspecifier = cmvn_rspecifier_or_rxfilename;

RandomAccessDoubleMatrixReaderMapped cmvn_reader(cmvn_rspecifier,
utt2spk_rspecifier);

for (; !feat_reader.Done(); feat_reader.Next()) {
std::string utt = feat_reader.Key();
Matrix<BaseFloat> feat(feat_reader.Value());
Expand All @@ -101,7 +117,7 @@ int main(int argc, char *argv[]) {
Matrix<double> cmvn_stats = cmvn_reader.Value(utt);
if (!skip_dims.empty())
FakeStatsForSomeDims(skip_dims, &cmvn_stats);

if (reverse) {
ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
} else {
Expand All @@ -124,7 +140,7 @@ int main(int argc, char *argv[]) {
cmvn_stats.Read(ki.Stream(), binary);
if (!skip_dims.empty())
FakeStatsForSomeDims(skip_dims, &cmvn_stats);

for (;!feat_reader.Done(); feat_reader.Next()) {
std::string utt = feat_reader.Key();
Matrix<BaseFloat> feat(feat_reader.Value());
Expand All @@ -139,7 +155,7 @@ int main(int argc, char *argv[]) {
num_done++;
}
}
if (norm_vars)
if (norm_vars)
KALDI_LOG << "Applied cepstral mean and variance normalization to "
<< num_done << " utterances, errors on " << num_err;
else
Expand All @@ -151,4 +167,3 @@ int main(int argc, char *argv[]) {
return -1;
}
}

17 changes: 14 additions & 3 deletions src/featbin/copy-feats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ int main(int argc, char *argv[]) {
bool htk_in = false;
bool sphinx_in = false;
bool compress = false;
int32 compression_method_in = 1;
std::string num_frames_wspecifier;
po.Register("htk-in", &htk_in, "Read input as HTK features");
po.Register("sphinx-in", &sphinx_in, "Read input as Sphinx features");
Expand All @@ -50,6 +51,10 @@ int main(int argc, char *argv[]) {
po.Register("compress", &compress, "If true, write output in compressed form"
"(only currently supported for wxfilename, i.e. archive/script,"
"output)");
po.Register("compression-method", &compression_method_in,
"Only relevant if --compress=true; the method (1 through 6) to "
"compress the matrix. Search for CompressionMethod in "
"src/matrix/compressed-matrix.h.");
po.Register("write-num-frames", &num_frames_wspecifier,
"Wspecifier to write length in frames of each utterance. "
"e.g. 'ark,t:utt2num_frames'. Only applicable if writing tables, "
Expand All @@ -65,6 +70,9 @@ int main(int argc, char *argv[]) {

int32 num_done = 0;

CompressionMethod compression_method = static_cast<CompressionMethod>(
compression_method_in);

if (ClassifyRspecifier(po.GetArg(1), NULL, NULL) != kNoRspecifier) {
// Copying tables of features.
std::string rspecifier = po.GetArg(1);
Expand Down Expand Up @@ -104,7 +112,8 @@ int main(int argc, char *argv[]) {
SequentialTableReader<HtkMatrixHolder> htk_reader(rspecifier);
for (; !htk_reader.Done(); htk_reader.Next(), num_done++) {
kaldi_writer.Write(htk_reader.Key(),
CompressedMatrix(htk_reader.Value().first));
CompressedMatrix(htk_reader.Value().first,
compression_method));
if (!num_frames_wspecifier.empty())
num_frames_writer.Write(htk_reader.Key(),
htk_reader.Value().first.NumRows());
Expand All @@ -113,7 +122,8 @@ int main(int argc, char *argv[]) {
SequentialTableReader<SphinxMatrixHolder<> > sphinx_reader(rspecifier);
for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++) {
kaldi_writer.Write(sphinx_reader.Key(),
CompressedMatrix(sphinx_reader.Value()));
CompressedMatrix(sphinx_reader.Value(),
compression_method));
if (!num_frames_wspecifier.empty())
num_frames_writer.Write(sphinx_reader.Key(),
sphinx_reader.Value().NumRows());
Expand All @@ -122,7 +132,8 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader kaldi_reader(rspecifier);
for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++) {
kaldi_writer.Write(kaldi_reader.Key(),
CompressedMatrix(kaldi_reader.Value()));
CompressedMatrix(kaldi_reader.Value(),
compression_method));
if (!num_frames_wspecifier.empty())
num_frames_writer.Write(kaldi_reader.Key(),
kaldi_reader.Value().NumRows());
Expand Down
Loading