diff --git a/egs/wsj/s5/steps/compare_alignments.sh b/egs/wsj/s5/steps/compare_alignments.sh
new file mode 100755
index 00000000000..d72de435812
--- /dev/null
+++ b/egs/wsj/s5/steps/compare_alignments.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+
+# Copyright 2018  Johns Hopkins University (author: Daniel Povey)
+# Apache 2.0.
+
+# Compares two sets of alignments of the same data and writes human-readable
+# statistics about how they differ (per phone, per word and per utterance)
+# to the work directory.
+
+set -e
+stage=0
+cmd=run.pl  # We use this only for get_train_ctm.sh, which can be a little slow.
+num_to_sample=1000  # We sample this many utterances for human-readable display, starting from the worst and then
+                    # starting from the middle.
+cleanup=true
+
+if [ -f ./path.sh ]; then . ./path.sh; fi
+
+. ./utils/parse_options.sh
+
+if [ $# -ne 5 ] && [ $# -ne 6 ]; then
+  cat <<EOF
+  This script compares two sets of alignments of the same utterances and writes
+  statistics on how they differ (per phone, per word and per utterance), plus a
+  frame-by-frame display of a sample of utterances, to <work-dir>.
+
+  Usage: $0 [options] <lang-dir> (<data-dir>|<data-dir1> <data-dir2>) <ali-dir1> <ali-dir2> <work-dir>
+   e.g.: $0 data/lang data/train exp/tri2_ali exp/tri3_ali exp/compare_ali_2_3
+
+  Options:
+    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
+                                    # (passed through to get_train_ctm.sh)
+    --cleanup <true|false>          # Specify --cleanup false to prevent
+                                    # cleanup of temporary files.
+    --stage <stage>                 # Enables you to run part of the script.
+
+EOF
+  exit 1
+fi
+
+lang=$1
+data1=$2
+if [ $# -eq 5 ]; then
+  data2=$2
+  ali_dir1=$3
+  ali_dir2=$4
+  dir=$5
+else
+  data2=$3
+  ali_dir1=$4
+  ali_dir2=$5
+  dir=$6
+fi
+
+for f in $lang/phones.txt $ali_dir1/ali.1.gz $ali_dir2/ali.1.gz; do
+  if [ ! -f $f ]; then
+    echo "$0: expected file $f to exist"
+    exit 1
+  fi
+done
+
+nj1=$(cat $ali_dir1/num_jobs)
+nj2=$(cat $ali_dir2/num_jobs)
+
+mkdir -p $dir/log
+
+
+if [ $stage -le 0 ]; then
+  echo "$0: converting alignments to phones."
+
+  for j in $(seq $nj1); do gunzip -c $ali_dir1/ali.$j.gz; done | \
+    ali-to-phones --per-frame=true $ali_dir1/final.mdl ark:- ark:- | gzip -c > $dir/phones1.gz
+
+  for j in $(seq $nj2); do gunzip -c $ali_dir2/ali.$j.gz; done | \
+    ali-to-phones --per-frame=true $ali_dir2/final.mdl ark:- ark:- | gzip -c > $dir/phones2.gz
+fi
+
+if [ $stage -le 1 ]; then
+  echo "$0: getting comparison stats and utterance stats."
+  compare-int-vector --binary=false --write-confusion-matrix=$dir/conf.mat \
+    "ark:gunzip -c $dir/phones1.gz|" "ark:gunzip -c $dir/phones2.gz|" 2>$dir/log/compare_phones.log > $dir/utt_stats.phones
+  tail -n 8 $dir/log/compare_phones.log
+fi
+
+if [ $stage -le 3 ]; then
+  cat $dir/conf.mat | grep -v -F '[' | sed 's/]//' | awk '{n=NF; for (k=1;k<=n;k++) { conf[NR,k] = $k; row_tot[NR] += $k; col_tot[k] += $k; } } END{
+    for (row=1;row<=n;row++) for (col=1;col<=n;col++) {
+      val = conf[row,col]; this_row_tot = row_tot[row]; this_col_tot = col_tot[col];
+      rval=conf[col,row]
+      min_tot = (this_row_tot < this_col_tot ? this_row_tot : this_col_tot);
+      if (val != 0) {
+        phone1 = row-1; phone2 = col-1;
+        if (row == col) printf("COR %d %d %.2f%%\n", phone1, val, (val * 100 / this_row_tot));
+        else {
+          norm_prob = val * val / min_tot; # heuristic for sorting.
+          printf("SUB %d %d %d %d %.2f%% %.2f%%\n",
+                 norm_prob, phone1, phone2, val, (val * 100 / min_tot), (rval * 100 / min_tot)); }}}}' > $dir/phone_stats.all
+
+  (
+    echo "# Format: <phone> <num-frames> <percent-correct>"
+    grep '^COR' $dir/phone_stats.all | sort -n -k4,4 | awk '{print $2, $3, $4}' | utils/int2sym.pl -f 1 $lang/phones.txt
+  ) > $dir/phones_correct.txt
+
+  (
+    echo "#Format: <phone1> <phone2> <num-frames> <prob> <reverse-prob>"
+    echo "# <num-frames> is the number of frames that were labeled <phone1> in the first"
+    echo "# set of alignments and <phone2> in the second."
+    echo "# <prob> is <num-frames> divided by the smaller of the total num-frames of"
+    echo "# phone1 or phone2, whichever is smaller; expressed as a percentage."
+    echo "# <reverse-prob> is the same but for the reverse substitution, from"
+    echo "# <phone2> to <phone1>; comparing it with <prob> shows how symmetric the substitutions are."
+    grep '^SUB' $dir/phone_stats.all | sort -nr -k2,2 | awk '{print $3,$4,$5,$6,$7}' | utils/int2sym.pl -f 1-2 $lang/phones.txt
+  ) > $dir/phone_subs.txt
+fi
+
+if [ $stage -le 4 ]; then
+  echo "$0: getting CTMs"
+  steps/get_train_ctm.sh --use-segments false --print-silence true --cmd "$cmd" --frame-shift 1.0 $data1 $lang $ali_dir1 $dir/ctm1
+  steps/get_train_ctm.sh --use-segments false --print-silence true --cmd "$cmd" --frame-shift 1.0 $data2 $lang $ali_dir2 $dir/ctm2
+fi
+
+if [ $stage -le 5 ]; then
+  for n in 1 2; do
+    # Expand each CTM entry (times are in frames, as --frame-shift is 1.0) into one word-id per frame.
+    cat $dir/ctm${n}/ctm | utils/sym2int.pl -f 5 $lang/words.txt | \
+      awk 'BEGIN{utt_id="";} { if (utt_id != $1) { if (utt_id != "") printf("\n"); utt_id=$1; printf("%s ", utt_id); }
+           t_start=int($3); t_end=t_start + int($4); word=$5; for (t=t_start; t<t_end; t++) printf("%s ", word); }
+         END{ if (utt_id != "") printf("\n"); }' | gzip -c >$dir/words${n}.gz
+  done
+fi
+
+if [ $stage -le 5 ]; then
+  compare-int-vector --binary=false --write-tot-counts=$dir/words_tot.vec --write-diff-counts=$dir/words_diff.vec \
+    "ark:gunzip -c $dir/words1.gz|" "ark:gunzip -c $dir/words2.gz|" 2>$dir/log/compare_words.log >$dir/utt_stats.words
+  tail -n 8 $dir/log/compare_words.log
+fi
+
+if [ $stage -le 6 ]; then
+
+  ( echo "# Word stats. Format:";
+    echo "<proportion-of-frames-changed> <num-frames-changed> <num-frames-total> <word>"
+
+    paste <(awk '{for (n=2;n<NF;n++) print $n;}' <$dir/words_diff.vec) \
+          <(awk '{for (n=2;n<NF;n++) print $n;}' <$dir/words_tot.vec) | \
+      awk '{if ($2 > 0) print $1*$1/$2, $1/$2, $1, $2, (NR-1)}' | utils/int2sym.pl -f 5 $lang/words.txt | \
+      sort -nr | awk '{print $2, $3, $4, $5;}'
+  ) > $dir/word_stats.txt
+
+fi
+
+if [ $stage -le 7 ]; then
+  for type in phones words; do
+    num_utts=$(wc -l <$dir/utt_stats.$type)
+    cat $dir/utt_stats.$type | awk -v type=$type 'BEGIN{print "Utterance-id proportion-"type"-changed num-frames num-wrong-frames"; }
+       {print $1, $3 * 1.0 / $2, $2, $3; }' | sort -nr -k2,2 > $dir/utt_stats.$type.sorted
+    (
+      echo "$0: Percentiles 100, 90, .. 0 of proportion-$type-changed distribution (over utterances) are:"
+      # The step k is kept >= 1 so that having fewer than 11 utterances does not cause a division by zero.
+      cat $dir/utt_stats.$type.sorted | awk -v n=$num_utts 'BEGIN{k=int((n-1)/10); if (k < 1) k = 1;} {if ((NR-1) % k == 0) printf("%s ", $2); } END{print "";}'
+    ) | tee $dir/utt_stats.$type.percentiles
+  done
+fi
+
+
+if [ $stage -le 8 ]; then
+  # Display the 1000 worst utterances, and 1000 utterances from the middle of the pack, in a readable format.
+  num_utts=$(wc -l <$dir/utt_stats.words.sorted)
+  half_num_utts=$((num_utts/2))
+  if [ $num_to_sample -gt $half_num_utts ]; then
+    num_to_sample=$half_num_utts
+  fi
+  head -n $num_to_sample $dir/utt_stats.words.sorted | awk '{print $1}' > $dir/utt_ids.worst
+  tail -n +$half_num_utts $dir/utt_stats.words.sorted | head -n $num_to_sample | awk '{print $1}' > $dir/utt_ids.mid
+
+  for suf in worst mid; do
+    for n in 1 2; do
+      gunzip -c $dir/phones${n}.gz | copy-int-vector ark:- ark,t:- | utils/filter_scp.pl $dir/utt_ids.$suf >$dir/temp
+      # the next command reorders them, and duplicates the utterance-id, which we'll later use
+      # to display the word sequence.
+      awk '{print $1,$1,$1}' <$dir/utt_ids.$suf | utils/apply_map.pl -f 3 $dir/temp > $dir/phones${n}.$suf
+      rm $dir/temp
+    done
+    # the stuff with 0 and <eps> below is a kind of hack so that if the phones are the same, we end up
+    # with just the phone, but if different, we end up with p1/p2.
+    # The apply_map.pl stuff is to put the transcript there.
+
+    (
+      echo "# Format: <utterance-id> <word1> <word2> ... <wordN> <phone1> <phone2> ... <phoneN>"
+      echo "# If the two alignments have the same phone, just that phone will be printed;"
+      echo "# otherwise the two phones will be printed, as in 'phone1/phone2'. So '/' is present"
+      echo "# whenever there is a mismatch."
+
+      paste $dir/phones1.$suf $dir/phones2.$suf | perl -ane ' @A = split("\t", $_); @A1 = split(" ", $A[0]); @A2 = split(" ", $A[1]);
+          $utt = shift @A1; shift @A2; print $utt, " ";
+          for ($n = 0; $n < @A1 && $n < @A2; $n++) { $a1=$A1[$n]; $a2=$A2[$n]; if ($a1 eq $a2) { print "$a1 "; } else { print "$a1 0 $a2 "; }}
+          print "\n" ' | utils/int2sym.pl -f 3- $lang/phones.txt | sed 's: <eps> :/:g' | \
+        utils/apply_map.pl -f 2 $data1/text
+    ) > $dir/compare_phones_${suf}.txt
+  done
+fi
+
+
+if [ $stage -le 9 ] && $cleanup; then
+  rm $dir/phones{1,2}.gz $dir/words{1,2}.gz $dir/ctm*/ctm $dir/*.vec $dir/conf.mat \
+    $dir/utt_ids.* $dir/phones{1,2}.{mid,worst} $dir/utt_stats.{phones,words} \
+    $dir/phone_stats.all
+fi
+
+# clean up
+exit 0
diff --git a/egs/wsj/s5/steps/get_train_ctm.sh b/egs/wsj/s5/steps/get_train_ctm.sh
index 878e11e45ac..6942014fc88 100755
--- a/egs/wsj/s5/steps/get_train_ctm.sh
+++ b/egs/wsj/s5/steps/get_train_ctm.sh
@@ -20,8 +20,9 @@ echo "$0 $@"  # Print the command line for logging
 [ -f ./path.sh ] && . ./path.sh
 . parse_options.sh || exit 1;
 
-if [ $# -ne 3 ]; then
-  echo "Usage: $0 [options] <data-dir> <lang-dir> <ali-dir|exp-dir>"
+if [ $# -ne 3 ] && [ $# -ne 4 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <ali-dir|exp-dir> [<output-dir>]"
+  echo "(<output-dir> defaults to <ali-dir|exp-dir>.)"
   echo " Options:"
   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
   echo "    --stage (0|1|2)                 # start scoring script from part-way through."
@@ -39,27 +40,31 @@ fi
 
 data=$1
 lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
-dir=$3
+ali_dir=$3
+dir=$4
+if [ -z "$dir" ]; then
+  dir=$ali_dir
+fi
 
-model=$dir/final.mdl # assume model one level up from decoding dir.
+model=$ali_dir/final.mdl # the model is taken from the alignment directory.
 
 
-for f in $lang/words.txt $model $dir/ali.1.gz $lang/oov.int; do
+for f in $lang/words.txt $model $ali_dir/ali.1.gz $lang/oov.int; do
   [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
 done
 
 oov=`cat $lang/oov.int` || exit 1;
-nj=`cat $dir/num_jobs` || exit 1;
+nj=`cat $ali_dir/num_jobs` || exit 1;
 
 split_data.sh $data $nj || exit 1;
 sdata=$data/split$nj
 
-mkdir -p $dir/log
+mkdir -p $dir/log || exit 1;
 
 if [ $stage -le 0 ]; then
   if [ -f $lang/phones/word_boundary.int ]; then
     $cmd JOB=1:$nj $dir/log/get_ctm.JOB.log \
-      set -o pipefail '&&' linear-to-nbest "ark:gunzip -c $dir/ali.JOB.gz|" \
+      set -o pipefail '&&' linear-to-nbest "ark:gunzip -c $ali_dir/ali.JOB.gz|" \
       "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \
       '' '' ark:- \| \
       lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
@@ -72,7 +77,7 @@ if [ $stage -le 0 ]; then
       exit 1;
     fi
     $cmd JOB=1:$nj $dir/log/get_ctm.JOB.log \
-      set -o pipefail '&&' linear-to-nbest "ark:gunzip -c $dir/ali.JOB.gz|" \
+      set -o pipefail '&&' linear-to-nbest "ark:gunzip -c $ali_dir/ali.JOB.gz|" \
       "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text |" \
       '' '' ark:- \| \
       lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \
@@ -94,4 +99,3 @@ if [ $stage -le 1 ]; then
   fi
   rm $dir/ctm.*.gz
 fi
-
diff --git a/src/bin/Makefile b/src/bin/Makefile
index b0a660a6ad1..7cb01b50120 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -21,7 +21,8 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \
            post-to-pdf-post logprob-to-post prob-to-post copy-post \
            matrix-sum build-pfile-from-ali get-post-on-ali tree-info am-info \
            vector-sum matrix-sum-rows est-pca sum-lda-accs sum-mllt-accs \
-           transform-vec align-text matrix-dim post-to-smat compile-graph
+           transform-vec align-text matrix-dim post-to-smat compile-graph \
+           compare-int-vector
 
 OBJFILES =
 
@@ -30,7 +31,7 @@ ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../lm/kaldi-lm.a \
           ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \
           ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
           ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \
-          ../base/kaldi-base.a 
+          ../base/kaldi-base.a
 
 TESTFILES =
 
diff --git a/src/bin/ali-to-phones.cc b/src/bin/ali-to-phones.cc
index 2a76000cfae..602e32e9768 100644
--- a/src/bin/ali-to-phones.cc
+++ b/src/bin/ali-to-phones.cc
@@ -38,7 +38,7 @@ int main(int argc, char *argv[]) {
         " ali-to-phones 1.mdl ark:1.ali ark:-\n"
         "or:\n"
         " ali-to-phones --ctm-output 1.mdl ark:1.ali 1.ctm\n"
-        "See also: show-alignments lattice-align-phones\n";
+        "See also: show-alignments, lattice-align-phones, compare-int-vector\n";
     ParseOptions po(usage);
     bool per_frame = false;
     bool write_lengths = false;
@@ -137,5 +137,3 @@ int main(int argc, char *argv[]) {
     return -1;
   }
 }
-
-
diff --git a/src/bin/compare-int-vector.cc b/src/bin/compare-int-vector.cc
new file mode 100644
index 00000000000..5f80ff5ee6c
--- /dev/null
+++ b/src/bin/compare-int-vector.cc
@@ -0,0 +1,184 @@
+// bin/compare-int-vector.cc
+
+// Copyright 2018  Johns Hopkins University (Author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-vector.h"
+#include "transform/transform-common.h"
+#include <iomanip>
+
+namespace kaldi {
+// Adds value_to_add to (*counts)[location_to_add], first growing *counts with
+// zeros if it is too short.  Raises an error if location_to_add is negative.
+void AddToCount(int32 location_to_add, double value_to_add,
+                std::vector<double> *counts) {
+  if (location_to_add < 0)
+    KALDI_ERR << "Contents of vectors cannot be "
+        "negative if --write-tot-counts or --write-diff-counts "
+        "options are provided.";
+  if (counts->size() <= static_cast<size_t>(location_to_add))
+    counts->resize(location_to_add + 1, 0.0);
+  (*counts)[location_to_add] += value_to_add;
+}
+
+// Adds 1 to (*counts)(phone1, phone2), growing the square matrix if needed.
+void AddToConfusionMatrix(int32 phone1, int32 phone2, Matrix<double> *counts) {
+  if (phone1 < 0 || phone2 < 0)
+    KALDI_ERR << "Contents of vectors cannot be "
+        "negative if --write-confusion-matrix option is "
+        "provided.";
+  int32 max_size = std::max(phone1, phone2) + 1;
+  if (counts->NumRows() < max_size)
+    counts->Resize(max_size, max_size, kCopyData);
+  (*counts)(phone1, phone2) += 1.0;
+}
+
+// Copies 'counts' into a Kaldi vector and writes it to 'wxfilename'.
+void WriteAsKaldiVector(const std::vector<double> &counts,
+                        const std::string &wxfilename,
+                        bool binary) {
+  Vector<BaseFloat> counts_vec(counts.size());
+  for (size_t i = 0; i < counts.size(); i++)
+    counts_vec(i) = counts[i];
+  WriteKaldiObject(counts_vec, wxfilename, binary);
+}
+
+}  // namespace kaldi
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Compare vectors of integers (e.g. phone alignments)\n"
+        "Prints to stdout fields of the form:\n"
+        " <utterance-id> <num-frames-in-utterance> <num-frames-that-differ>\n"
+        "\n"
+        "e.g.:\n"
+        " SWB1_A_31410_32892 420 36\n"
+        "\n"
+        "Usage:\n"
+        "compare-int-vector [options] <vector1-rspecifier> <vector2-rspecifier>\n"
+        "\n"
+        "e.g. compare-int-vector scp:foo.scp scp:bar.scp > comparison\n"
+        "E.g. the inputs might come from ali-to-phones.\n"
+        "Warnings are printed if the vector lengths differ for a given utterance-id,\n"
+        "and in those cases, the number of frames printed will be the smaller of the two.\n"
+        "\n"
+        "See also: ali-to-phones, copy-int-vector\n";
+
+
+    ParseOptions po(usage);
+
+    std::string tot_wxfilename,
+        diff_wxfilename,
+        confusion_matrix_wxfilename;
+    bool binary = true;
+
+    po.Register("binary", &binary, "If true, write in binary mode (only applies "
+                "if one of the --write-* options is supplied).");
+    po.Register("write-tot-counts", &tot_wxfilename, "Filename to write "
+                "vector of total counts.  These may be summed with 'vector-sum'.");
+    po.Register("write-diff-counts", &diff_wxfilename, "Filename to write "
+                "vector of counts of phones (or whatever is in the inputs) "
+                "that differ from one vector to the other.  Each time a pair differs, "
+                "0.5 will be added to each one's location.");
+    po.Register("write-confusion-matrix", &confusion_matrix_wxfilename,
+                "Filename to write confusion matrix, indexed by [phone1][phone2].  "
+                "These may be summed by 'matrix-sum'.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string vector1_rspecifier = po.GetArg(1),
+        vector2_rspecifier = po.GetArg(2);
+
+    int64 num_done = 0, num_not_found = 0, num_mismatched_lengths = 0,
+        tot_frames = 0, tot_difference = 0;
+
+    std::vector<double> diff_counts;
+    std::vector<double> tot_counts;
+    Matrix<double> confusion_matrix;
+
+    SequentialInt32VectorReader reader1(vector1_rspecifier);
+    RandomAccessInt32VectorReader reader2(vector2_rspecifier);
+
+    // num_done counts only the utterances that were found in both inputs.
+    for (; !reader1.Done(); reader1.Next()) {
+      const std::string &key = reader1.Key();
+      if (!reader2.HasKey(key)) {
+        KALDI_WARN << "No key " << key << " found in second input.";
+        num_not_found++;
+        continue;
+      }
+      const std::vector<int32> &value1 = reader1.Value(),
+          &value2 = reader2.Value(key);
+      size_t len1 = value1.size(), len2 = value2.size();
+      if (len1 != len2) {
+        KALDI_WARN << "For utterance " << key << ", lengths differ "
+                   << len1 << " vs. " << len2;
+        num_mismatched_lengths++;
+      }
+      size_t len = std::min(len1, len2),  // compare only the overlapping frames.
+          difference = 0;
+      for (size_t i = 0; i < len; i++) {
+        int32 phone1 = value1[i], phone2 = value2[i];
+        if (phone1 != phone2) {
+          difference++;
+          if (!diff_wxfilename.empty()) {
+            AddToCount(phone1, 0.5, &diff_counts);  // 0.5 each: 1 per differing frame.
+            AddToCount(phone2, 0.5, &diff_counts);
+          }
+        }
+        if (!tot_wxfilename.empty())
+          AddToCount(phone1, 1.0, &tot_counts);
+        if (!confusion_matrix_wxfilename.empty())
+          AddToConfusionMatrix(phone1, phone2, &confusion_matrix);
+      }
+      num_done++;
+      std::cout << key << " " << len << " " << difference << "\n";
+      tot_frames += len;
+      tot_difference += difference;
+    }
+
+    BaseFloat difference_percent = (tot_frames > 0 ?
+                                    tot_difference * 100.0 / tot_frames : 0.0);
+    KALDI_LOG << "Computed difference for " << num_done << " utterances, of which "
+              << num_mismatched_lengths << " had mismatched lengths; corresponding "
+        "utterance not found for " << num_not_found;
+    KALDI_LOG << "Average p(different) is " << std::setprecision(4) << difference_percent
+              << "%, over " << tot_frames << " frames.";
+
+    if (!tot_wxfilename.empty())
+      WriteAsKaldiVector(tot_counts, tot_wxfilename, binary);
+    if (!diff_wxfilename.empty())
+      WriteAsKaldiVector(diff_counts, diff_wxfilename, binary);
+    if (!confusion_matrix_wxfilename.empty())
+      WriteKaldiObject(confusion_matrix, confusion_matrix_wxfilename, binary);
+
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/bin/matrix-sum.cc b/src/bin/matrix-sum.cc
index 8a7b5a39e00..3c93dfd0d39 100644
--- a/src/bin/matrix-sum.cc
+++ b/src/bin/matrix-sum.cc
@@ -238,17 +238,20 @@ int32 TypeThreeUsage(const ParseOptions &po,
               << "tables, the intermediate arguments must not be tables.";
   }
 
-  bool add = true;
-  Matrix<BaseFloat> mat;
+  Matrix<BaseFloat> sum;
   for (int32 i = 1; i < po.NumArgs(); i++) {
-    bool binary_in;
-    Input ki(po.GetArg(i), &binary_in);
-    // this Read function will throw if there is a size mismatch.
-    mat.Read(ki.Stream(), binary_in, add);
+    Matrix<BaseFloat> this_mat;
+    ReadKaldiObject(po.GetArg(i), &this_mat);
+    int32 rows = this_mat.NumRows(), cols = this_mat.NumCols();
+    if (sum.NumRows() < rows || sum.NumCols() < cols)  // grow, keeping data.
+      sum.Resize(std::max(sum.NumRows(), rows),
+                 std::max(sum.NumCols(), cols), kCopyData);
+    // 'sum' may be larger than this_mat, so add into its top-left block.
+    sum.Range(0, rows, 0, cols).AddMat(1.0, this_mat);
   }
   if (average)
-    mat.Scale(1.0 / (po.NumArgs() - 1));
-  WriteKaldiObject(mat, po.GetArg(po.NumArgs()), binary);
+    sum.Scale(1.0 / (po.NumArgs() - 1));
+  WriteKaldiObject(sum, po.GetArg(po.NumArgs()), binary);
   KALDI_LOG << "Summed " << (po.NumArgs() - 1) << " matrices; "
             << "wrote sum to " << PrintableWxfilename(po.GetArg(po.NumArgs()));
   return 0;
@@ -335,4 +338,3 @@ int main(int argc, char *argv[]) {
     return -1;
   }
 }
-
diff --git a/src/bin/vector-sum.cc b/src/bin/vector-sum.cc
index 42404e38384..3e622cafdc7 100644
--- a/src/bin/vector-sum.cc
+++ b/src/bin/vector-sum.cc
@@ -1,7 +1,7 @@
 // bin/vector-sum.cc
 
-// Copyright 2014 Vimal Manohar 
-//           2014 Johns Hopkins University (author: Daniel Povey)
+// Copyright 2014 Vimal Manohar
+//           2014-2018 Johns Hopkins University (author: Daniel Povey)
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -43,7 +43,7 @@ int32 TypeOneUsage(const ParseOptions &po) {
 
   // Input vectors
   SequentialBaseFloatVectorReader vector_reader1(vector_in_fn1);
-  std::vector<RandomAccessBaseFloatVectorReader*> vector_readers(num_args-2, 
+  std::vector<RandomAccessBaseFloatVectorReader*> vector_readers(num_args-2,
                                                                  static_cast<RandomAccessBaseFloatVectorReader*>(NULL));
   std::vector<std::string> vector_in_fns(num_args-2);
   for (int32 i = 2; i < num_args; ++i) {
@@ -51,7 +51,7 @@ int32 TypeOneUsage(const ParseOptions &po) {
     vector_in_fns[i-2] = po.GetArg(i);
   }
 
-  int32 n_utts = 0, n_total_vectors = 0, 
+  int32 n_utts = 0, n_total_vectors = 0,
         n_success = 0, n_missing = 0, n_other_errors = 0;
 
   for (; !vector_reader1.Done(); vector_reader1.Next()) {
@@ -70,10 +70,10 @@ int32 TypeOneUsage(const ParseOptions &po) {
         if (vector2.Dim() == vector_out.Dim()) {
           vector_out.AddVec(1.0, vector2);
         } else {
-          KALDI_WARN << "Dimension mismatch for utterance " << key 
+          KALDI_WARN << "Dimension mismatch for utterance " << key
                      << " : " << vector2.Dim() << " for "
                      << "system " << (i + 2) << ", rspecifier: "
-                     << vector_in_fns[i] << " vs " << vector_out.Dim() 
+                     << vector_in_fns[i] << " vs " << vector_out.Dim()
                      << " primary vector, rspecifier:" << vector_in_fn1;
           n_other_errors++;
         }
@@ -94,9 +94,9 @@ int32 TypeOneUsage(const ParseOptions &po) {
             << " different systems";
   KALDI_LOG << "Produced output for " << n_success << " utterances; "
             << n_missing << " total missing vectors";
-  
+
   DeletePointers(&vector_readers);
-  
+
   return (n_success != 0 && n_missing < (n_success - n_missing)) ? 0 : 1;
 }
 
@@ -108,13 +108,13 @@ int32 TypeTwoUsage(const ParseOptions &po,
                "vector-sum: first argument must be an rspecifier");
   // if next assert fails it would be bug in the code as otherwise we shouldn't
   // be called.
-  KALDI_ASSERT(ClassifyWspecifier(po.GetArg(2), NULL, NULL, NULL) == 
+  KALDI_ASSERT(ClassifyWspecifier(po.GetArg(2), NULL, NULL, NULL) ==
                kNoWspecifier);
 
   SequentialBaseFloatVectorReader vec_reader(po.GetArg(1));
 
   Vector<double> sum;
-  
+
   int32 num_done = 0, num_err = 0;
 
   for (; !vec_reader.Done(); vec_reader.Next()) {
@@ -134,7 +134,7 @@ int32 TypeTwoUsage(const ParseOptions &po,
       }
     }
   }
-  
+
   if (num_done > 0 && average)
     sum.Scale(1.0 / num_done);
   Vector<BaseFloat> sum_float(sum);
@@ -157,21 +157,21 @@ int32 TypeThreeUsage(const ParseOptions &po,
                 << "tables, the intermediate arguments must not be tables.";
     }
   }
-  if (ClassifyWspecifier(po.GetArg(po.NumArgs()), NULL, NULL, NULL) != 
+  if (ClassifyWspecifier(po.GetArg(po.NumArgs()), NULL, NULL, NULL) !=
       kNoWspecifier) {
     KALDI_ERR << "Wrong usage (type 3): if first and last arguments are not "
               << "tables, the intermediate arguments must not be tables.";
   }
 
-  bool add = true;
-  Vector<BaseFloat> vec;
+  Vector<BaseFloat> sum;
   for (int32 i = 1; i < po.NumArgs(); i++) {
-    bool binary_in;
-    Input ki(po.GetArg(i), &binary_in);
-    // this Read function will throw if there is a size mismatch.
-    vec.Read(ki.Stream(), binary_in, add);
+    Vector<BaseFloat> this_vec;
+    ReadKaldiObject(po.GetArg(i), &this_vec);
+    if (sum.Dim() < this_vec.Dim())  // grow, keeping the data so far.
+      sum.Resize(this_vec.Dim(), kCopyData);
+    sum.Range(0, this_vec.Dim()).AddVec(1.0, this_vec);  // sum may be longer.
   }
-  WriteKaldiObject(vec, po.GetArg(po.NumArgs()), binary);
+  WriteKaldiObject(sum, po.GetArg(po.NumArgs()), binary);
   KALDI_LOG << "Summed " << (po.NumArgs() - 1) << " vectors; "
             << "wrote sum to " << PrintableWxfilename(po.GetArg(po.NumArgs()));
   return 0;
@@ -201,15 +201,15 @@ int main(int argc, char *argv[]) {
         " <vector-in-rxfilenameN> <vector-out-wxfilename>\n"
         " e.g.: vector-sum --binary=false 1.vec 2.vec 3.vec sum.vec\n"
         "See also: copy-vector, dot-weights\n";
-    
-    bool binary, average = false;
-    
+
+    bool binary = true, average = false;
+
     ParseOptions po(usage);
 
     po.Register("binary", &binary, "If true, write output as binary (only "
                 "relevant for usage types two or three");
     po.Register("average", &average, "Do average instead of sum");
-    
+
     po.Read(argc, argv);
 
     int32 N = po.NumArgs(), exit_status;
@@ -226,11 +226,11 @@ int main(int argc, char *argv[]) {
       exit_status = TypeTwoUsage(po, binary, average);
     } else if (po.NumArgs() >= 2 &&
                ClassifyRspecifier(po.GetArg(1), NULL, NULL) == kNoRspecifier &&
-               ClassifyWspecifier(po.GetArg(N), NULL, NULL, NULL) == 
+               ClassifyWspecifier(po.GetArg(N), NULL, NULL, NULL) ==
                kNoWspecifier) {
       // summing flat files.
       exit_status = TypeThreeUsage(po, binary);
-    } else {      
+    } else {
       po.PrintUsage();
       exit(1);
     }
diff --git a/src/feat/feature-common-inl.h b/src/feat/feature-common-inl.h
index 546f272e821..ad8fa244982 100644
--- a/src/feat/feature-common-inl.h
+++ b/src/feat/feature-common-inl.h
@@ -49,8 +49,7 @@ void OfflineFeatureTpl<F>::ComputeFeatures(
                          new_sample_freq, &downsampled_wave);
       Compute(downsampled_wave, vtln_warp, output);
     } else
-      KALDI_ERR << "The waveform is allowed to get downsampled."
-                << "New sample Frequency " << new_sample_freq
+      KALDI_ERR << "New sample Frequency " << new_sample_freq
                 << " is larger than waveform original sampling frequency "
                 << sample_freq;
 