17 changes: 11 additions & 6 deletions src/cudamatrix/cu-kernels.cu
@@ -2098,20 +2098,25 @@ static void _diff_xent(const int32_cuda* vec_tgt, Real* mat_net_out, Real* vec_l
template<typename Real>
__global__
static void _compute_xvector_objf(const Real* scores, MatrixDim scores_dim,
Real* obfj_terms, MatrixDim objf_dim,
Real* obfj_derivs, MatrixDim derivs_dim) {
Real* objf_terms, MatrixDim objf_dim,
Real* objf_derivs, MatrixDim derivs_dim) {
int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x;
int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y;
int32_cuda scores_index = i + j * scores_dim.stride;
int32_cuda objf_index = i + j * objf_dim.stride;
int32_cuda derivs_index = i + j * derivs_dim.stride;
Real K = 1.0 / (scores_dim.rows - 2.0);
Real L = scores[scores_index];
if (i < scores_dim.cols && j < scores_dim.rows) {
if (i + 1 == j && i % 2 == 0) {
obfj_terms[scores_index] = log(1.0 + exp(-L));
obfj_derivs[scores_index] = 1.0 / (1.0 + exp(L));
objf_terms[objf_index] = L < -15 ? -L : log(1.0 + exp(-L));
objf_derivs[derivs_index] = 1.0 / (1.0 + exp(L));
} else if (i < j) {
obfj_terms[scores_index] = K * log(1.0 + exp(L));
obfj_derivs[scores_index] = -K / (1.0 + exp(-L));
objf_terms[objf_index] = K * (L > 15 ? L : log(1.0 + exp(L)));
objf_derivs[derivs_index] = -K / (1.0 + exp(-L));
} else {
objf_terms[objf_index] = 0.0;
objf_derivs[derivs_index] = 0.0;
}
}
}
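Two things change in this hunk: the output matrices are now indexed with their own strides (objf_index, derivs_index) rather than the score matrix's stride, and the log(1.0 + exp(...)) terms are guarded so that large-magnitude scores cannot overflow exp(). A minimal standalone sketch of that guard (editor's illustration, not part of the patch); the approximation error at the |L| = 15 threshold is about exp(-15) ≈ 3e-7:

#include <cmath>
#include <cstdio>

// Numerically stable softplus: for x > 15, exp(x) can overflow (near x = 89
// in single precision), but log(1 + exp(x)) ~= x to within ~exp(-15).
static float StableSoftplus(float x) {
  return x > 15.0f ? x : std::log(1.0f + std::exp(x));
}

int main() {
  // The same-pair term log(1 + exp(-L)) is StableSoftplus(-L); the
  // different-pair term log(1 + exp(L)) is StableSoftplus(L).
  std::printf("%g %g\n",
              StableSoftplus(100.0f),    // ~100; the unguarded form gives inf
              StableSoftplus(-100.0f));  // ~0
  return 0;
}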
15 changes: 10 additions & 5 deletions src/cudamatrix/cu-math.cc
@@ -227,16 +227,21 @@ void ComputeXvectorObjfFromScores(const CuMatrixBase<BaseFloat> &scores,
} else
#endif
{
// Compute the xvector objective function and its derivatives on the CPU.
int32 num_rows = scores.NumRows();
BaseFloat K = 1.0 / (num_rows - 2.0);
for (int32 i = 0; i < num_rows; i++) {
for (int32 j = i + 1; j < num_rows; j++) {
for (int32 j = 0; j < num_rows; j++) {
BaseFloat L = scores(i, j);
if (i + 1 == j && i % 2 == 0) {
(*objf_terms)(i, j) = log(1.0 + exp(-scores(i, j)));
(*objf_derivs)(i, j) = 1.0 / (1.0 + exp(scores(i, j)));
(*objf_terms)(i, j) = L < -15 ? -L : log(1.0 + exp(-L));
(*objf_derivs)(i, j) = 1.0 / (1.0 + exp(L));
} else if (i < j) {
(*objf_terms)(i, j) = K * (L > 15 ? L : log(1.0 + exp(L)));
(*objf_derivs)(i, j) = -K / (1.0 + exp(-L));
} else {
(*objf_terms)(i, j) = K * log(1.0 + exp(scores(i, j)));
(*objf_derivs)(i, j) = -K / (1.0 + exp(-scores(i, j)));
(*objf_terms)(i, j) = 0;
(*objf_derivs)(i, j) = 0;
}
}
}
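On the CPU path the inner loop now runs over all j rather than only j > i, and the new else branch zeroes the diagonal and lower triangle, mirroring the CUDA kernel above. A standalone sketch (editor's illustration, not Kaldi code) of which entries each branch handles for a toy N = 4 score matrix:

#include <cstdio>

int main() {
  // Rows 2n and 2n+1 hold xvectors from the same class, so entry (2n, 2n+1)
  // is a same-pair term ('S'); the rest of the strict upper triangle are
  // different-pair terms ('D'); everything else is unused and zeroed ('0').
  const int N = 4;
  for (int i = 0; i < N; i++) {
    for (int j = 0; j < N; j++) {
      char c;
      if (i + 1 == j && i % 2 == 0) c = 'S';
      else if (i < j)               c = 'D';
      else                          c = '0';
      std::printf("%c ", c);
    }
    std::printf("\n");   // prints: 0 S D D / 0 0 D D / 0 0 0 S / 0 0 0 0
  }
  return 0;
}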
4 changes: 2 additions & 2 deletions src/ivector/xvector-test.cc
@@ -40,7 +40,7 @@ void TestComputeXvectorObjfAndDeriv(
BaseFloat *tot_weight);

bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
int32 xvector_dim = RandInt(4, 50),
int32 xvector_dim = RandInt(4, 100),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
@@ -126,7 +126,7 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
}

bool TestXvectorComputeObjf() {
int32 xvector_dim = RandInt(4, 40),
int32 xvector_dim = RandInt(4, 100),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
8 changes: 4 additions & 4 deletions src/ivector/xvector.cc
@@ -46,10 +46,10 @@ void ComputeXvectorObjfAndDeriv(
P(N, xvector_dim),
Q(N, N),
R(N, N),
scores(N, N), // The raw scores.
objf_terms(N, N),
objf_deriv_terms(N, N); // Derivative of the
// objf w.r.t. the scores.
scores(N, N), // The raw scores.
objf_terms(N, N, kUndefined),
objf_deriv_terms(N, N, // Derivative of the
kUndefined); // objf w.r.t. the scores.
CuVector<BaseFloat> r(N);

P.AddMatMat(1.0, xvector_pairs, kNoTrans, S_tmp, kNoTrans, 0.0);
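Constructing objf_terms and objf_deriv_terms with kUndefined skips the zero fill that the default resize type performs; this is safe now that every entry, including the unused lower triangle, is written explicitly by the code above. A two-line sketch of the distinction (assuming Kaldi's CuMatrix constructor, which defaults to kSetZero):

CuMatrix<BaseFloat> a(N, N);              // default kSetZero: entries start at 0
CuMatrix<BaseFloat> b(N, N, kUndefined);  // no zeroing; every entry must be written before use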
6 changes: 3 additions & 3 deletions src/ivector/xvector.h
@@ -32,7 +32,7 @@ namespace kaldi {
/*
Computes the training objective function and the derivatives for
the xvector. Let N = xvector_pairs.NumRows() be the number of
xvectors. There are N(N-1)/2 pairs in total and N from the same
xvectors. There are N(N-1)/2 pairs in total and N/2 from the same
class. Let v(n) be the n'th row of the matrix xvector_pairs.
The total objective function written to 'tot_objf' is
\sum_{n=0}^{N/2} p_same(v(n*2), v(n*2+1))
@@ -61,9 +61,9 @@ namespace kaldi {
the objective function with respect to the parameter b is written here.
@param [out] tot_objf The total objective function described above
@param [out] tot_weight The total normalizing factor for the objective
function, equal to dvector_pairs.NumRows().
function, equal to xvector_pairs.NumRows().
*/
void ComputeXvectorObjfAndDeriv(const CuMatrixBase<BaseFloat> &dvector_pairs,
void ComputeXvectorObjfAndDeriv(const CuMatrixBase<BaseFloat> &xvector_pairs,
const CuSpMatrix<BaseFloat> &S,
BaseFloat b,
CuMatrixBase<BaseFloat> *deriv_xvector,
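Editor's note relating the documented objective to the terms computed in cu-kernels.cu / cu-math.cc above, under the assumption (not stated explicitly in this excerpt) that p_same is the logistic sigmoid of a pair's score s:

  p_same(s) = 1 / (1 + e^{-s})
  -log p_same(s)          = log(1 + e^{-s})      (the same-pair objf_terms entry)
  -log(1 - p_same(s))     = log(1 + e^{s})       (the different-pair entry, scaled by K = 1/(N-2))
  d/ds log p_same(s)      =  1 / (1 + e^{s})     (the same-pair objf_derivs entry)
  d/ds log(1 - p_same(s)) = -1 / (1 + e^{-s})    (the different-pair entry, scaled by K)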
2 changes: 1 addition & 1 deletion src/nnet3bin/Makefile
@@ -12,7 +12,7 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \
nnet3-am-adjust-priors nnet3-am-copy nnet3-compute-prob \
nnet3-average nnet3-am-info nnet3-combine nnet3-latgen-faster \
nnet3-copy nnet3-show-progress nnet3-align-compiled \
nnet3-get-egs-dense-targets nnet3-compute
nnet3-get-egs-dense-targets nnet3-compute nnet3-xvector-get-egs

OBJFILES =

240 changes: 240 additions & 0 deletions src/nnet3bin/nnet3-xvector-get-egs.cc
@@ -0,0 +1,240 @@
// nnet3bin/nnet3-xvector-get-egs.cc

// Copyright 2012-2016 Johns Hopkins University (author: Daniel Povey)
// 2016 David Snyder

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <sstream>

#include "util/common-utils.h"
#include "nnet3/nnet-example.h"

namespace kaldi {
namespace nnet3 {

// A struct for holding information about the position and
// duration of each pair of chunks.
struct ChunkPairInfo {
std::string pair_name;
int32 output_archive_id;
int32 start_frame1;
int32 start_frame2;
int32 num_frames1;
int32 num_frames2;
};

// Process the range input file and store it as a map from utterance
// name to vector of ChunkPairInfo structs.
static void ProcessRangeFile(const std::string &range_rxfilename,
unordered_map<std::string,
std::vector<ChunkPairInfo *> > *utt_to_pairs) {
Input range_input(range_rxfilename);
if (!range_rxfilename.empty()) {
std::string line;
while (std::getline(range_input.Stream(), line)) {
ChunkPairInfo *pair = new ChunkPairInfo();
std::vector<std::string> fields;
SplitStringToVector(line, " \t\n\r", true, &fields);
if (fields.size() != 6)
KALDI_ERR << "Expected 6 fields in line of range file, got "
<< fields.size() << " instead.";

std::string utt = fields[0],
start_frame1_str = fields[2],
num_frames1_str = fields[3],
start_frame2_str = fields[4],
num_frames2_str = fields[5];

if (!ConvertStringToInteger(fields[1], &(pair->output_archive_id))
|| !ConvertStringToInteger(start_frame1_str, &(pair->start_frame1))
|| !ConvertStringToInteger(start_frame2_str, &(pair->start_frame2))
|| !ConvertStringToInteger(num_frames1_str, &(pair->num_frames1))
|| !ConvertStringToInteger(num_frames2_str, &(pair->num_frames2)))
KALDI_ERR << "Expected integer for output archive in range file.";
pair->pair_name = utt + "-" + start_frame1_str + "-" + num_frames1_str
+ "-" + start_frame2_str + "-" + num_frames2_str;
unordered_map<std::string, std::vector<ChunkPairInfo*> >::iterator
got = utt_to_pairs->find(utt);
if (got == utt_to_pairs->end()) {
std::vector<ChunkPairInfo* > pairs;
pairs.push_back(pair);
utt_to_pairs->insert(std::make_pair<std::string,
std::vector<ChunkPairInfo* > > (utt, pairs));
} else {
got->second.push_back(pair);
}
}
}
}
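// Editor's note (illustration, not part of the original file): given the
// example ranges line from the usage message below,
//   utt1 3 0 65 112 110
// ProcessRangeFile() stores a ChunkPairInfo for utterance "utt1" with
// output_archive_id = 3 (a 0-based index into the egs output arguments),
// chunk 1 starting at frame 0 with 65 frames, chunk 2 starting at frame 112
// with 110 frames, and pair_name "utt1-0-65-112-110".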

static void WriteExamples(const MatrixBase<BaseFloat> &feats,
const std::vector<ChunkPairInfo *> &pairs,
const std::string &utt,
bool compress,
int32 *num_egs_written,
std::vector<NnetExampleWriter *> *example_writers) {
for (std::vector<ChunkPairInfo *>::const_iterator it = pairs.begin();
it != pairs.end(); ++it) {
ChunkPairInfo *pair = *it;
int32 num_rows = feats.NumRows(),
feat_dim = feats.NumCols();
if (num_rows < std::max(pair->num_frames1, pair->num_frames2)) {
KALDI_WARN << "Unable to create examples for utterance " << utt
<< ". Requested chunk size of "
<< std::max(pair->num_frames1, pair->num_frames2)
<< " but utterance has only " << num_rows << " frames.";
} else {
// The requested chunk positions are approximate. It's possible
// that they slightly exceed the number of frames in the utterance.
// If that occurs, we can shift the chunk's location back slightly.
int32 shift1 = std::min(0, num_rows - pair->start_frame1
- pair->num_frames1),
shift2 = std::min(0, num_rows - pair->start_frame2
- pair->num_frames2);
SubMatrix<BaseFloat> chunk1(feats, pair->start_frame1 + shift1,
pair->num_frames1, 0, feat_dim),
chunk2(feats, pair->start_frame2 + shift2,
pair->num_frames2, 0, feat_dim);
NnetIo nnet_io1 = NnetIo("input1", 0, chunk1),
nnet_io2 = NnetIo("input2", 0, chunk2);
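// Editor's note: the two loops below relabel each chunk's 'n' index so that,
// within this single example, chunk1 carries n = 0 and chunk2 carries n = 1,
// presumably so downstream nnet3 code can treat them as two distinct
// sequences packed into one eg.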
for (std::vector<Index>::iterator indx_it = nnet_io1.indexes.begin();
indx_it != nnet_io1.indexes.end(); ++indx_it)
indx_it->n = 0;
for (std::vector<Index>::iterator indx_it = nnet_io2.indexes.begin();
indx_it != nnet_io2.indexes.end(); ++indx_it)
indx_it->n = 1;

NnetExample eg;
eg.io.push_back(nnet_io1);
eg.io.push_back(nnet_io2);
if (compress)
eg.Compress();

if (pair->output_archive_id >= example_writers->size())
KALDI_ERR << "Requested output index exceeds number of specified "
<< "output files.";
(*example_writers)[pair->output_archive_id]->Write(
pair->pair_name, eg);
(*num_egs_written) += 1;
}
}
}

// Delete the dynamically allocated memory.
static void Cleanup(unordered_map<std::string,
std::vector<ChunkPairInfo *> > *utt_to_pairs,
std::vector<NnetExampleWriter *> *writers) {
for (unordered_map<std::string, std::vector<ChunkPairInfo*> >::iterator
map_it = utt_to_pairs->begin();
map_it != utt_to_pairs->end(); ++map_it)
for (std::vector<ChunkPairInfo*>::iterator
vec_it = map_it->second.begin(); vec_it != map_it->second.end();
++vec_it)
delete *vec_it;
for (std::vector<NnetExampleWriter *>::iterator
it = writers->begin(); it != writers->end(); ++it)
delete *it;
}

} // namespace nnet3
} // namespace kaldi

int main(int argc, char *argv[]) {
try {
using namespace kaldi;
using namespace kaldi::nnet3;
typedef kaldi::int32 int32;

const char *usage =
"Get examples for training an nnet3 neural network for the xvector\n"
"system. Each output example contains a pair of feature chunks from\n"
"the same utterance. The location and length of the feature chunks\n"
"are specified in the 'ranges' file. Each line is interpreted as\n"
"follows:\n"
" <source-utterance> <output-archive-index> <start-frame-index1>"
" <num-frames1> <start-frame-index2> <num-frames2>\n"
"For example:\n"
" utt1 3 0 65 112 110\n"
" utt1 0 160 50 214 180\n"
" utt2 ...\n"
"\n"
"Usage: nnet3-xvector-get-egs [options] <ranges-filename> "
"<features-rspecifier> <egs-0-out> <egs-1-out> ... <egs-N-1-out>\n"
"\n"
"For example:\n"
"nnet3-xvector-get-egs ranges.1 \"$feats\" ark:egs_temp.1.ark"
" ark:egs_temp.2.ark ark:egs_temp.3.ark\n";

bool compress = true;

ParseOptions po(usage);
po.Register("compress", &compress, "If true, write egs in "
"compressed format.");

po.Read(argc, argv);

if (po.NumArgs() < 3) {
po.PrintUsage();
exit(1);
}

std::string
range_rspecifier = po.GetArg(1),
feature_rspecifier = po.GetArg(2);
std::vector<NnetExampleWriter *> example_writers;

for (int32 i = 3; i <= po.NumArgs(); i++)
example_writers.push_back(new NnetExampleWriter(po.GetArg(i)));

unordered_map<std::string, std::vector<ChunkPairInfo *> > utt_to_pairs;
ProcessRangeFile(range_rspecifier, &utt_to_pairs);
SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier);

int32 num_done = 0,
num_err = 0,
num_egs_written = 0;

for (; !feat_reader.Done(); feat_reader.Next()) {
std::string key = feat_reader.Key();
const Matrix<BaseFloat> &feats = feat_reader.Value();
unordered_map<std::string, std::vector<ChunkPairInfo*> >::iterator
got = utt_to_pairs.find(key);
if (got == utt_to_pairs.end()) {
KALDI_WARN << "Could not create examples from utterance "
<< key << " because it has no entry in the ranges "
<< "input file.";
num_err++;
} else {
std::vector<ChunkPairInfo *> pairs = got->second;
WriteExamples(feats, pairs, key, compress, &num_egs_written,
&example_writers);
num_done++;
}
}
Cleanup(&utt_to_pairs, &example_writers);

KALDI_LOG << "Finished generating examples, "
<< "successfully processed " << num_done
<< " feature files, wrote " << num_egs_written << " examples; "
<< num_err << " files had errors.";
return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}