Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2105,11 +2105,11 @@ static void _compute_xvector_objf(const Real* scores, MatrixDim scores_dim,
int32_cuda scores_index = i + j * scores_dim.stride;
Real K = 1.0 / (scores_dim.rows - 2.0);
Real L = scores[scores_index];
if (i < scores_dim.cols && j < scores_dim.rows && i < j) {
if (i < scores_dim.cols && j < scores_dim.rows) {
if (i + 1 == j && i % 2 == 0) {
obfj_terms[scores_index] = log(1.0 + exp(-L));
obfj_derivs[scores_index] = 1.0 / (1.0 + exp(L));
} else if (i != j) {
} else if (i < j) {
obfj_terms[scores_index] = K * log(1.0 + exp(L));
obfj_derivs[scores_index] = -K / (1.0 + exp(-L));
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I were you, at this point I'd add:
else {
objf_terms[scores_index] = 0.0;
objf_derivs[scores_index] = 0.0;
}
This will save you from having to initialize those to zero (i.e. you can size them with kUndefined).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, please don't assume that the stride of objf_derivs is the same as that of objf_terms. Doing so is against the interface.

Expand Down
34 changes: 18 additions & 16 deletions src/cudamatrix/cu-math.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,11 @@ void Copy(const CuMatrixBase<Real> &src, const CuArray<int32> &copy_from_indices
}
}

template<typename Real>
void ComputeXvectorObjfFromScores(const CuMatrixBase<Real> &scores,
CuMatrixBase<Real> *objf_terms,
CuMatrixBase<Real> *objf_derivs) {
void ComputeXvectorObjfFromScores(const CuMatrixBase<BaseFloat> &scores,
CuMatrixBase<BaseFloat> *objf_terms,
CuMatrixBase<BaseFloat> *objf_derivs) {
KALDI_ASSERT(SameDim(*objf_terms, *objf_derivs)
&& SameDim(*objf_terms, scores));
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
Expand All @@ -226,8 +227,19 @@ void ComputeXvectorObjfFromScores(const CuMatrixBase<Real> &scores,
} else
#endif
{
// TODO: Add the CPU version.
KALDI_LOG << "NOT USING CUDA";
int32 num_rows = scores.NumRows();
BaseFloat K = 1.0 / (num_rows - 2.0);
for (int32 i = 0; i < num_rows; i++) {
for (int32 j = i + 1; j < num_rows; j++) {
if (i + 1 == j && i % 2 == 0) {
(*objf_terms)(i, j) = log(1.0 + exp(-scores(i, j)));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You'll need to be a little more careful about the possibility of overflow. I suggest using something like:
x > 15.0 ? x : log(1.0 + exp(x))

(*objf_derivs)(i, j) = 1.0 / (1.0 + exp(scores(i, j)));
} else {
(*objf_terms)(i, j) = K * log(1.0 + exp(scores(i, j)));
(*objf_derivs)(i, j) = -K / (1.0 + exp(-scores(i, j)));
}
}
}
}
}

Expand Down Expand Up @@ -259,16 +271,6 @@ void Randomize(const CuMatrixBase<double> &src,
const CuArray<int32> &copy_from_idx,
CuMatrixBase<double> *tgt);

template
void ComputeXvectorObjfFromScores(const CuMatrixBase<float> &scores,
CuMatrixBase<float> *objf_terms,
CuMatrixBase<float> *objf_derivs);
template
void ComputeXvectorObjfFromScores(const CuMatrixBase<double> &scores,
CuMatrixBase<double> *objf_terms,
CuMatrixBase<double> *objf_derivs);



} //namespace cu

Expand Down
26 changes: 24 additions & 2 deletions src/cudamatrix/cu-math.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,31 @@ void Group2norm(const CuMatrixBase<Real> &src,
int32 group_stride);

/*
TODO: Documentation.
This function is used in computing the objective function and derivatives
in xvector training.
@param [in] scores 'scores' is a symmetric matrix of scores which are to
be interpreted as log-odds (according to the model) of pairs coming from the
same class, so scores(i, j) is the model's log(p(same) / p(different)) for
elements i and j of the original minibatch of input. We assume that the data
in 'scores' has been arranged in such a way that pairs of indexes of the form
(2k, 2k+1), e.g., (0, 1), (2, 3), (4, 5), etc, are from the same class, but
indexes of any other form, such as (0, 2), (1, 2), etc, are from different
classes.
@param [out] objf_terms 'objf_terms' is a matrix of the same dimension as
'scores' whose elements we will sum to get the objective function for this
minibatch. This function computes the appropriate contributions to the
objective function, as follows.
if i == j:
objf_terms(i, j) = 0 # the same exact element is not scored
elsif (i, j) is of the form (2k, 2k+1) or (2k+1, 2k):
objf_terms(i, j) = log(p(same))
= -log(1 + exp(-scores(i, j)))
else:
objf_terms(i, j) = 1 / (scores.NumRows() - 2) * log(p(different))
= -1/(scores.NumRows() - 2) * log(1+exp(scores(i,j)))
@param [out] objf_derivs Element (i,j) of this matrix is the derivative
of objf_terms(i,j) with respect to scores(i, j).
*/
template <typename BaseFloat>
void ComputeXvectorObjfFromScores(const CuMatrixBase<BaseFloat> &scores,
CuMatrixBase<BaseFloat> *objf_terms,
CuMatrixBase<BaseFloat> *objf_derivs);
Expand Down
133 changes: 71 additions & 62 deletions src/ivector/xvector-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ void TestGetDeriv(const CuVector<BaseFloat> &v,
const CuVector<BaseFloat> &w, const CuSpMatrix<BaseFloat> &S,
BaseFloat b, bool is_same, BaseFloat similarity_score,
CuVector<BaseFloat> *deriv_v, CuVector<BaseFloat> *deriv_w,
CuVector<BaseFloat> *deriv_S_and_b);
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b);

void TestComputeXvectorObjfAndDeriv(
const CuMatrixBase<BaseFloat> &xvector_pairs,
const CuSpMatrix<BaseFloat> &S,
BaseFloat b, CuMatrixBase<BaseFloat> *deriv_xvector,
CuVector<BaseFloat> *deriv_S_and_b, BaseFloat *tot_objf,
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b, BaseFloat *tot_objf,
BaseFloat *tot_weight);

bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
int32 xvector_dim = RandInt(4, 30),
int32 xvector_dim = RandInt(4, 50),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
Expand All @@ -49,14 +49,15 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
S.Scale(1.0e-01);
BaseFloat b = RandInt(-100, 100) / 10.0,
tot_weight,
tot_objf;
tot_objf,
deriv_b;
int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
CuMatrix<BaseFloat> xvector_pairs(num_rows, xvector_dim, kSetZero),
deriv_xvector(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b(S_dim + 1, kSetZero);
CuVector<BaseFloat> deriv_S(S_dim, kSetZero);
xvector_pairs.SetRandn();
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector,
&deriv_S_and_b, &tot_objf, &tot_weight);
&deriv_S, &deriv_b, &tot_objf, &tot_weight);
CuVector<BaseFloat> deriv_xvector_vec(xvector_dim);

// Sum over the derivatives for xvector input.
Expand All @@ -74,14 +75,12 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
xvector_pairs_p(j, i) += perturb_delta;
xvector_pairs_n(j, i) += -perturb_delta;
}
CuMatrix<BaseFloat> deriv_xvector_tmp(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b_tmp(S_dim + 1, kSetZero);
BaseFloat tot_objf_p;
BaseFloat tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_n, &tot_weight);
BaseFloat tot_objf_p,
tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, NULL,
NULL, NULL, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, NULL,
NULL, NULL, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n)
* 1.0 / (2.0 * perturb_delta);
l2_xvector += pow(deriv_xvector_vec(i) - delta, 2);
Expand All @@ -92,43 +91,42 @@ bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) {
for (int32 i = 0; i < S_dim; i++) {
CuSpMatrix<BaseFloat> S_p(S);
CuSpMatrix<BaseFloat> S_n(S);
S_p.Data()[i] += perturb_delta;
S_n.Data()[i] -= perturb_delta;
CuMatrix<BaseFloat> deriv_xvector_tmp(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b_tmp(S_dim + 1, kSetZero);
BaseFloat tot_objf_p;
BaseFloat tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_n, &tot_weight);
CuSubVector<BaseFloat> S_p_vec(S_p.Data(), S_dim);
CuSubVector<BaseFloat> S_n_vec(S_n.Data(), S_dim);
S_p_vec(i) += perturb_delta;
S_n_vec(i) += -perturb_delta;
BaseFloat tot_objf_p,
tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, NULL,
NULL, NULL, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, NULL,
NULL, NULL, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n)
* 1.0 / (2.0 * perturb_delta);
l2_S += pow(deriv_S_and_b(i) - delta, 2);
l2_S += pow(deriv_S(i) - delta, 2);
}

// Compare the b derivative calculated above with a numerical
// approximation.
BaseFloat b_p = b + perturb_delta;
BaseFloat b_n = b - perturb_delta;
CuMatrix<BaseFloat> deriv_xvector_tmp(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b_tmp(S_dim + 1, kSetZero);
BaseFloat tot_objf_p;
BaseFloat tot_objf_n;
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, &deriv_xvector_tmp,
&deriv_S_and_b_tmp, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n) * 1.0 / (2.0 * perturb_delta);
l2_b = pow(deriv_S_and_b(S_dim) - delta, 2);
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, NULL,
NULL, NULL, &tot_objf_p, &tot_weight);
ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, NULL,
NULL, NULL, &tot_objf_n, &tot_weight);
BaseFloat delta = (tot_objf_p - tot_objf_n)
* 1.0 / (2.0 * perturb_delta);
l2_b = pow(deriv_b - delta, 2);
KALDI_ASSERT(l2_xvector < 1.0e-03);
KALDI_ASSERT(l2_S < 1.0e-03);
KALDI_ASSERT(l2_b < 1.0e-03);
return true;
}

bool TestXvectorComputeObjf() {
int32 xvector_dim = RandInt(4, 30),
int32 xvector_dim = RandInt(4, 40),
num_rows = 2 * RandInt(2, 10); // The number of rows must be even
// and greater than 2.
CuSpMatrix<BaseFloat> S(xvector_dim);
Expand All @@ -139,19 +137,21 @@ bool TestXvectorComputeObjf() {
tot_weight,
tot_weight_test,
tot_objf,
tot_objf_test;
tot_objf_test,
deriv_b,
deriv_b_test;
int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
CuMatrix<BaseFloat> xvector_pairs(num_rows, xvector_dim, kSetZero),
deriv_xvector(num_rows, xvector_dim, kSetZero),
deriv_xvector_test(num_rows, xvector_dim, kSetZero);
CuVector<BaseFloat> deriv_S_and_b(S_dim + 1, kSetZero),
deriv_S_and_b_test(S_dim + 1, kSetZero);
CuVector<BaseFloat> deriv_S(S_dim, kSetZero),
deriv_S_test(S_dim, kSetZero);
xvector_pairs.SetRandn();

ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector,
&deriv_S_and_b, &tot_objf, &tot_weight);
&deriv_S, &deriv_b, &tot_objf, &tot_weight);
TestComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector_test,
&deriv_S_and_b_test, &tot_objf_test, &tot_weight_test);
&deriv_S_test, &deriv_b_test, &tot_objf_test, &tot_weight_test);

CuVector<BaseFloat> deriv_xvector_vec(xvector_dim);
deriv_xvector_vec.AddRowSumMat(1.0, deriv_xvector, 0.0);
Expand All @@ -160,43 +160,50 @@ bool TestXvectorComputeObjf() {

// Verify that the objfs are the same.
KALDI_ASSERT(ApproxEqual(tot_objf, tot_objf_test, 0.001));

// Also verify that the gradients are the same.
for (int32 i = 0; i < deriv_xvector_vec.Dim(); i++)
KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i), deriv_xvector_vec_test(i), 0.001));
for (int32 i = 0; i < deriv_S_and_b.Dim(); i++)
KALDI_ASSERT(ApproxEqual(deriv_S_and_b(i), deriv_S_and_b_test(i), 0.001));
KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i),
deriv_xvector_vec_test(i), 0.001));

// Verify that the S derivatives are the same.
for (int32 i = 0; i < deriv_S.Dim(); i++)
KALDI_ASSERT(ApproxEqual(deriv_S(i), deriv_S_test(i), 0.001));

// Verify that the b derivatives are the same.
KALDI_ASSERT(ApproxEqual(deriv_b, deriv_b_test, 0.001));
return true;
}

void TestComputeXvectorObjfAndDeriv(
const CuMatrixBase<BaseFloat> &xvector_pairs,
const CuSpMatrix<BaseFloat> &S,
BaseFloat b, CuMatrixBase<BaseFloat> *deriv_xvector,
CuVector<BaseFloat> *deriv_S_and_b, BaseFloat *tot_objf,
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b, BaseFloat *tot_objf,
BaseFloat *tot_weight) {

int32 N = xvector_pairs.NumRows();
BaseFloat same_objf = 0,
diff_objf = 0;
BaseFloat K = 1.0 / (N - 2.0);
int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
CuMatrix<BaseFloat> tmp_deriv(N, xvector_pairs.NumCols()
+ S_dim + 1, kSetZero);
(*deriv_b) = 0;
// Handle portion of the objf corresponding to pairs of xvectors
// from the same classes.
for (int32 i = 0; i < N/2; i++) {
const CuVector<BaseFloat> &v(xvector_pairs.Row(2 * i)),
&w(xvector_pairs.Row(2 * i + 1));
CuVector<BaseFloat> deriv_v,
deriv_w,
deriv_S_and_b_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b);
deriv_S_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b),
deriv_b_part = 0;
same_objf += Log(1 + Exp(-similarity_score));
TestGetDeriv(v, w, S, b, true, similarity_score, &deriv_v,
&deriv_w, &deriv_S_and_b_part);
&deriv_w, &deriv_S_part, &deriv_b_part);
deriv_xvector->Row(2 * i).AddVec(1.0, deriv_v);
deriv_xvector->Row(2 * i + 1).AddVec(1.0, deriv_w);
deriv_S_and_b->AddVec(1.0, deriv_S_and_b_part);
deriv_S->AddVec(1.0, deriv_S_part);
(*deriv_b) += deriv_b_part;
}

// Handle portion of the objf corresponding to pairs of xvectors
Expand All @@ -207,14 +214,16 @@ void TestComputeXvectorObjfAndDeriv(
&w(xvector_pairs.Row(j));
CuVector<BaseFloat> deriv_v,
deriv_w,
deriv_S_and_b_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b);
deriv_S_part;
BaseFloat similarity_score = TestSimilarityScore(v, w, S, b),
deriv_b_part = 0;
diff_objf += Log(1 + Exp(similarity_score));
TestGetDeriv(v, w, S, b, false, similarity_score, &deriv_v,
&deriv_w, &deriv_S_and_b_part);
&deriv_w, &deriv_S_part, &deriv_b_part);
deriv_xvector->Row(i).AddVec(K, deriv_v);
deriv_xvector->Row(j).AddVec(K, deriv_w);
deriv_S_and_b->AddVec(K, deriv_S_and_b_part);
deriv_S->AddVec(K, deriv_S_part);
(*deriv_b) += K * deriv_b_part;
}
}
// Scale the same and different portions of the objective function
Expand All @@ -228,12 +237,12 @@ void TestGetDeriv(const CuVector<BaseFloat> &v,
const CuVector<BaseFloat> &w, const CuSpMatrix<BaseFloat> &S,
BaseFloat b, bool is_same, BaseFloat similarity_score,
CuVector<BaseFloat> *deriv_v, CuVector<BaseFloat> *deriv_w,
CuVector<BaseFloat> *deriv_S_and_b) {
CuVector<BaseFloat> *deriv_S, BaseFloat *deriv_b) {
int32 d = is_same ? 1 : -1,
S_dim = S.NumCols() * (S.NumCols() + 1) / 2;
deriv_v->Resize(v.Dim(), kSetZero);
deriv_w->Resize(v.Dim(), kSetZero);
deriv_S_and_b->Resize(S_dim + 1, kSetZero);
deriv_S->Resize(S_dim, kSetZero);

// This scalar is common to the different derivatives.
BaseFloat deriv_coef = d * Exp(-1 * d * similarity_score)
Expand All @@ -254,11 +263,10 @@ void TestGetDeriv(const CuVector<BaseFloat> &v,
for (int32 i = 0; i < S.NumCols(); i++)
deriv_S_mat(i, i) = 0.5 * deriv_S_mat(i, i);
CuSubVector<BaseFloat> deriv_S_vec(deriv_S_mat.Data(), S_dim);
CuSubVector<BaseFloat> sub_deriv_S_and_b(*deriv_S_and_b, 0, S_dim);
sub_deriv_S_and_b.AddVec(deriv_coef, deriv_S_vec);
deriv_S->AddVec(deriv_coef, deriv_S_vec);

// Handle derivative with respect to b.
(*deriv_S_and_b)(S_dim) = -deriv_coef;
(*deriv_b) = -deriv_coef;
}

BaseFloat TestSimilarityScore(const CuVector<BaseFloat> &v,
Expand Down Expand Up @@ -286,14 +294,15 @@ void UnitTestXvectorExtractor() {

int main() {
using namespace kaldi;
for (int32 i = 0; i < 2; i++)
for (int32 i = 0; i < 2; i++) {
#if HAVE_CUDA == 1
if (i == 0)
CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("no");
else
CuDevice::Instantiate().SelectGpuId("yes"); // -1 means no GPU
CuDevice::Instantiate().SelectGpuId("yes");
#endif
UnitTestXvectorExtractor();
}
std::cout << "Xvector tests succeeded.\n";
return 0;
}
Loading