diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h index a52c42cf347..91d9bd51c4c 100644 --- a/src/cudamatrix/cu-kernels-ansi.h +++ b/src/cudamatrix/cu-kernels-ansi.h @@ -5,6 +5,7 @@ // 2013 Hainan Xu // 2013 Xiaohui Zhang // 2013-2015 Guoguo Chen +// 2016 David Snyder // See ../../COPYING for clarification regarding multiple authors // @@ -179,6 +180,11 @@ void cudaF_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, const float *mat2, float *mask, MatrixDim mat1_dim, int mat2_stride, int mask_stride); +void cudaF_compute_xvector_objf(dim3 Gr, dim3 Bl, const float *scores, + MatrixDim scores_dim, float *obfj_terms, + MatrixDim objf_dim, float *objf_derivs, + MatrixDim derivs_dim); + /********************************************************* * double CUDA kernel calls */ @@ -302,6 +308,10 @@ void cudaD_copy_from_sp(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim void cudaD_take_lower(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in); void cudaD_take_upper(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in); void cudaD_take_mean(dim3 Gr, dim3 Bl, const double* x, double* y, MatrixDim d_in); +void cudaD_compute_xvector_objf(dim3 Gr, dim3 Bl, const double *scores, + MatrixDim scores_dim, double *obfj_terms, + MatrixDim objf_dim, double *objf_derivs, + MatrixDim derivs_dim); // some mostly mixed-type kernels. @@ -349,8 +359,6 @@ void cudaD_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, const double *mat2, double *mask, MatrixDim mat1_dim, int mat2_stride, int mask_stride); - - } // extern "C" #endif // HAVE_CUDA diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index d494be4169a..00d4f8bebf8 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -6,6 +6,7 @@ // 2013 Hainan Xu // 2013 Xiaohui Zhang // 2013-2015 Guoguo Chen +// 2016 David Snyder // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -2094,6 +2095,26 @@ static void _diff_xent(const int32_cuda* vec_tgt, Real* mat_net_out, Real* vec_l } } +template +__global__ +static void _compute_xvector_objf(const Real* scores, MatrixDim scores_dim, + Real* obfj_terms, MatrixDim objf_dim, + Real* obfj_derivs, MatrixDim derivs_dim) { + int32_cuda i = blockIdx.x * blockDim.x + threadIdx.x; + int32_cuda j = blockIdx.y * blockDim.y + threadIdx.y; + int32_cuda scores_index = i + j * scores_dim.stride; + Real K = 1.0 / (scores_dim.rows - 2.0); + Real L = scores[scores_index]; + if (i < scores_dim.cols && j < scores_dim.rows && i < j) { + if (i + 1 == j && i % 2 == 0) { + obfj_terms[scores_index] = log(1.0 + exp(-L)); + obfj_derivs[scores_index] = 1.0 / (1.0 + exp(L)); + } else if (i != j) { + obfj_terms[scores_index] = K * log(1.0 + exp(L)); + obfj_derivs[scores_index] = -K / (1.0 + exp(-L)); + } + } +} /*********************************************************************** @@ -2575,6 +2596,14 @@ void cudaF_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, _equal_element_mask<<>>(mat1, mat2, mask, mat1_dim, mat2_stride, mask_stride); } +void cudaF_compute_xvector_objf(dim3 Gr, dim3 Bl, const float *scores, + MatrixDim scores_dim, float *objf_terms, + MatrixDim objf_dim, float *objf_derivs, + MatrixDim derivs_dim) { + _compute_xvector_objf<<>>(scores, scores_dim, objf_terms, objf_dim, + objf_derivs, derivs_dim); +} + /* * "double" */ @@ -3029,6 +3058,13 @@ void cudaD_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, _equal_element_mask<<>>(mat1, mat2, mask, mat1_dim, mat2_stride, mask_stride); } +void cudaD_compute_xvector_objf(dim3 Gr, dim3 Bl, const double *scores, + MatrixDim scores_dim, double *objf_terms, + MatrixDim objf_dim, double *objf_derivs, + MatrixDim derivs_dim) { + _compute_xvector_objf<<>>(scores, scores_dim, objf_terms, objf_dim, + objf_derivs, derivs_dim); +} /* Some conversion kernels for which it's more convenient to not name them F or D. */ diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 0ded2f794d3..a6e6d2f4961 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -289,6 +289,13 @@ inline void cuda_equal_element_mask(dim3 Gr, dim3 Bl, const float *mat1, const f cudaF_equal_element_mask(Gr, Bl, mat1, mat2, mask, mat1_dim, mat2_stride, mask_stride); } +inline void cuda_compute_xvector_objf(dim3 Gr, dim3 Bl, const float *scores, + MatrixDim scores_dim, float *obfj_terms, + MatrixDim objf_dim, float *objf_derivs, + MatrixDim derivs_dim) { + cudaF_compute_xvector_objf(Gr, Bl, scores, scores_dim, obfj_terms, objf_dim, + objf_derivs, derivs_dim); +} // double versions @@ -467,6 +474,14 @@ inline void cuda_equal_element_mask(dim3 Gr, dim3 Bl, const double *mat1, const cudaD_equal_element_mask(Gr, Bl, mat1, mat2, mask, mat1_dim, mat2_stride, mask_stride); } +inline void cuda_compute_xvector_objf(dim3 Gr, dim3 Bl, const double *scores, + MatrixDim scores_dim, double *obfj_terms, + MatrixDim objf_dim, double *objf_derivs, + MatrixDim derivs_dim) { + cudaD_compute_xvector_objf(Gr, Bl, scores, scores_dim, obfj_terms, objf_dim, + objf_derivs, derivs_dim); +} + // Also include some template-friendly wrappers of cublas functions: inline cublasStatus_t cuda_axpy(cublasHandle_t handle, int n, float alpha, const float *x, int incx, float *y, int incy) { return cublasSaxpy_v2(handle, n, &alpha, x, incx, y, incy); diff --git a/src/cudamatrix/cu-math.cc b/src/cudamatrix/cu-math.cc index 97757ba68dd..d5e9cfb6ef8 100644 --- a/src/cudamatrix/cu-math.cc +++ b/src/cudamatrix/cu-math.cc @@ -2,6 +2,7 @@ // Copyright 2009-2012 Karel Vesely // Johns Hopkins University (author: Daniel Povey) +// 2016 David Snyder // See ../../COPYING for clarification regarding multiple authors // @@ -29,15 +30,15 @@ namespace kaldi { namespace cu { /* - * templated functions wrapping the ANSI-C CUDA kernel functions + * templated functions wrapping the ANSI-C CUDA kernel functions */ template void RegularizeL1(CuMatrixBase *weight, CuMatrixBase *grad, Real l1, Real lr) { KALDI_ASSERT(SameDim(*weight, *grad)); -#if HAVE_CUDA == 1 - if (CuDevice::Instantiate().Enabled()) { +#if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { Timer tim; dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); @@ -46,7 +47,7 @@ void RegularizeL1(CuMatrixBase *weight, CuMatrixBase *grad, Real l1, cuda_regularize_l1(dimGrid, dimBlock, weight->Data(), grad->Data(), l1, lr, weight->Dim(), grad->Stride()); CU_SAFE_CALL(cudaGetLastError()); - + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -55,11 +56,11 @@ void RegularizeL1(CuMatrixBase *weight, CuMatrixBase *grad, Real l1, MatrixBase &grad2 = grad->Mat(); for(MatrixIndexT r=0; r &src, #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - + /* - Note: default 16x16 block-size limits the --cachesize to matrix size 16*65535 x 16*65535 + Note: default 16x16 block-size limits the --cachesize to matrix size 16*65535 x 16*65535 dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); dim3 dimGrid(n_blocks(tgt->NumCols(), CU2DBLOCK), n_blocks(copy_from_idx.Dim(), CU2DBLOCK)); */ /* * Let's use blocksize 4 x 128 (512 threads/block) - * and extend the randomizable matrices to: col 4*65535, row 128*65535 + * and extend the randomizable matrices to: col 4*65535, row 128*65535 * (ie. max-cols:262140 (dim), max-rows:8388480 (datapoints)) */ dim3 dimBlock(4, 128); @@ -111,7 +112,7 @@ void Randomize(const CuMatrixBase &src, cuda_randomize(dimGrid, dimBlock, tgt->Data(), src.Data(), copy_from_idx.Data(), dimtgt, dimsrc); CU_SAFE_CALL(cudaGetLastError()); - + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -124,28 +125,28 @@ void Randomize(const CuMatrixBase &src, tgtmat.Row(i).CopyFromVec(srcmat.Row(copy_from_idxvec[i])); } } -} +} template void Splice(const CuMatrixBase &src, const CuArray &frame_offsets, CuMatrixBase *tgt) { - + KALDI_ASSERT(src.NumCols()*frame_offsets.Dim() == tgt->NumCols()); KALDI_ASSERT(src.NumRows() == tgt->NumRows()); #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - + dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); dim3 dimGrid(n_blocks(tgt->NumCols(), CU2DBLOCK), n_blocks(tgt->NumRows(), CU2DBLOCK)); - + cuda_splice(dimGrid, dimBlock, tgt->Data(), src.Data(), frame_offsets.Data(), tgt->Dim(), src.Dim()); CU_SAFE_CALL(cudaGetLastError()); - + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -171,7 +172,7 @@ void Splice(const CuMatrixBase &src, const CuArray &frame_offsets, template void Copy(const CuMatrixBase &src, const CuArray ©_from_indices, - CuMatrixBase *tgt) { + CuMatrixBase *tgt) { KALDI_ASSERT(copy_from_indices.Dim() == tgt->NumCols()); KALDI_ASSERT(src.NumRows() == tgt->NumRows()); @@ -179,14 +180,14 @@ void Copy(const CuMatrixBase &src, const CuArray ©_from_indices #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; - + dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); dim3 dimGrid(n_blocks(tgt->NumCols(), CU2DBLOCK), n_blocks(tgt->NumRows(), CU2DBLOCK)); - + cuda_copy(dimGrid, dimBlock, tgt->Data(), src.Data(), copy_from_indices.Data(), tgt->Dim(), src.Dim()); CU_SAFE_CALL(cudaGetLastError()); - + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -205,6 +206,31 @@ void Copy(const CuMatrixBase &src, const CuArray ©_from_indices } } +template +void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, + CuMatrixBase *objf_terms, + CuMatrixBase *objf_derivs) { + #if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + dim3 dimBlock(CU2DBLOCK, CU2DBLOCK); + dim3 dimGrid(n_blocks(scores.NumCols(), CU2DBLOCK), + n_blocks(scores.NumRows(), CU2DBLOCK)); + + cuda_compute_xvector_objf(dimGrid, dimBlock, scores.Data(), scores.Dim(), + objf_terms->Data(), objf_terms->Dim(), objf_derivs->Data(), + objf_derivs->Dim()); + CU_SAFE_CALL(cudaGetLastError()); + + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else + #endif + { + // TODO: Add the CPU version. + KALDI_LOG << "NOT USING CUDA"; + } +} + // instantiate the templates. template void RegularizeL1(CuMatrixBase *weight, CuMatrixBase *grad, float l1, float lr); @@ -233,6 +259,15 @@ void Randomize(const CuMatrixBase &src, const CuArray ©_from_idx, CuMatrixBase *tgt); +template +void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, + CuMatrixBase *objf_terms, + CuMatrixBase *objf_derivs); +template +void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, + CuMatrixBase *objf_terms, + CuMatrixBase *objf_derivs); + } //namespace cu diff --git a/src/cudamatrix/cu-math.h b/src/cudamatrix/cu-math.h index 65a4c0c4af3..a30cec5d9df 100644 --- a/src/cudamatrix/cu-math.h +++ b/src/cudamatrix/cu-math.h @@ -1,7 +1,8 @@ // cudamatrix/cu-math.h // Copyright 2009-2012 Karel Vesely -// 2013 Johns Hopkins University (Author: David Snyder) +// 2013 Johns Hopkins University (Author: Daniel Povey) +// 2016 David Snyder // See ../../COPYING for clarification regarding multiple authors // @@ -78,7 +79,13 @@ void Group2norm(const CuMatrixBase &src, CuMatrixBase *dest, int32 group_stride); - +/* +TODO: Documentation. +*/ +template +void ComputeXvectorObjfFromScores(const CuMatrixBase &scores, + CuMatrixBase *objf_terms, + CuMatrixBase *objf_derivs); } // namespace cu diff --git a/src/ivector/Makefile b/src/ivector/Makefile index 918bd4ef113..7bd8c1ce351 100644 --- a/src/ivector/Makefile +++ b/src/ivector/Makefile @@ -5,14 +5,18 @@ OPENFST_CXXFLAGS = OPENFST_LDLIBS = include ../kaldi.mk -TESTFILES = ivector-extractor-test plda-test logistic-regression-test +LDFLAGS += $(CUDA_LDFLAGS) +LDLIBS += $(CUDA_LDLIBS) -OBJFILES = ivector-extractor.o voice-activity-detection.o plda.o logistic-regression.o +TESTFILES = ivector-extractor-test plda-test logistic-regression-test xvector-test + +OBJFILES = ivector-extractor.o voice-activity-detection.o plda.o logistic-regression.o xvector.o LIBNAME = kaldi-ivector ADDLIBS = ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../transform/kaldi-transform.a \ - ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a \ - ../util/kaldi-util.a + ../thread/kaldi-thread.a ../cudamatrix/kaldi-cudamatrix.a \ + ../matrix/kaldi-matrix.a ../base/kaldi-base.a \ + ../util/kaldi-util.a include ../makefiles/default_rules.mk diff --git a/src/ivector/xvector-test.cc b/src/ivector/xvector-test.cc new file mode 100644 index 00000000000..229863e820a --- /dev/null +++ b/src/ivector/xvector-test.cc @@ -0,0 +1,299 @@ +// ivector/xvector-test.cc + +// Copyright 2016 David Snyder + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "ivector/xvector.h" +#include "util/kaldi-io.h" +#include "cudamatrix/cu-matrix-lib.h" + +namespace kaldi { +BaseFloat TestSimilarityScore(const CuVector &v, + const CuVector &w, const CuSpMatrix &S, + BaseFloat b); + +void TestGetDeriv(const CuVector &v, + const CuVector &w, const CuSpMatrix &S, + BaseFloat b, bool is_same, BaseFloat similarity_score, + CuVector *deriv_v, CuVector *deriv_w, + CuVector *deriv_S_and_b); + +void TestComputeXvectorObjfAndDeriv( + const CuMatrixBase &xvector_pairs, + const CuSpMatrix &S, + BaseFloat b, CuMatrixBase *deriv_xvector, + CuVector *deriv_S_and_b, BaseFloat *tot_objf, + BaseFloat *tot_weight); + +bool TestXvectorExtractorDerivative(BaseFloat perturb_delta) { + int32 xvector_dim = RandInt(4, 30), + num_rows = 2 * RandInt(2, 10); // The number of rows must be even + // and greater than 2. + CuSpMatrix S(xvector_dim); + S.SetRandn(); + // Necessary to keep the similarity scores from getting too large or small. + S.Scale(1.0e-01); + BaseFloat b = RandInt(-100, 100) / 10.0, + tot_weight, + tot_objf; + int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2; + CuMatrix xvector_pairs(num_rows, xvector_dim, kSetZero), + deriv_xvector(num_rows, xvector_dim, kSetZero); + CuVector deriv_S_and_b(S_dim + 1, kSetZero); + xvector_pairs.SetRandn(); + ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector, + &deriv_S_and_b, &tot_objf, &tot_weight); + CuVector deriv_xvector_vec(xvector_dim); + + // Sum over the derivatives for xvector input. + deriv_xvector_vec.AddRowSumMat(1.0, deriv_xvector, 0.0); + BaseFloat l2_xvector = 0, + l2_S = 0, + l2_b = 0; + + // Compare the xvector derivatives calculated above with a numerical + // approximation. + for (int32 i = 0; i < xvector_dim; i++) { + CuMatrix xvector_pairs_p(xvector_pairs); + CuMatrix xvector_pairs_n(xvector_pairs); + for (int32 j = 0; j < num_rows; j++) { + xvector_pairs_p(j, i) += perturb_delta; + xvector_pairs_n(j, i) += -perturb_delta; + } + CuMatrix deriv_xvector_tmp(num_rows, xvector_dim, kSetZero); + CuVector deriv_S_and_b_tmp(S_dim + 1, kSetZero); + BaseFloat tot_objf_p; + BaseFloat tot_objf_n; + ComputeXvectorObjfAndDeriv(xvector_pairs_p, S, b, &deriv_xvector_tmp, + &deriv_S_and_b_tmp, &tot_objf_p, &tot_weight); + ComputeXvectorObjfAndDeriv(xvector_pairs_n, S, b, &deriv_xvector_tmp, + &deriv_S_and_b_tmp, &tot_objf_n, &tot_weight); + BaseFloat delta = (tot_objf_p - tot_objf_n) + * 1.0 / (2.0 * perturb_delta); + l2_xvector += pow(deriv_xvector_vec(i) - delta, 2); + } + + // Compare the S derivative calculated above with a numerical + // approximation. + for (int32 i = 0; i < S_dim; i++) { + CuSpMatrix S_p(S); + CuSpMatrix S_n(S); + S_p.Data()[i] += perturb_delta; + S_n.Data()[i] -= perturb_delta; + CuMatrix deriv_xvector_tmp(num_rows, xvector_dim, kSetZero); + CuVector deriv_S_and_b_tmp(S_dim + 1, kSetZero); + BaseFloat tot_objf_p; + BaseFloat tot_objf_n; + ComputeXvectorObjfAndDeriv(xvector_pairs, S_p, b, &deriv_xvector_tmp, + &deriv_S_and_b_tmp, &tot_objf_p, &tot_weight); + ComputeXvectorObjfAndDeriv(xvector_pairs, S_n, b, &deriv_xvector_tmp, + &deriv_S_and_b_tmp, &tot_objf_n, &tot_weight); + BaseFloat delta = (tot_objf_p - tot_objf_n) + * 1.0 / (2.0 * perturb_delta); + l2_S += pow(deriv_S_and_b(i) - delta, 2); + } + + // Compare the b derivative calculated above with a numerical + // approximation. + BaseFloat b_p = b + perturb_delta; + BaseFloat b_n = b - perturb_delta; + CuMatrix deriv_xvector_tmp(num_rows, xvector_dim, kSetZero); + CuVector deriv_S_and_b_tmp(S_dim + 1, kSetZero); + BaseFloat tot_objf_p; + BaseFloat tot_objf_n; + ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_p, &deriv_xvector_tmp, + &deriv_S_and_b_tmp, &tot_objf_p, &tot_weight); + ComputeXvectorObjfAndDeriv(xvector_pairs, S, b_n, &deriv_xvector_tmp, + &deriv_S_and_b_tmp, &tot_objf_n, &tot_weight); + BaseFloat delta = (tot_objf_p - tot_objf_n) * 1.0 / (2.0 * perturb_delta); + l2_b = pow(deriv_S_and_b(S_dim) - delta, 2); + KALDI_ASSERT(l2_xvector < 1.0e-03); + KALDI_ASSERT(l2_S < 1.0e-03); + KALDI_ASSERT(l2_b < 1.0e-03); + return true; +} + +bool TestXvectorComputeObjf() { + int32 xvector_dim = RandInt(4, 30), + num_rows = 2 * RandInt(2, 10); // The number of rows must be even + // and greater than 2. + CuSpMatrix S(xvector_dim); + S.SetRandn(); + // Necessary to keep the similarity scores from getting too large or small. + S.Scale(1.0e-01); + BaseFloat b = RandInt(-200, 200) / 10.0, + tot_weight, + tot_weight_test, + tot_objf, + tot_objf_test; + int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2; + CuMatrix xvector_pairs(num_rows, xvector_dim, kSetZero), + deriv_xvector(num_rows, xvector_dim, kSetZero), + deriv_xvector_test(num_rows, xvector_dim, kSetZero); + CuVector deriv_S_and_b(S_dim + 1, kSetZero), + deriv_S_and_b_test(S_dim + 1, kSetZero); + xvector_pairs.SetRandn(); + + ComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector, + &deriv_S_and_b, &tot_objf, &tot_weight); + TestComputeXvectorObjfAndDeriv(xvector_pairs, S, b, &deriv_xvector_test, + &deriv_S_and_b_test, &tot_objf_test, &tot_weight_test); + + CuVector deriv_xvector_vec(xvector_dim); + deriv_xvector_vec.AddRowSumMat(1.0, deriv_xvector, 0.0); + CuVector deriv_xvector_vec_test(xvector_dim); + deriv_xvector_vec_test.AddRowSumMat(1.0, deriv_xvector_test, 0.0); + + // Verify that the objfs are the same. + KALDI_ASSERT(ApproxEqual(tot_objf, tot_objf_test, 0.001)); + // Also verify that the gradients are the same. + for (int32 i = 0; i < deriv_xvector_vec.Dim(); i++) + KALDI_ASSERT(ApproxEqual(deriv_xvector_vec(i), deriv_xvector_vec_test(i), 0.001)); + for (int32 i = 0; i < deriv_S_and_b.Dim(); i++) + KALDI_ASSERT(ApproxEqual(deriv_S_and_b(i), deriv_S_and_b_test(i), 0.001)); + return true; +} + +void TestComputeXvectorObjfAndDeriv( + const CuMatrixBase &xvector_pairs, + const CuSpMatrix &S, + BaseFloat b, CuMatrixBase *deriv_xvector, + CuVector *deriv_S_and_b, BaseFloat *tot_objf, + BaseFloat *tot_weight) { + + int32 N = xvector_pairs.NumRows(); + BaseFloat same_objf = 0, + diff_objf = 0; + BaseFloat K = 1.0 / (N - 2.0); + int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2; + CuMatrix tmp_deriv(N, xvector_pairs.NumCols() + + S_dim + 1, kSetZero); + // Handle portion of the objf corresponding to pairs of xvectors + // from the same classes. + for (int32 i = 0; i < N/2; i++) { + const CuVector &v(xvector_pairs.Row(2 * i)), + &w(xvector_pairs.Row(2 * i + 1)); + CuVector deriv_v, + deriv_w, + deriv_S_and_b_part; + BaseFloat similarity_score = TestSimilarityScore(v, w, S, b); + same_objf += Log(1 + Exp(-similarity_score)); + TestGetDeriv(v, w, S, b, true, similarity_score, &deriv_v, + &deriv_w, &deriv_S_and_b_part); + deriv_xvector->Row(2 * i).AddVec(1.0, deriv_v); + deriv_xvector->Row(2 * i + 1).AddVec(1.0, deriv_w); + deriv_S_and_b->AddVec(1.0, deriv_S_and_b_part); + } + + // Handle portion of the objf corresponding to pairs of xvectors + // from different classes. + for (int32 i = 0; i < N; i++) { + for (int32 j = 2 * std::ceil((i + 1) / 2.0); j < N; j++) { + const CuVector &v(xvector_pairs.Row(i)), + &w(xvector_pairs.Row(j)); + CuVector deriv_v, + deriv_w, + deriv_S_and_b_part; + BaseFloat similarity_score = TestSimilarityScore(v, w, S, b); + diff_objf += Log(1 + Exp(similarity_score)); + TestGetDeriv(v, w, S, b, false, similarity_score, &deriv_v, + &deriv_w, &deriv_S_and_b_part); + deriv_xvector->Row(i).AddVec(K, deriv_v); + deriv_xvector->Row(j).AddVec(K, deriv_w); + deriv_S_and_b->AddVec(K, deriv_S_and_b_part); + } + } + // Scale the same and different portions of the objective function + // so that both contribute a weight of N. + (*tot_objf) = same_objf + K * diff_objf; + (*tot_weight) = N; +} + + +void TestGetDeriv(const CuVector &v, + const CuVector &w, const CuSpMatrix &S, + BaseFloat b, bool is_same, BaseFloat similarity_score, + CuVector *deriv_v, CuVector *deriv_w, + CuVector *deriv_S_and_b) { + int32 d = is_same ? 1 : -1, + S_dim = S.NumCols() * (S.NumCols() + 1) / 2; + deriv_v->Resize(v.Dim(), kSetZero); + deriv_w->Resize(v.Dim(), kSetZero); + deriv_S_and_b->Resize(S_dim + 1, kSetZero); + + // This scalar is common to the different derivatives. + BaseFloat deriv_coef = d * Exp(-1 * d * similarity_score) + / (1 + Exp(-1 * d * similarity_score)); + + // Handle derivative with respect to v and w. + deriv_v->CopyFromVec(w); + deriv_w->CopyFromVec(v); + deriv_v->AddSpVec(2.0, S, v, -1.0); + deriv_w->AddSpVec(2.0, S, w, -1.0); + deriv_v->Scale(deriv_coef); + deriv_w->Scale(deriv_coef); + + // Handle derivative with respect to S. + CuSpMatrix deriv_S_mat(S.NumCols(), kSetZero); + deriv_S_mat.AddVec2(2.0, v); + deriv_S_mat.AddVec2(2.0, w); + for (int32 i = 0; i < S.NumCols(); i++) + deriv_S_mat(i, i) = 0.5 * deriv_S_mat(i, i); + CuSubVector deriv_S_vec(deriv_S_mat.Data(), S_dim); + CuSubVector sub_deriv_S_and_b(*deriv_S_and_b, 0, S_dim); + sub_deriv_S_and_b.AddVec(deriv_coef, deriv_S_vec); + + // Handle derivative with respect to b. + (*deriv_S_and_b)(S_dim) = -deriv_coef; +} + +BaseFloat TestSimilarityScore(const CuVector &v, + const CuVector &w, const CuSpMatrix &S, + BaseFloat b) { + CuVector Sv(v.Dim()); + Sv.AddSpVec(1.0, S, v, 0); + CuVector Sw(w.Dim()); + Sw.AddSpVec(1.0, S, w, 0); + BaseFloat L = VecVec(v, w) - VecVec(v, Sv) - VecVec(w, Sw) + b; + return L; +} + +void UnitTestXvectorExtractor() { + if (!TestXvectorComputeObjf()) + KALDI_ERR << "Xvector objf test failed"; + if (!TestXvectorExtractorDerivative(1.0e-02) && + !TestXvectorExtractorDerivative(1.0e-03) && + !TestXvectorExtractorDerivative(1.0e-04) && + !TestXvectorExtractorDerivative(1.0e-05)) + KALDI_ERR << "Xvector derivative test failed"; +} + +} // namespace kaldi + +int main() { + using namespace kaldi; + for (int32 i = 0; i < 2; i++) +#if HAVE_CUDA == 1 + if (i == 0) + CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU + else + CuDevice::Instantiate().SelectGpuId("yes"); // -1 means no GPU +#endif + UnitTestXvectorExtractor(); + std::cout << "Xvector tests succeeded.\n"; + return 0; +} diff --git a/src/ivector/xvector.cc b/src/ivector/xvector.cc new file mode 100644 index 00000000000..a6e8533b611 --- /dev/null +++ b/src/ivector/xvector.cc @@ -0,0 +1,78 @@ +// ivector/xvector.cc + +// Copyright 2016 David Snyder + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "ivector/xvector.h" + +namespace kaldi { + +void ComputeXvectorObjfAndDeriv( + const CuMatrixBase &xvector_pairs, + const CuSpMatrix &S, + BaseFloat b, CuMatrixBase *deriv_xvector, + CuVector *deriv_S_and_b, BaseFloat *tot_objf, + BaseFloat *tot_weight) { + + int32 S_dim = S.NumCols() * (S.NumCols() + 1) / 2, + N = xvector_pairs.NumRows(), + xvector_dim = xvector_pairs.NumCols(); + BaseFloat K = 1.0 / (N - 2.0); + (*tot_objf) = 0; + + if (deriv_xvector == NULL) + KALDI_ASSERT(deriv_S_and_b == NULL); + else { + KALDI_ASSERT(deriv_xvector->NumCols() == xvector_dim); + KALDI_ASSERT(deriv_xvector->NumRows() == N); + KALDI_ASSERT(deriv_S_and_b->Dim() == S_dim + 1); + } + + CuMatrix S_tmp(S); + CuMatrix P(N, xvector_dim), + Q(N, N), + R(N, N), + T(N, N), + objf_terms(N, N), + objf_deriv_terms(N, N); + + CuVector r(N); + P.AddMatMat(1.0, xvector_pairs, kNoTrans, S_tmp, kNoTrans, 0.0); + r.AddDiagMatMat(1.0, xvector_pairs, kNoTrans, P, kTrans, 0.0); + R.AddVecToRows(1.0, r); + Q.SymAddMat2(1.0, xvector_pairs, kNoTrans, 0.0); + Q.CopyLowerToUpper(); + T.AddMat(1.0, Q, kNoTrans); + T.AddMat(-1.0, R, kTrans); + T.AddMat(-1.0, R, kNoTrans); + T.Add(b); + + cu::ComputeXvectorObjfFromScores(T, &objf_terms, &objf_deriv_terms); + CuVector objf_terms_vec(N); + objf_terms_vec.AddRowSumMat(1.0, objf_terms); + (*tot_objf) = objf_terms_vec.Sum(); + + if (deriv_xvector != NULL) { + /* TODO: Call cu-math function that handles the derivatives of S + and the xvectors. + */ + (*deriv_S_and_b)(S_dim) = -objf_deriv_terms.Sum(); + } + (*tot_weight) = N; +} + +} // namespace kaldi diff --git a/src/ivector/xvector.h b/src/ivector/xvector.h new file mode 100644 index 00000000000..ddb05c632d7 --- /dev/null +++ b/src/ivector/xvector.h @@ -0,0 +1,74 @@ +// ivector/xvector.h + +// Copyright 2016 Johns Hopkins University (Author: Daniel Povey) +// 2016 David Snyder + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_IVECTOR_XVECTOR_H_ +#define KALDI_IVECTOR_XVECTOR_H_ + +#include +#include "base/kaldi-common.h" +#include "cudamatrix/cu-matrix-lib.h" +#include "itf/options-itf.h" +#include "util/common-utils.h" +#include "matrix/matrix-lib.h" + +namespace kaldi { + /* + Computes the training objective function and the derivatives for + the xvector. Let N = xvector_pairs.NumRows() be the number of + xvectors. There are N(N-1)/2 pairs in total and N from the same + class. Let v(n) be the n'th row of the matrix xvector_pairs. + The total objective function written to 'tot_objf' is + \sum_{n=0}^{N/2} p_same(v(n*2), v(n*2+1)) + + 1/(N-2) \sum_{n=0}^{N} \sum_{m=2*ceil(n+1)/2)}^{N} + p_different(v(m), v(n)) + and let N be the normalizer for the objective function, written to + 'tot_weight' and equal to the total (weighted) number of samples over + which the objective function is computed. It is useful for displaying + the objective function correctly. + Let the log-odds L(v,w) [interpreted as log(p_same(v,w) / p_different(v,w))] + be defined as: + L(v, w) = v' w - v' S v - w' S w + then p_same(v, w) = -log(1 + exp(-l(v, w)), and + p_different(v, w) = 1 - p_same(v, w) = -log(1 + exp(-l(v, w)). + + @param [in] xvector_pairs Each row of 'xvector_pairs' is an xvector + extracted by the network for one sample, and the assumption is that + pairs of the form (2*k, 2*k+1), e.g., (0, 1), (2, 3), (4, 5), etc, + are from the same class, but any other pairs, e.g., (0, 2), (1, 2), + (2, 4), etc, are from different classes. + @param [out] deriv_xvector If non-NULL, the derivative of the objective + function with respect to the xvectors is written here. + @param [out] deriv_S_and_b If non-NULL, the derivative of the objective + function with respect to the parameters S and b are written here. + @param [out] tot_objf The total objective function described above + @param [out] tot_weight The total normalizing factor for the objective + function, equal to dvector_pairs.NumRows(). + */ + void ComputeXvectorObjfAndDeriv(const CuMatrixBase &dvector_pairs, + const CuSpMatrix &S, + BaseFloat b, + CuMatrixBase *deriv_xvector, + CuVector *deriv_S_and_b, + BaseFloat *tot_objf, + BaseFloat *tot_weight); + +} // namespace kaldi + +#endif