-
Notifications
You must be signed in to change notification settings - Fork 14
Chain xvector #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: xvector
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| // featbin/signal-distort.cc | ||
|
|
||
| // Copyright 2016 Pegah Ghahremani | ||
|
|
||
| // See ../../COPYING for clarification regarding multiple authors | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED | ||
| // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, | ||
| // MERCHANTABLITY OR NON-INFRINGEMENT. | ||
| // See the Apache 2 License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
|
|
||
|
|
||
| #include "feat/signal-distort.h" | ||
|
|
||
| namespace kaldi { | ||
|
|
||
| // randomly disturb the input signal using a band-pass filter with no zeros. | ||
| void PerturbXvectorSignal::ComputeAndApplyRandDistortion(const MatrixBase<BaseFloat> &input_egs, | ||
| Matrix<BaseFloat> *perturb_egs) { | ||
| // Generate impluse response |H(w)| using nonzero random sequence and smooth them | ||
| // using moving-average window with small window size. | ||
| // For simplicity, assume zero-phase response and H(w) = |H(w)|. | ||
| // num_fft_samp = 512 | ||
| int32 num_fft_samp = 512; | ||
| Vector<BaseFloat> im_response(num_fft_samp); | ||
|
|
||
| } | ||
|
|
||
| // Stretches the time axis for input egs without fixing the pitch value. | ||
| // It changes the speed and duration of the input signal without fixing pitch. | ||
| // The output y w.r.t input x is going to be y(t - offset) = x(stretch * (t - offset)), | ||
| // where offset is the time index which the signal is stretches along that and the input | ||
| // and output are the same for t = offset. | ||
| // ArbitraryResample class is used to generate resampled output for different time-stretches. | ||
| // The output y is the stretched form of the input, x, and stretch value is randomely generated | ||
| // between [1 - max_stretch, 1 + max_stretch]. | ||
| // y[(m - n + 2 * t)/2] = x[(1 + stretch) * (m - n + 2 * t)/2] for t = 0,..,n | ||
| void PerturbXvectorSignal::TimeStretch(const MatrixBase<BaseFloat> &input_egs, | ||
| Matrix<BaseFloat> *perturb_egs) { | ||
| Matrix<BaseFloat> in_mat(input_egs), | ||
| out_mat(perturb_egs->NumRows(), perturb_egs->NumCols()); | ||
| int32 input_dim = input_egs.NumCols(), | ||
| dim = perturb_egs->NumCols(); | ||
| Vector<BaseFloat> samp_points_secs(dim); | ||
| BaseFloat samp_freq = 2000, | ||
| max_stretch = opts_.max_time_stretch; | ||
| // we stretch the middle part of the example and the input should be expanded | ||
| // by extra frame to be larger than the output length => s * (m+n)/2 < m. | ||
| // y((m - n + 2 * t)/2) = x(s * (m - n + 2 * t)/2) for t = 0,..,n | ||
| // where m = dim(x) and n = dim(y). | ||
| KALDI_ASSERT(input_dim > dim * ((1.0 + max_stretch) / (1.0 - max_stretch))); | ||
| // Generate random stretch value between -max_stretch, max_stretch. | ||
| int32 max_stretch_int = static_cast<int32>(max_stretch * 1000); | ||
| BaseFloat stretch = static_cast<BaseFloat>(RandInt(-max_stretch_int, max_stretch_int) / 1000.0); | ||
| if (abs(stretch) > 0) { | ||
| int32 num_zeros = 4; // Number of zeros of the sinc function that the window extends out to. | ||
| BaseFloat filter_cutoff_hz = samp_freq * 0.475; // lowpass frequency that's lower than 95% of | ||
| // the Nyquist. | ||
| for (int32 i = 0; i < dim; i++) | ||
| samp_points_secs(i) = static_cast<BaseFloat>(((1.0 + stretch) * | ||
| (0.5 * (input_dim - dim) + i))/ samp_freq); | ||
|
|
||
| ArbitraryResample time_resample(input_dim, samp_freq, | ||
| filter_cutoff_hz, | ||
| samp_points_secs, | ||
| num_zeros); | ||
| time_resample.Resample(in_mat, &out_mat); | ||
| } else { | ||
| int32 offset = static_cast<BaseFloat>(0.5 * (input_egs.NumCols() - perturb_egs->NumCols())); | ||
| out_mat.CopyFromMat(input_egs.Range(0, input_egs.NumRows(), offset, perturb_egs->NumCols())); | ||
| } | ||
| perturb_egs->CopyFromMat(out_mat); | ||
| } | ||
|
|
||
|
|
||
| } // end of namespace kaldi |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| // featbin/signal-distort.h | ||
|
|
||
| // Copyright 2016 Pegah Ghahremani | ||
|
|
||
| // See ../../COPYING for clarification regarding multiple authors | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED | ||
| // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, | ||
| // MERCHANTABLITY OR NON-INFRINGEMENT. | ||
| // See the Apache 2 License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
| #ifndef KALDI_SIGNAL_DISTORT_H_ | ||
| #define KALDI_SIGNAL_DISTORT_H_ | ||
|
|
||
| #include <cassert> | ||
| #include <cstdlib> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "base/kaldi-error.h" | ||
| #include "matrix/matrix-lib.h" | ||
| #include "util/common-utils.h" | ||
|
|
||
| #include "feat/resample.h" | ||
| #include "matrix/matrix-functions.h" | ||
| #include "cudamatrix/cu-matrix.h" | ||
|
|
||
| namespace kaldi { | ||
|
|
||
| // options class for distorting signals in egs | ||
| struct XvectorPerturbOptions { | ||
| BaseFloat max_shift; | ||
| BaseFloat max_time_stretch; | ||
| int32 frame_dim; | ||
| bool negation; | ||
| bool rand_distort; | ||
| std::string noise_egs; | ||
| XvectorPerturbOptions(): max_shift(0.2), | ||
| max_time_stretch(0.2), | ||
| frame_dim(80), | ||
| negation(false), | ||
| rand_distort(false) { } | ||
| void Register(OptionsItf *opts) { | ||
| opts->Register("max-shift", &max_shift, "Maximum random shift relative" | ||
| "to frame length applied to egs."); | ||
| opts->Register("max-speed-perturb", &max_time_stretch, | ||
| "Max speed perturbation applied on egs."); | ||
| opts->Register("frame-dim", &frame_dim, | ||
| "The numebr of samples in input frame as product of frame_length by samp_freq."); | ||
| opts->Register("negation", &negation, "If true, the input value is negated randomly."); | ||
| opts->Register("noise-egs", &noise_egs, "If supplied, the additive noise is added."); | ||
| opts->Register("rand_distort", &rand_distort, "If true, the signal is slightly changes" | ||
| "using some designed FIR filter with no zeros."); | ||
| } | ||
| }; | ||
|
|
||
| class PerturbXvectorSignal { | ||
| public: | ||
| PerturbXvectorSignal(XvectorPerturbOptions opts): opts_(opts) { }; | ||
|
|
||
| void ComputeAndApplyRandDistortion(const MatrixBase<BaseFloat> &input_egs, | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be cleaner to have this operate on vectors rather than matrices, and have separate code you call before and after that converts between the matrix and vector representations (and checks that the Indexes are in the correct order, etc.)
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added two separate functions for vectorization and unvectorization. However we need to apply filters separately on each rows, so why do we really need vectorization???
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When the egs are dumped, it may not be the case that each time-sequence has Dan On Thu, Feb 18, 2016 at 12:51 PM, pegahgh notifications@github.com wrote:
|
||
| Matrix<BaseFloat> *perturb_egs); | ||
|
|
||
| void TimeStretch(const MatrixBase<BaseFloat> &input_egs, | ||
| Matrix<BaseFloat> *perturb_egs); | ||
|
|
||
| private: | ||
| XvectorPerturbOptions opts_; | ||
| }; | ||
|
|
||
| } // end of namespace kaldi | ||
| #endif // KALDI_SIGNAL_DISTORT_H_ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| // nnet3bin/nnet3-copy-egs.cc | ||
|
|
||
| // Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey) | ||
| // 2016 Pegah Ghahremani | ||
|
|
||
| // See ../../COPYING for clarification regarding multiple authors | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED | ||
| // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, | ||
| // MERCHANTABLITY OR NON-INFRINGEMENT. | ||
| // See the Apache 2 License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
| #include "base/kaldi-common.h" | ||
| #include "util/common-utils.h" | ||
| #include "hmm/transition-model.h" | ||
| #include "nnet3/nnet-example.h" | ||
| #include "nnet3/nnet-example-utils.h" | ||
|
|
||
| int main(int argc, char *argv[]) { | ||
| try { | ||
| using namespace kaldi; | ||
| using namespace kaldi::nnet3; | ||
| typedef kaldi::int32 int32; | ||
| typedef kaldi::int64 int64; | ||
|
|
||
| const char *usage = | ||
| "Combine examples for neural network training and supports multiple rspecifiers, in which case it will reads the inputs \n" | ||
| "round-robin and writes to the output" | ||
| "\n" | ||
| "Usage: nnet3-fold-egs [options] <egs-rspecifier1> [<egs-rspecifier2> ...] <egs-wspecifier>\n" | ||
| "\n" | ||
| "e.g.\n" | ||
| "nnet3-fold-egs ark:1.egs ark:2.egs ark,t:text.egs\n" | ||
| "or:\n" | ||
| "nnet3-fold-egs ark:train.egs ark:1.egs ark:2.egs\n"; | ||
|
|
||
| ParseOptions po(usage); | ||
| po.Read(argc, argv); | ||
|
|
||
| if (po.NumArgs() < 2) { | ||
| po.PrintUsage(); | ||
| exit(1); | ||
| } | ||
|
|
||
| // | ||
| int32 num_inputs = po.NumArgs() - 1; | ||
| std::vector<SequentialNnetExampleReader*> example_readers(num_inputs); | ||
| for (int32 i = 0; i < num_inputs; i++) | ||
| example_readers[i] = new SequentialNnetExampleReader(po.GetArg(i+1)); | ||
|
|
||
| std::string examples_wspecifier(po.GetArg(num_inputs+1)); | ||
| NnetExampleWriter example_writer(examples_wspecifier); | ||
| int64 num_written = 0; | ||
| std::vector<int64> num_read(num_inputs); | ||
|
|
||
| //for (; !example_readers[0]->Done(); tot_num_read++) { | ||
| while (!example_readers[0]->Done()) { | ||
| for (int32 reader = 0; reader < num_inputs; reader++) { | ||
| if (!example_readers[reader]->Done()) { | ||
| example_readers[reader]->Next(); | ||
| num_read[reader]++; | ||
| std::string key = example_readers[reader]->Key(); | ||
| const NnetExample &eg = example_readers[reader]->Value(); | ||
| example_writer.Write(key, eg); | ||
| num_written++; | ||
| } | ||
| } | ||
| } | ||
| for (int32 i = 0; i < num_inputs; i++) | ||
| delete example_readers[i]; | ||
|
|
||
| KALDI_LOG << "Read " << num_read[0] << "neural-network training examples " | ||
| << "from " << num_inputs << " inputs, wrote " | ||
| << num_written; | ||
|
|
||
| return (num_written == 0 ? 1 : 0); | ||
| } catch(const std::exception &e) { | ||
| std::cerr << e.what() << '\n'; | ||
| return -1; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use - not _.