diff --git a/src/online/online-feat-input.h b/src/online/online-feat-input.h index b730a373ac0..e433c386212 100644 --- a/src/online/online-feat-input.h +++ b/src/online/online-feat-input.h @@ -31,6 +31,7 @@ #include "online-audio-source.h" #include "feat/feature-functions.h" +#include "feat/feature-window.h" namespace kaldi { @@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf { // "frame_size" - frame extraction window size in audio samples // "frame_shift" - feature frame width in audio samples OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe, - const int32 frame_size, const int32 frame_shift); + const int32 frame_size, const int32 frame_shift, + const bool snip_edges = true); virtual int32 Dim() const { return extractor_->Dim(); } @@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf { const int32 frame_size_; const int32 frame_shift_; Vector wave_; // the samples to be passed for extraction + Vector wave_remainder_; // the samples remained from the previous + // feature batch + FrameExtractionOptions frame_opts_; KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineFeInput); }; template OnlineFeInput::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe, - int32 frame_size, int32 frame_shift) + int32 frame_size, int32 frame_shift, + bool snip_edges) : source_(au_src), extractor_(fe), - frame_size_(frame_size), frame_shift_(frame_shift) {} + frame_size_(frame_size), frame_shift_(frame_shift) { + // we need a FrameExtractionOptions to call NumFrames() + // 1000 is just a fake sample rate which equates ms and samples + frame_opts_.samp_freq = 1000; + frame_opts_.frame_shift_ms = frame_shift; + frame_opts_.frame_length_ms = frame_size; + frame_opts_.snip_edges = snip_edges; +} template bool OnlineFeInput::Compute(Matrix *output) { @@ -311,11 +324,26 @@ OnlineFeInput::Compute(Matrix *output) { bool ans = source_->Read(&read_samples); + Vector all_samples(wave_remainder_.Dim() + read_samples.Dim()); + all_samples.Range(0, wave_remainder_.Dim()).CopyFromVec(wave_remainder_); + all_samples.Range(wave_remainder_.Dim(), read_samples.Dim()). + CopyFromVec(read_samples); + // Extract the features - if (read_samples.Dim() >= frame_size_) { - extractor_->Compute(read_samples, 1.0, output); + if (all_samples.Dim() >= frame_size_) { + // extract waveform remainder before calling Compute() + int32 num_frames = NumFrames(all_samples.Dim(), frame_opts_); + // offset is the amount at the start that has been extracted. + int32 offset = num_frames * frame_shift_; + int32 remaining_len = all_samples.Dim() - offset; + wave_remainder_.Resize(remaining_len); + KALDI_ASSERT(remaining_len >= 0); + if (remaining_len > 0) + wave_remainder_.CopyFromVec(SubVector(all_samples, offset, remaining_len)); + extractor_->Compute(all_samples, 1.0, output); } else { output->Resize(0, 0); + wave_remainder_ = all_samples; } return ans;