Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions src/online/online-feat-input.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include "online-audio-source.h"
#include "feat/feature-functions.h"
#include "feat/feature-window.h"

namespace kaldi {

Expand Down Expand Up @@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf {
// "frame_size" - frame extraction window size in audio samples
// "frame_shift" - shift between the starts of successive frames, in audio samples
OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
const int32 frame_size, const int32 frame_shift);
const int32 frame_size, const int32 frame_shift,
const bool snip_edges = true);

virtual int32 Dim() const { return extractor_->Dim(); }

Expand All @@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf {
const int32 frame_size_;
const int32 frame_shift_;
Vector<BaseFloat> wave_; // the samples to be passed for extraction
Vector<BaseFloat> wave_remainder_; // the samples remaining from the previous
// feature batch
FrameExtractionOptions frame_opts_;

KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineFeInput);
};

template<class E>
OnlineFeInput<E>::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
int32 frame_size, int32 frame_shift)
int32 frame_size, int32 frame_shift,
bool snip_edges)
: source_(au_src), extractor_(fe),
frame_size_(frame_size), frame_shift_(frame_shift) {}
frame_size_(frame_size), frame_shift_(frame_shift) {
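// We need a FrameExtractionOptions in order to call NumFrames().
// 1000 is a dummy sample rate: at 1000 Hz one millisecond equals one sample,
// so frame_shift and frame_size (given in samples) can be stored directly
// in the *_ms fields below.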
frame_opts_.samp_freq = 1000;
frame_opts_.frame_shift_ms = frame_shift;
frame_opts_.frame_length_ms = frame_size;
frame_opts_.snip_edges = snip_edges;
}

template<class E> bool
OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
Expand All @@ -311,11 +324,26 @@ OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {

bool ans = source_->Read(&read_samples);

Vector<BaseFloat> all_samples(wave_remainder_.Dim() + read_samples.Dim());
all_samples.Range(0, wave_remainder_.Dim()).CopyFromVec(wave_remainder_);
all_samples.Range(wave_remainder_.Dim(), read_samples.Dim()).
CopyFromVec(read_samples);

// Extract the features
if (read_samples.Dim() >= frame_size_) {
extractor_->Compute(read_samples, 1.0, output);
if (all_samples.Dim() >= frame_size_) {
// extract waveform remainder before calling Compute()
int32 num_frames = NumFrames(all_samples.Dim(), frame_opts_);
// offset is the number of samples at the start of all_samples that the
// extracted frames have already consumed.
int32 offset = num_frames * frame_shift_;
int32 remaining_len = all_samples.Dim() - offset;
wave_remainder_.Resize(remaining_len);
KALDI_ASSERT(remaining_len >= 0);
if (remaining_len > 0)
wave_remainder_.CopyFromVec(SubVector<BaseFloat>(all_samples, offset, remaining_len));
extractor_->Compute(all_samples, 1.0, output);
} else {
output->Resize(0, 0);
wave_remainder_ = all_samples;
}

return ans;
Expand Down