diff --git a/src/feat/feature-fbank.cc b/src/feat/feature-fbank.cc index c54069696b5..2c79a9d10cf 100644 --- a/src/feat/feature-fbank.cc +++ b/src/feat/feature-fbank.cc @@ -83,7 +83,7 @@ void FbankComputer::Compute(BaseFloat signal_log_energy, // Compute energy after window function (not the raw one). if (opts_.use_energy && !opts_.raw_energy) signal_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::min())); + std::numeric_limits::min())); if (srfft_ != NULL) // Compute FFT using split-radix algorithm. srfft_->Compute(signal_frame->Data(), true); @@ -108,7 +108,7 @@ void FbankComputer::Compute(BaseFloat signal_log_energy, mel_banks.Compute(power_spectrum, &mel_energies); if (opts_.use_log_fbank) { // Avoid log of zero (which should be prevented anyway by dithering). - mel_energies.ApplyFloor(std::numeric_limits::epsilon()); + mel_energies.ApplyFloor(std::numeric_limits::epsilon()); mel_energies.ApplyLog(); // take the log. } diff --git a/src/feat/feature-fbank.h b/src/feat/feature-fbank.h index 41ef2eef50a..5acd33522fc 100644 --- a/src/feat/feature-fbank.h +++ b/src/feat/feature-fbank.h @@ -53,7 +53,7 @@ struct FbankOptions { // this seems to be common for 16khz-sampled data, // but for 8khz-sampled data, 15 may be better. use_energy(false), - energy_floor(0.0), // not in log scale: a small value e.g. 1.0e-10 + energy_floor(0.0), raw_energy(true), htk_compat(false), use_log_fbank(true), @@ -65,7 +65,9 @@ struct FbankOptions { opts->Register("use-energy", &use_energy, "Add an extra dimension with energy to the FBANK output."); opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in FBANK computation"); + "Floor on energy (absolute, not relative) in FBANK computation. " + "Only makes a difference if --use-energy=true; only necessary if " + "--dither=0.0. 
Suggested values: 0.1 or 1.0"); opts->Register("raw-energy", &raw_energy, "If true, compute energy before preemphasis and windowing"); opts->Register("htk-compat", &htk_compat, "If true, put energy last. " diff --git a/src/feat/feature-mfcc.cc b/src/feat/feature-mfcc.cc index 122ba1b100d..71e9c0ad3e2 100644 --- a/src/feat/feature-mfcc.cc +++ b/src/feat/feature-mfcc.cc @@ -36,7 +36,7 @@ void MfccComputer::Compute(BaseFloat signal_log_energy, if (opts_.use_energy && !opts_.raw_energy) signal_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::min())); + std::numeric_limits::min())); if (srfft_ != NULL) // Compute FFT using the split-radix algorithm. srfft_->Compute(signal_frame->Data(), true); @@ -51,7 +51,7 @@ void MfccComputer::Compute(BaseFloat signal_log_energy, mel_banks.Compute(power_spectrum, &mel_energies_); // avoid log of zero (which should be prevented anyway by dithering). - mel_energies_.ApplyFloor(std::numeric_limits::epsilon()); + mel_energies_.ApplyFloor(std::numeric_limits::epsilon()); mel_energies_.ApplyLog(); // take the log. feature->SetZero(); // in case there were NaNs. diff --git a/src/feat/feature-mfcc.h b/src/feat/feature-mfcc.h index d1d2b8f9d09..66c52e89821 100644 --- a/src/feat/feature-mfcc.h +++ b/src/feat/feature-mfcc.h @@ -40,7 +40,8 @@ struct MfccOptions { MelBanksOptions mel_opts; int32 num_ceps; // e.g. 13: num cepstral coeffs, counting zero. bool use_energy; // use energy; else C0 - BaseFloat energy_floor; + BaseFloat energy_floor; // 0 by default; set to a value like 1.0 or 0.1 if + // you disable dithering. bool raw_energy; // If true, compute energy before preemphasis and windowing BaseFloat cepstral_lifter; // Scaling factor on cepstra for HTK compatibility. // if 0.0, no liftering is done. @@ -53,7 +54,7 @@ struct MfccOptions { // but for 8khz-sampled data, 15 may be better. num_ceps(13), use_energy(true), - energy_floor(0.0), // not in log scale: a small value e.g. 
1.0e-10 + energy_floor(0.0), raw_energy(true), cepstral_lifter(22.0), htk_compat(false) {} @@ -66,7 +67,9 @@ struct MfccOptions { opts->Register("use-energy", &use_energy, "Use energy (not C0) in MFCC computation"); opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in MFCC computation"); + "Floor on energy (absolute, not relative) in MFCC computation. " + "Only makes a difference if --use-energy=true; only necessary if " + "--dither=0.0. Suggested values: 0.1 or 1.0"); opts->Register("raw-energy", &raw_energy, "If true, compute energy before preemphasis and windowing"); opts->Register("cepstral-lifter", &cepstral_lifter, diff --git a/src/feat/feature-plp.cc b/src/feat/feature-plp.cc index 719e55dd6da..8e078ab15f6 100644 --- a/src/feat/feature-plp.cc +++ b/src/feat/feature-plp.cc @@ -125,7 +125,7 @@ void PlpComputer::Compute(BaseFloat signal_log_energy, if (opts_.use_energy && !opts_.raw_energy) signal_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::min())); + std::numeric_limits::min())); if (srfft_ != NULL) // Compute FFT using split-radix algorithm. srfft_->Compute(signal_frame->Data(), true); @@ -160,7 +160,7 @@ void PlpComputer::Compute(BaseFloat signal_log_energy, BaseFloat residual_log_energy = ComputeLpc(autocorr_coeffs_, &lpc_coeffs_); residual_log_energy = std::max(residual_log_energy, - std::numeric_limits::min()); + std::numeric_limits::min()); Lpc2Cepstrum(opts_.lpc_order, lpc_coeffs_.Data(), raw_cepstrum_.Data()); feature->Range(1, opts_.num_ceps - 1).CopyFromVec( diff --git a/src/feat/feature-plp.h b/src/feat/feature-plp.h index d7deab07ec1..5b7ef073047 100644 --- a/src/feat/feature-plp.h +++ b/src/feat/feature-plp.h @@ -61,7 +61,7 @@ struct PlpOptions { lpc_order(12), num_ceps(13), use_energy(true), - energy_floor(0.0), // not in log scale: a small value e.g. 
1.0e-10 + energy_floor(0.0), raw_energy(true), compress_factor(0.33333), cepstral_lifter(22), @@ -78,7 +78,9 @@ struct PlpOptions { opts->Register("use-energy", &use_energy, "Use energy (not C0) for zeroth PLP feature"); opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in PLP computation"); + "Floor on energy (absolute, not relative) in PLP computation. " + "Only makes a difference if --use-energy=true; only necessary if " + "--dither=0.0. Suggested values: 0.1 or 1.0"); opts->Register("raw-energy", &raw_energy, "If true, compute energy before preemphasis and windowing"); opts->Register("compress-factor", &compress_factor, diff --git a/src/feat/feature-spectrogram.cc b/src/feat/feature-spectrogram.cc index 953f38fc54f..7b380e7ad25 100644 --- a/src/feat/feature-spectrogram.cc +++ b/src/feat/feature-spectrogram.cc @@ -55,7 +55,7 @@ void SpectrogramComputer::Compute(BaseFloat signal_log_energy, // Compute energy after window function (not the raw one) if (!opts_.raw_energy) signal_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::epsilon())); + std::numeric_limits::epsilon())); if (srfft_ != NULL) // Compute FFT using split-radix algorithm. 
srfft_->Compute(signal_frame->Data(), true); @@ -67,7 +67,7 @@ void SpectrogramComputer::Compute(BaseFloat signal_log_energy, SubVector power_spectrum(*signal_frame, 0, signal_frame->Dim() / 2 + 1); - power_spectrum.ApplyFloor(std::numeric_limits::epsilon()); + power_spectrum.ApplyFloor(std::numeric_limits::epsilon()); power_spectrum.ApplyLog(); feature->CopyFromVec(power_spectrum); diff --git a/src/feat/feature-spectrogram.h b/src/feat/feature-spectrogram.h index ec318556f24..9aeb68c8df8 100644 --- a/src/feat/feature-spectrogram.h +++ b/src/feat/feature-spectrogram.h @@ -41,13 +41,17 @@ struct SpectrogramOptions { bool raw_energy; // If true, compute energy before preemphasis and windowing SpectrogramOptions() : - energy_floor(0.0), // not in log scale: a small value e.g. 1.0e-10 + energy_floor(0.0), raw_energy(true) {} void Register(OptionsItf *opts) { frame_opts.Register(opts); opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in Spectrogram computation"); + "Floor on energy (absolute, not relative) in Spectrogram " + "computation. Caution: this floor is applied to the zeroth " + "component, representing the total signal energy. 
The " + "floor on the individual spectrogram elements is fixed at " + "std::numeric_limits::epsilon()."); opts->Register("raw-energy", &raw_energy, "If true, compute energy before preemphasis and windowing"); } diff --git a/src/feat/feature-window.cc b/src/feat/feature-window.cc index 98afe1849e9..9ee10e47806 100644 --- a/src/feat/feature-window.cc +++ b/src/feat/feature-window.cc @@ -145,7 +145,7 @@ void ProcessWindow(const FrameExtractionOptions &opts, if (log_energy_pre_window != NULL) { BaseFloat energy = std::max(VecVec(*window, *window), - std::numeric_limits::epsilon()); + std::numeric_limits::epsilon()); *log_energy_pre_window = Log(energy); } diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h index a897c6fa4b0..e1b3ed048c1 100644 --- a/src/feat/feature-window.h +++ b/src/feat/feature-window.h @@ -71,7 +71,9 @@ struct FrameExtractionOptions { "Coefficient for use in signal preemphasis"); opts->Register("remove-dc-offset", &remove_dc_offset, "Subtract mean from waveform on each frame"); - opts->Register("dither", &dither, "Dithering constant (0.0 means no dither)"); + opts->Register("dither", &dither, "Dithering constant (0.0 means no dither). " + "If you turn this off, you should set the --energy-floor " + "option, e.g. to 1.0 or 0.1"); opts->Register("window-type", &window_type, "Type of window " "(\"hamming\"|\"hanning\"|\"povey\"|\"rectangular\"" "|\"blackmann\")");