Skip to content

Commit

Permalink
Work on calculation of MFCCs
Browse files Browse the repository at this point in the history
Issue #492 Minor changes to ensure that Delta and DoubleDelta features are being calculated when set true in the config file.
  • Loading branch information
towsey committed May 25, 2021
1 parent 9a2b560 commit b4bf067
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static void Main(Arguments arguments)
}

/// <summary>
/// In line class used to return results from the static method Audio2Sonogram.GenerateFourSpectrogramImages().
/// Inline class used to return results from the static method SpectrogramGenerator.GenerateSpectrogramImages().
/// </summary>
public class AudioToSonogramResult
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,6 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
// Default noiseReductionType = Standard
var bgNoiseThreshold = config.BgNoiseThreshold;

// threshold for drawing the difference spectrogram
var differenceThreshold = config.DifferenceThreshold;

// EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

Expand Down Expand Up @@ -186,7 +183,10 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
// IMAGE 5) draw difference spectrogram. This is derived from the original decibel spectrogram
if (@do.Contains(DifferenceSpectrogram))
{
// threshold for drawing the difference spectrogram
//var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
var differenceThreshold = config.DifferenceThreshold;

var differenceImage = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
differenceImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
differenceImage,
Expand All @@ -202,6 +202,7 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
// The default spectrogram has 64 frequency bands.
if (@do.Contains(MelScaleSpectrogram))
{
sonoConfig.mfccConfig.DoMelScale = true;
images.Add(
MelScaleSpectrogram,
GetMelScaleSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
Expand All @@ -210,9 +211,20 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
// IMAGE 7) Cepstral Spectrogram
if (@do.Contains(CepstralSpectrogram))
{
// TODO at present noise reduction type must be set = Standard.
// ... but use the NoiseReductionParameter that is set in the config file.
//sonoConfig.NoiseReductionParameter = 0.0;
sonoConfig.NoiseReductionType = NoiseReductionType.Standard;

sonoConfig.mfccConfig.DoMelScale = true;

// set the default number of cepstral coefficients
sonoConfig.mfccConfig.CcCount = 12;
sonoConfig.mfccConfig.IncludeDelta = config.IncludeDelta;
sonoConfig.mfccConfig.IncludeDoubleDelta = config.IncludeDoubleDelta;
images.Add(
CepstralSpectrogram,
GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
GetCepstrogram(sonoConfig, recordingSegment, sourceRecordingName));
}

// IMAGE 8) Octave-frequency scale Spectrogram
Expand Down Expand Up @@ -415,8 +427,9 @@ public static Image<Rgb24> GetMelScaleSpectrogram(
string sourceRecordingName)
{
// TODO at present noise reduction type must be set = Standard.
//sonoConfig.NoiseReductionParameter = 3.0;
sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
sonoConfig.NoiseReductionParameter = 3.0;

var melFreqGram = new SpectrogramMelScale(sonoConfig, recording.WavReader);
var image = melFreqGram.GetImage();
var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
Expand All @@ -431,18 +444,15 @@ public static Image<Rgb24> GetMelScaleSpectrogram(
return image;
}

public static Image<Rgb24> GetCepstralSpectrogram(
public static Image<Rgb24> GetCepstrogram(
SonogramConfig sonoConfig,
AudioRecording recording,
string sourceRecordingName)
{
// TODO at present noise reduction type must be set = Standard.
sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
sonoConfig.NoiseReductionParameter = 3.0;
var cepgram = new SpectrogramCepstral(sonoConfig, recording.WavReader);
var image = cepgram.GetImage();
var cepstrogram = new SpectrogramCepstral(sonoConfig, recording.WavReader);
var image = cepstrogram.GetImage();
var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
"CEPSTRO-GRAM " + sourceRecordingName,
"CEPSTROGRAM " + sourceRecordingName,
image.Width,
ImageTags[CepstralSpectrogram]);
var startTime = TimeSpan.Zero;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,23 @@ public class SpectrogramGeneratorConfig : AnalyzerConfig
#pragma warning disable SA1623 // Property summary documentation should match accessors
public int WaveformHeight { get; set; } = 100;

public double BgNoiseThreshold { get; set; } = 3.0;
public double BgNoiseThreshold { get; set; } = 0.0;

/// <summary>
/// DIFFERENCE SPECTROGRAM - PARAMETER (in decibels).
/// </summary>
public double DifferenceThreshold { get; set; } = 3.0;

/// <summary>
/// CEPSTROGRAM - PARAMETER: whether to include delta features (copied to mfccConfig.IncludeDelta).
/// </summary>
public bool IncludeDelta { get; set; } = false;

/// <summary>
/// CEPSTROGRAM - PARAMETER: whether to include double-delta features (copied to mfccConfig.IncludeDoubleDelta).
/// </summary>
public bool IncludeDoubleDelta { get; set; } = false;

/// <summary>
/// LOCAL CONTRAST NORMALIZATION PARAMETERS.
/// </summary>
Expand Down
11 changes: 6 additions & 5 deletions src/AudioAnalysisTools/DSP/MFCCStuff.cs
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,9 @@ public static double InverseHerzTranform(double m, double c, double div)
return binBounds;
}

/*
/// <summary>
/// Does MelFilterBank for passed sonogram matrix.
/// Calculates the MelFilterBank for passed sonogram matrix.
/// IMPORTANT !!!!! Assumes that min freq of passed sonogram matrix = 0 Hz and maxFreq = Nyquist.
/// Uses Greg's MelIntegral.
/// </summary>
Expand Down Expand Up @@ -413,9 +414,10 @@ public static double InverseHerzTranform(double m, double c, double div)
return outData;
}
*/

/// <summary>
/// Does mel conversion for sonogram for any frequency band given by minFreq and maxFreq.
/// Does conversion from linear frequency scale to mel-scale for any frequency band given by minFreq and maxFreq.
/// Uses Greg's MelIntegral
/// The first step is to calculate the number of filters for the required frequency sub-band.
/// </summary>
Expand All @@ -429,8 +431,7 @@ public static double InverseHerzTranform(double m, double c, double div)
double freqRange = maxFreq - minFreq;
if (freqRange <= 0)
{
Log.WriteLine("Speech.MelFilterBank(): WARNING!!!! Freq range = zero");
throw new Exception("Speech.LinearFilterBank(): WARNING!!!! Freq range = zero. Check values of min & max freq.");
throw new Exception("FATAL ERROR: Speech.LinearFilterBank(): Freq range = zero. Check values of min & max freq.");
}

double melNyquist = Mel(nyquist);
Expand Down Expand Up @@ -514,7 +515,7 @@ public static double InverseHerzTranform(double m, double c, double div)
//********************************************************************************************************************
//********************************************************************************************************************
//********************************************************************************************************************
//******************************* CEPTRA COEFFICIENTS USING DCT AND COSINES
//******************************* CALCULATION OF CEPSTRAL COEFFICIENTS USING DCT AND COSINES

public static double[,] Cepstra(double[,] spectra, int coeffCount)
{
Expand Down
15 changes: 11 additions & 4 deletions src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@ public SpectrogramMelScale(AmplitudeSonogram sg, int minHz, int maxHz)

/// <summary>
/// Converts amplitude matrix to mel-frequency scale spectrogram.
/// IMPORTANT NOTE: The conversion to mel-scale and the conversion to decibels are both done
/// in MakeMelScaleSpectrogram(), i.e. BEFORE noise reduction.
/// </summary>
/// <param name="amplitudeM">Matrix of amplitude values.</param>
public override void Make(double[,] amplitudeM)
{
// call static method to convert amplitude spectrogram to melscale.
var m = MakeMelScaleSpectrogram(this.Configuration, amplitudeM, this.SampleRate);

//(iii) NOISE REDUCTION
Expand All @@ -79,21 +82,24 @@ public override void Make(double[,] amplitudeM)
//##################################################################################################################################

/// <summary>
/// Converts an amplitude spectrogram to mel-scale spectrogram.
/// NOTE!!!! The decibel array has been normalised in 0 - 1.
/// </summary>
public static double[,] MakeMelScaleSpectrogram(SonogramConfig config, double[,] matrix, int sampleRate)
public static double[,] MakeMelScaleSpectrogram(SonogramConfig config, double[,] amplitudeM, int sampleRate)
{
double[,] m = matrix;
double[,] m = amplitudeM;
int nyquist = sampleRate / 2;
double epsilon = config.epsilon;

//(i) APPLY FILTER BANK
//number of Hz bands = 2^N +1. Subtract DC bin
int fftBinCount = config.FreqBinCount;

// This config has the default values set for calculating MFCCs.
// Mel band count is set to 64 by default in BaseSonogramConfig class at line 158.
// Coefficient count is set to 12 by default.
int bandCount = config.mfccConfig.FilterbankCount;
Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1));
Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + amplitudeM.GetLength(1));

//error check that filterBankCount < Number of FFT bins
if (bandCount > fftBinCount)
Expand All @@ -104,12 +110,13 @@ public override void Make(double[,] amplitudeM)
}

//this is the filter count for full bandwidth 0-Nyquist. This number is trimmed proportionately to fit the required bandwidth.
// The last two arguments set the frequency band.
m = MFCCStuff.MelFilterBank(m, bandCount, nyquist, 0, nyquist);

Log.WriteIfVerbose("\tDim after filter bank=" + m.GetLength(1) + " (Max filter bank=" + bandCount + ")");

//(ii) CONVERT AMPLITUDES TO DECIBELS
m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon); //from spectrogram
m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon);
return m;
}

Expand Down

0 comments on commit b4bf067

Please sign in to comment.