Skip to content

Commit

Permalink
Add a pre-emphasis option to the extraction of the signal envelope and FFTs.
Browse files Browse the repository at this point in the history
Issue #492: Fixing the calculation of MFCC feature vectors required incorporating signal pre-emphasis as an option. This is supposed to be good for speech recognition. All of these changes simply add the pre-emphasis boolean to the method DSP_Frames.ExtractEnvelopeAndFfts() and update its callers.
  • Loading branch information
towsey committed May 27, 2021
1 parent 1c5ad90 commit 8561315
Show file tree
Hide file tree
Showing 13 changed files with 55 additions and 32 deletions.
4 changes: 3 additions & 1 deletion src/AnalysisPrograms/Create4Sonograms.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ namespace AnalysisPrograms
using Path = System.IO.Path;

/// <summary>
/// TODO: THIS CLASS NOW OBSOLETE. CAN REMOVE.
/// REPLACED BY SPECTROGRAM GENERATOR CLASS.
/// Call this class by using the activity (first command line argument) "Create4Sonograms".
/// </summary>
public static class Create4Sonograms
Expand Down Expand Up @@ -113,7 +115,7 @@ public static void Main(Arguments arguments)
var recording = new AudioRecording(fiOutputSegment.FullName);

// EXTRACT ENVELOPE and SPECTROGRAM. This call uses the default FFT window.
var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap);
var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, false, frameSize, windowOverlap);

// average absolute value over the minute recording
////double[] avAbsolute = dspOutput.Average;
Expand Down
1 change: 1 addition & 0 deletions src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> Dete
recording.WavReader.Samples,
sr,
epsilon,
false,
windowSize,
windowOverlap);
double[] avAbsolute = results2.Average; //average absolute value over the minute recording
Expand Down
8 changes: 1 addition & 7 deletions src/AnalysisPrograms/SnrAnalysis.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,6 @@ public static void Execute(Arguments arguments)
double vocalGap = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0;
double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0;

//bool DRAW_SONOGRAMS = (bool?)configuration.DrawSonograms ?? true; //options to draw sonogram

//double intensityThreshold = Acoustics.AED.Default.intensityThreshold;
//if (dict.ContainsKey(key_AED_INTENSITY_THRESHOLD)) intensityThreshold = Double.Parse(dict[key_AED_INTENSITY_THRESHOLD]);
//int smallAreaThreshold = Acoustics.AED.Default.smallAreaThreshold;
//if( dict.ContainsKey(key_AED_SMALL_AREA_THRESHOLD)) smallAreaThreshold = Int32.Parse(dict[key_AED_SMALL_AREA_THRESHOLD]);

// Convert input recording into wav
var convertParameters = new AudioUtilityRequest { TargetSampleRate = 17640 };
var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));
Expand Down Expand Up @@ -117,6 +110,7 @@ public static void Execute(Arguments arguments)
// Calling this method will set the default FFT window.
var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(
recording,
false,
sonoConfig.WindowSize,
sonoConfig.WindowOverlap);

Expand Down
10 changes: 6 additions & 4 deletions src/AudioAnalysisTools/ChannelIntegrity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,14 @@ public static void SimilarityIndex(double[] channelL, double[] channelR, double
int frameStep = 512;
frameSize *= 16; // take longer window to get low freq
frameStep *= 16;
bool doPreemphasis = false;

var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
var avSpectrumL = MatrixTools.GetColumnAverages(dspOutputL.AmplitudeSpectrogram);

//var medianSpectrumL = MatrixTools.GetColumnMedians(dspOutputL.amplitudeSpectrogram);

var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
var avSpectrumR = MatrixTools.GetColumnAverages(dspOutputR.AmplitudeSpectrogram);

//var medianSpectrumR = MatrixTools.GetColumnMedians(dspOutputR.amplitudeSpectrogram);
Expand Down Expand Up @@ -193,11 +194,12 @@ public static double SimilarityIndex2(double[] channelL, double[] channelR, doub
//var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep);
int frameSize = 512;
int frameStep = 512;
bool doPreemphasis = false;

var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
var spgrmL = dspOutputL.AmplitudeSpectrogram;

var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
var spgrmR = dspOutputR.AmplitudeSpectrogram;

double similarityIndex = 0;
Expand Down
28 changes: 17 additions & 11 deletions src/AudioAnalysisTools/DSP/DSP_Frames.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,27 +137,28 @@ public static int FrameStep(int windowSize, double windowOverlap)
/// Calling this method will set default FFT window if windowName is null.
/// Otherwise sets the FFT window specified in the config file.
/// </summary>
public static EnvelopeAndFft ExtractEnvelopeAndFfts(AudioRecording recording, int frameSize, double overlap, string windowName = null)
public static EnvelopeAndFft ExtractEnvelopeAndFfts(AudioRecording recording, bool doPreemphasis, int frameSize, double overlap, string windowName = null)
{
int frameStep = (int)(frameSize * (1 - overlap));
return ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep, windowName);
return ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, doPreemphasis, frameSize, frameStep, windowName);
}

/// <summary>
/// Calling this method sets the default FFT window, currently HANNING - see FFT.cs line 22.
/// </summary>
public static EnvelopeAndFft ExtractEnvelopeAndFfts(AudioRecording recording, int frameSize, int frameStep)
public static EnvelopeAndFft ExtractEnvelopeAndFfts(AudioRecording recording, bool doPreemphasis, int frameSize, int frameStep)
{
return ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep, FFT.DefaultFftWindow);
return ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, doPreemphasis, frameSize, frameStep, FFT.DefaultFftWindow);
}

/// <summary>
/// Calling this method sets the default FFT window, currently HANNING - see FFT.cs line 22.
/// Same as previous method but use frame overlap (a double) as an argument rather than framestep (an integer).
/// </summary>
public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram(double[] signal, int sampleRate, double epsilon, int frameSize, double overlap)
public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram(double[] signal, int sampleRate, double epsilon, bool doPreemphasis, int frameSize, double overlap)
{
int frameStep = (int)(frameSize * (1 - overlap));
return ExtractEnvelopeAndAmplSpectrogram(signal, sampleRate, epsilon, frameSize, frameStep, FFT.DefaultFftWindow);
return ExtractEnvelopeAndAmplSpectrogram(signal, sampleRate, epsilon, doPreemphasis, frameSize, frameStep, FFT.DefaultFftWindow);
}

/// <summary>
Expand All @@ -183,16 +184,17 @@ public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram(
double[] signal,
int sampleRate,
double epsilon,
bool doPreemphasis,
int frameSize,
int frameStep,
string windowName = null)
{
// SIGNAL PRE-EMPHASIS helps with speech signals
// SIGNAL PRE-EMPHASIS helps with speech signals.
// Do not use this for environmental audio
//if (config.DoPreemphasis)
//{
// signal = DSP_Filters.PreEmphasis(signal, 0.96);
//}
if (doPreemphasis)
{
signal = DspFilters.PreEmphasis(signal, 0.96);
}

int[,] frameIDs = FrameStartEnds(signal.Length, frameSize, frameStep);
if (frameIDs == null)
Expand All @@ -218,7 +220,11 @@ public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram(
double[] minValues = new double[frameCount];
double[] maxValues = new double[frameCount];
double[] envelope = new double[frameCount];

// The average sample energy in a frame. Energy = sample value squared.
double[] frameEnergy = new double[frameCount];

// The log base 10 of the frame energy.
double[] frameDecibels = new double[frameCount];

// for all frames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ public static EventStatistics AnalyzeAudioEvent(
// convert recording to spectrogram
int sampleRate = recording.SampleRate;
double epsilon = recording.Epsilon;
bool doPreemphasis = false; // default value

// extract the spectrogram
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, doPreemphasis, config.FrameSize, config.FrameStep);

double hertzBinWidth = dspOutput1.FreqBinWidth;
var stepDurationInSeconds = config.FrameStep / (double)sampleRate;
Expand Down
8 changes: 6 additions & 2 deletions src/AudioAnalysisTools/Indices/IndexCalculate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ public static IndexCalculateResult Analysis(
{
// returnSonogramInfo = true; // if debugging
double epsilon = recording.Epsilon;

// never do signal preemphasis when calculating indices.
bool doPreemphasis = false;

int signalLength = recording.WavReader.GetChannel(0).Length;
int sampleRate = recording.WavReader.SampleRate;
var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
Expand Down Expand Up @@ -138,7 +142,7 @@ public static IndexCalculateResult Analysis(
// ################################## NOW GET THE AMPLITUDE SPECTROGRAMS

// EXTRACT ENVELOPE and SPECTROGRAM FROM SUBSEGMENT
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(subsegmentRecording, frameSize, frameStep);
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(subsegmentRecording, doPreemphasis, frameSize, frameStep);

// Select band according to min and max bandwidth
int minBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MinBandWidth);
Expand Down Expand Up @@ -202,7 +206,7 @@ public static IndexCalculateResult Analysis(
var bgnRecording = AudioRecording.GetRecordingSubsegment(recording, startSample, endSample, sampleBuffer);

// EXTRACT ENVELOPE and SPECTROGRAM FROM BACKGROUND NOISE SUBSEGMENT
dspOutput2 = DSP_Frames.ExtractEnvelopeAndFfts(bgnRecording, frameSize, frameStep);
dspOutput2 = DSP_Frames.ExtractEnvelopeAndFfts(bgnRecording, doPreemphasis, frameSize, frameStep);

// If necessary, recalculate the spectrogram according to octave scale. This option works only with high SR recordings.
if (octaveScale)
Expand Down
3 changes: 2 additions & 1 deletion src/AudioAnalysisTools/Indices/IndexCalculateSixOnly.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ public static SpectralIndexValuesForContentDescription Analysis(

// EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
// Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
bool doPreemphasis = false;
var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, doPreemphasis, frameSize, frameStep);
var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

// (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
Expand Down
4 changes: 3 additions & 1 deletion src/AudioAnalysisTools/Ocillations/Oscillations2014.cs
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,10 @@ public static Dictionary<string, string> GetConfigDictionary(FileInfo configFile

// AT: Switched to below method of extracting the spectrogram because BaseSonogram
// does not allow small spectrograms (less than 0.2s) to be calculated.
// set default value for preemphasis. Only used when dealing with human speech.
bool doPreemphasis = false;

var fft = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameLength, frameLength);
var fft = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, doPreemphasis, frameLength, frameLength);
return fft.AmplitudeSpectrogram;
}

Expand Down
4 changes: 2 additions & 2 deletions src/AudioAnalysisTools/SpectralClustering.cs
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ public static SpectrogramStandard GetStandardSpectrogram(AudioRecording recordin
int frameStep = frameSize;

// get amplitude spectrogram and remove the DC column ie column zero.
var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);
var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, false, frameSize, frameStep);

// remove background noise from the full amplitude spectrogram
const double sdCount = 0.1;
Expand All @@ -530,7 +530,7 @@ public static SpectrogramStandard GetStandardSpectrogram(AudioRecording recordin
int frameStep = frameSize;

// get decibel spectrogram
var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);
var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, false, frameSize, frameStep);
var spectrogram = MFCCStuff.DecibelSpectra(results.AmplitudeSpectrogram, results.WindowPower, recording.SampleRate, recording.Epsilon);

// remove background noise from spectrogram
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,12 @@ public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
this.Attributes.FramesPerSecond = wav.SampleRate / (double)config.WindowStep;

var recording = new AudioRecording(wav);

// set the default value for pre-emphasis
bool doPreemphasis = false;
var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
recording,
doPreemphasis,
config.WindowSize,
config.WindowOverlap,
this.Configuration.WindowFunction);
Expand Down
7 changes: 6 additions & 1 deletion src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,14 @@ public abstract partial class BaseSonogram
public SNR SnrData { get; set; }

/// <summary>
/// Gets or sets decibels per signal frame.
/// Gets or sets decibels per signal frame. i.e. log frame energy.
/// </summary>
public double[] DecibelsPerFrame { get; set; }

/// <summary>
/// Gets or sets the array of frame log-energy values normalised to the range 0 to 1.
/// This is derived from the array variable DecibelsPerFrame[].
/// </summary>
public double[] DecibelsNormalised { get; set; }

/// <summary>
Expand Down Expand Up @@ -185,6 +189,7 @@ private void InitialiseSpectrogram(WavReader wav)
var recording = new AudioRecording(wav);
var fftData = DSP_Frames.ExtractEnvelopeAndFfts(
recording,
this.Configuration.DoPreemphasis,
this.Configuration.WindowSize,
this.Configuration.WindowOverlap,
this.Configuration.WindowFunction);
Expand Down
3 changes: 2 additions & 1 deletion src/AudioAnalysisTools/Tracks/SpectralPeakTracks.cs
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,8 @@ public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording reco
int ridgeBuffer = frameSize * bufferFrameCount;
var ridgeRecording = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
int frameStep = frameSize;
var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);
bool doPreemphasis = false; // default value
var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, doPreemphasis, frameSize, frameStep);

// Generate the ridge SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
// i: generate the SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
Expand Down

0 comments on commit 8561315

Please sign in to comment.