Issue492 revise mfcc code (#497)

* Work on calculation of MFCCs Issue #492 Minor changes to ensure that Delta and DoubleDelta features are being calculated when set true in the config file. * Add more comments to clarify Issue #492 Adding comments for clarity. Adding more mfcc parameters so user has more control over the calculations. Previously there were default values for some parameters. * Add pre-emphasis option to extraction of signal envelope and doing ffts. Issue #492 Fixing up the calculation of mfcc feature vectors required the incorporation of signal pre-emphasis as an option. This is supposed to be good for speech recognition. All these changes are just related to adding in the preemphasis boolean to the method DSP_Frames.ExtractEnvelopeAndFfts(). * More changes to do with pre-emphasis of an audio signal Issue #492 * Revise code for calculation of MFCC's Issue #492 * Update MatrixTools.cs Issue #492 This mfcc processing step was supposed to be helpful. However the cepstral features it produced were noisy, so did not incorporate but may be useful in some other context. * Minor changes Issue #492 * Update ConfigurationClasses.cs Issue #492 Add another constructor to MfccConfiguration class because the existing constructor uses discontinued dictionary. * Create a unit test class for MfccSpectrograms. Issue #492 Begin work on unit tests for some spectrogram classes. * Work on more tests for mel-scale and mfcc spectrograms. Issue #492 * Update SpectrogramCepstral.cs Issue #492 Small changes to comment and method name * Update MFCCStuff.cs Issue #492 1) Correct an error in the calculation of delta and double-delta coefficients. 2) Remove duplication of a tricky method that normalizes spectral values for window power and SR. 3) Fix up method comments. * Update MatrixTools.cs Issue #492 Add simple matrix method. Required after removing aforementioned method duplication. * Add more unit tests for calculation of mel freq and mfcc coefficients. 
* Update MFCCStuff.cs Issue #492 Remove obsolete code. * Update SpectrogramGenerator.Core.cs Issue #492 Ensure that the mel scale spectrogram parameters are passed to the drawing method. * Update MatrixTools.cs Issue #492 Make method accept double * Update MFCCStuff.cs Issue #492 Main change is to ensure that the epsilon value passed to GetLogEnergySpectrogram() is squared before being used. * Update SpectrogramMelScale.cs Issue #492 Main change is to remove spectrogram normalisation at line 79. So the Make method returns a spectrogram having decibel values. * Update SpectrogramCepstral.cs Issue #492 Change old line 88. When preparing a cepstrogram, the linear filter bank is not a valid option. Other changes are to comments. * Get all spectrogram tests working. Issue #492 Once all three tests were working as expected, had to rewrite three binary files in Test/Fixtures. * Fixed unit tests. Issue #792 Required an extra binary file to cover a previously added test involving pre-emphasis * Change test class Issue #492 Refactor the test class to accord with Anthony's requests. * Remove use of binary testing files Issue #492 remove use of binary files for testing matrix values. Also use the proper system for checking test images. * Remove binary files from fixtures Issue #492 These are no longer required for testing. Now use precalculated values. * Bring two config files up to date. Issue #492 Bring SpectrogramGenerator config file up to date with previous changes. * Edit test files Issue #492 Bring test files up to date with new requirements. No longer use pre-calculated binary files. Instead use a limited number of pre-calculated values. * Fix unit test Issue #492 This error was due to different config values between config file and default constructor. * Removed unnecessary references to output directory Issue #492 Removed unnecessary references to output directory as requested by Anthony.
QutEcoacoustics · Jun 9, 2021 · 2099caf · 2099caf
1 parent 9a2b560
commit 2099caf
Show file tree

Hide file tree

Showing 44 changed files with 875 additions and 644 deletions.
diff --git a/src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml b/src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml
@@ -37,13 +37,26 @@ Images:
 WaveformHeight: 100
 
 # NOISE REDUCTION PARAMETERS
-# NoiseReductionType: Standard  # use the standard by defulat.
-BgNoiseThreshold: 3.0
+NoiseReductionType: Standard
+BgNoiseThreshold: 0.0
 
 # DIFFERENCE SPECTROGRAM - PARAMETER (in decibels)
 DifferenceThreshold: 3.0 
 
+# CEPSTROGRAM
+DoPreemphasis: false
+# The number of filters in the Mel-scale filter bank.
+#Typical values range over 64 (the default), 32, 26 (the minimum value I have come across).
+FilterbankCount: 64
+IncludeDelta: true
+IncludeDoubleDelta: true
+
 # LOCAL CONTRAST NORMALISATION PARAMETERS
+#   A low contrastLevel = 0.1 gives a more grey image.
+#   A high contrastLevel = 1.0 gives mostly white, high contrast image.
+#   The algorithm is not overly sensitive to the neighbourhood size.
+#NeighbourhoodSeconds: 1.0 #DEFAULT
+#LcnContrastLevel: 0.2 #DEFAULT
 NeighbourhoodSeconds: 1.0
 LcnContrastLevel: 0.2
 

diff --git a/src/AnalysisPrograms/Create4Sonograms.cs b/src/AnalysisPrograms/Create4Sonograms.cs
@@ -24,6 +24,8 @@ namespace AnalysisPrograms
     using Path = System.IO.Path;
 
     /// <summary>
+    /// TODO: THIS CLASS NOW OBSOLETE. CAN REMOVE.
+    ///       REPLACED BY SPECTROGRAM GENERATOR CLASS.
     /// Call this class by using the activity (first command line argument) "Create4Sonograms".
     /// </summary>
     public static class Create4Sonograms
@@ -113,7 +115,7 @@ public static void Main(Arguments arguments)
             var recording = new AudioRecording(fiOutputSegment.FullName);
 
             // EXTRACT ENVELOPE and SPECTROGRAM. This call uses the default FFT window.
-            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap);
+            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, false, frameSize, windowOverlap);
 
             // average absolute value over the minute recording
             ////double[] avAbsolute = dspOutput.Average;

diff --git a/src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs b/src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs
@@ -110,6 +110,7 @@ public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> Dete
                 recording.WavReader.Samples,
                 sr,
                 epsilon,
+                false,
                 windowSize,
                 windowOverlap);
             double[] avAbsolute = results2.Average; //average absolute value over the minute recording

diff --git a/src/AnalysisPrograms/SnrAnalysis.cs b/src/AnalysisPrograms/SnrAnalysis.cs
@@ -78,13 +78,6 @@ public static void Execute(Arguments arguments)
             double vocalGap = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0;
             double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0;
 
-            //bool DRAW_SONOGRAMS = (bool?)configuration.DrawSonograms ?? true;    //options to draw sonogram
-
-            //double intensityThreshold = Acoustics.AED.Default.intensityThreshold;
-            //if (dict.ContainsKey(key_AED_INTENSITY_THRESHOLD)) intensityThreshold = Double.Parse(dict[key_AED_INTENSITY_THRESHOLD]);
-            //int smallAreaThreshold = Acoustics.AED.Default.smallAreaThreshold;
-            //if( dict.ContainsKey(key_AED_SMALL_AREA_THRESHOLD))   smallAreaThreshold = Int32.Parse(dict[key_AED_SMALL_AREA_THRESHOLD]);
-
             // Convert input recording into wav
             var convertParameters = new AudioUtilityRequest { TargetSampleRate = 17640 };
             var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));
@@ -117,6 +110,7 @@ public static void Execute(Arguments arguments)
             // Calling this method will set the default FFT window.
             var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(
                 recording,
+                false,
                 sonoConfig.WindowSize,
                 sonoConfig.WindowOverlap);
 

diff --git a/src/AnalysisPrograms/SpectrogramGenerator/Audio2Sonogram.Entry.cs b/src/AnalysisPrograms/SpectrogramGenerator/Audio2Sonogram.Entry.cs
@@ -101,7 +101,7 @@ public static void Main(Arguments arguments)
     }
 
     /// <summary>
-    /// In line class used to return results from the static method Audio2Sonogram.GenerateFourSpectrogramImages().
+    /// In line class used to return results from the static method SpectrogramGenerator.GenerateSpectrogramImages().
     /// </summary>
     public class AudioToSonogramResult
     {

diff --git a/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGenerator.Core.cs b/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGenerator.Core.cs
@@ -43,7 +43,7 @@ static SpectrogramGenerator()
         /// DecibelSpectrogram.
         /// DecibelSpectrogramNoiseReduced.
         /// MelScaleSpectrogram
-        /// CepstralSpectrogram.
+        /// Cepstrogram.
         /// OctaveScaleSpectrogram
         /// RibbonSpectrogram.
         /// DifferenceSpectrogram.
@@ -76,11 +76,12 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
             // Default noiseReductionType = Standard
             var bgNoiseThreshold = config.BgNoiseThreshold;
 
-            // threshold for drawing the difference spectrogram
-            var differenceThreshold = config.DifferenceThreshold;
+            // set pre-emphasis to the default value false.
+            bool doPreemphasis = false;
 
             // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
-            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);
+            // The output from this call to ExtractEnvelopeAndFfts is used only for standard spectrograms.
+            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, doPreemphasis, frameSize, frameStep);
 
             // This constructor initializes default values for Melscale and Mfcc spectrograms and other parameters.
             var sonoConfig = new SonogramConfig()
@@ -186,7 +187,10 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
                 // IMAGE 5) draw difference spectrogram. This is derived from the original decibel spectrogram
                 if (@do.Contains(DifferenceSpectrogram))
                 {
+                    // threshold for drawing the difference spectrogram
                     //var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
+                    var differenceThreshold = config.DifferenceThreshold;
+
                     var differenceImage = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
                     differenceImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
                         differenceImage,
@@ -202,6 +206,9 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
             // The default spectrogram has 64 frequency bands.
             if (@do.Contains(MelScaleSpectrogram))
             {
+                sonoConfig.DoPreemphasis = config.DoPreemphasis;
+                sonoConfig.mfccConfig.DoMelScale = true;
+                sonoConfig.mfccConfig.FilterbankCount = config.FilterbankCount;
                 images.Add(
                     MelScaleSpectrogram,
                     GetMelScaleSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
@@ -210,9 +217,23 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
             // IMAGE 7) Cepstral Spectrogram
             if (@do.Contains(CepstralSpectrogram))
             {
-                images.Add(
-                    CepstralSpectrogram,
-                    GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
+                // The cepstrogram requires additional config settings. Cannot use previous spectrograms.
+                // Set up the config file.
+                // Use some defaults and get other parameters from config file.
+                sonoConfig.DoPreemphasis = config.DoPreemphasis;
+
+                // TODO CHECK IF THERE IS A NEED FOR NOISE REDUCTION
+                sonoConfig.NoiseReductionParameter = 0.0;
+                sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
+
+                sonoConfig.mfccConfig.DoMelScale = true;
+                sonoConfig.mfccConfig.FilterbankCount = config.FilterbankCount;
+
+                // set the default number of cepstral coefficients
+                sonoConfig.mfccConfig.CcCount = 12;
+                sonoConfig.mfccConfig.IncludeDelta = config.IncludeDelta;
+                sonoConfig.mfccConfig.IncludeDoubleDelta = config.IncludeDoubleDelta;
+                images.Add(CepstralSpectrogram, GetCepstrogram(sonoConfig, recordingSegment, sourceRecordingName));
             }
 
             // IMAGE 8) Octave-frequency scale Spectrogram
@@ -415,8 +436,9 @@ public static Image<Rgb24> GetMelScaleSpectrogram(
             string sourceRecordingName)
         {
             // TODO at present noise reduction type must be set = Standard.
+            //sonoConfig.NoiseReductionParameter = 3.0;
             sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
-            sonoConfig.NoiseReductionParameter = 3.0;
+
             var melFreqGram = new SpectrogramMelScale(sonoConfig, recording.WavReader);
             var image = melFreqGram.GetImage();
             var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
@@ -431,23 +453,26 @@ public static Image<Rgb24> GetMelScaleSpectrogram(
             return image;
         }
 
-        public static Image<Rgb24> GetCepstralSpectrogram(
-            SonogramConfig sonoConfig,
+        /// <summary>
+        /// Returns a cepstrogram image.
+        /// </summary>
+        public static Image<Rgb24> GetCepstrogram(
+            SonogramConfig config,
             AudioRecording recording,
             string sourceRecordingName)
         {
-            // TODO at present noise reduction type must be set = Standard.
-            sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
-            sonoConfig.NoiseReductionParameter = 3.0;
-            var cepgram = new SpectrogramCepstral(sonoConfig, recording.WavReader);
-            var image = cepgram.GetImage();
+            // Get the cepstrogram
+            var cepstrogram = new SpectrogramCepstral(config, recording.WavReader);
+
+            // Now prepare it as an image.
+            var image = cepstrogram.GetImage();
             var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
-                    "CEPSTRO-GRAM " + sourceRecordingName,
+                    "CEPSTROGRAM " + sourceRecordingName,
                     image.Width,
                     ImageTags[CepstralSpectrogram]);
             var startTime = TimeSpan.Zero;
             var xAxisTicInterval = TimeSpan.FromSeconds(1);
-            TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(sonoConfig.WindowStep / (double)sonoConfig.SampleRate);
+            TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(config.WindowStep / (double)config.SampleRate);
             var labelInterval = TimeSpan.FromSeconds(5);
             image = BaseSonogram.FrameSonogram(image, titleBar, startTime, xAxisTicInterval, xAxisPixelDuration, labelInterval);
             return image;

diff --git a/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGeneratorConfig.cs b/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGeneratorConfig.cs
@@ -12,13 +12,44 @@ public class SpectrogramGeneratorConfig : AnalyzerConfig
 #pragma warning disable SA1623 // Property summary documentation should match accessors
         public int WaveformHeight { get; set; } = 100;
 
-        public double BgNoiseThreshold { get; set; } = 3.0;
+        /// <summary>
+        /// The default threshold = zero decibels.
+        /// This removes the least background noise.
+        /// Values up to 4 decibels are possibly effective.
+        /// </summary>
+        public double BgNoiseThreshold { get; set; } = 0.0;
 
         /// <summary>
         /// DIFFERENCE SPECTROGRAM - PARAMETER (in decibels).
         /// </summary>
         public double DifferenceThreshold { get; set; } = 3.0;
 
+        /// <summary>
+        /// CEPSTROGRAM - PARAMETER.
+        /// Do pre-emphasis prior to FFT.
+        /// </summary>
+        public bool DoPreemphasis { get; set; } = false;
+
+        /// <summary>
+        /// CEPSTROGRAM - PARAMETER
+        /// The size of the Mel-scale filter bank.
+        /// The default value is 64.
+        /// The minimum I have seen referenced = 26.
+        /// </summary>
+        public int FilterbankCount { get; set; } = 64;
+
+        /// <summary>
+        /// CEPSTROGRAM - PARAMETER.
+        /// Include the delta features in the returned MFCC feature vector.
+        /// </summary>
+        public bool IncludeDelta { get; set; } = false;
+
+        /// <summary>
+        /// CEPSTROGRAM - PARAMETER.
+        /// Include the delta-delta or acceleration features in the returned MFCC feature vector.
+        /// </summary>
+        public bool IncludeDoubleDelta { get; set; } = false;
+
         /// <summary>
         /// LOCAL CONTRAST NORMALIZATION PARAMETERS.
         /// </summary>

diff --git a/src/AudioAnalysisTools/ChannelIntegrity.cs b/src/AudioAnalysisTools/ChannelIntegrity.cs
@@ -111,13 +111,14 @@ public static void SimilarityIndex(double[] channelL, double[] channelR, double
             int frameStep = 512;
             frameSize *= 16; // take longer window to get low freq
             frameStep *= 16;
+            bool doPreemphasis = false;
 
-            var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
+            var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
             var avSpectrumL = MatrixTools.GetColumnAverages(dspOutputL.AmplitudeSpectrogram);
 
             //var medianSpectrumL = MatrixTools.GetColumnMedians(dspOutputL.amplitudeSpectrogram);
 
-            var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
+            var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
             var avSpectrumR = MatrixTools.GetColumnAverages(dspOutputR.AmplitudeSpectrogram);
 
             //var medianSpectrumR = MatrixTools.GetColumnMedians(dspOutputR.amplitudeSpectrogram);
@@ -193,11 +194,12 @@ public static double SimilarityIndex2(double[] channelL, double[] channelR, doub
             //var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep);
             int frameSize = 512;
             int frameStep = 512;
+            bool doPreemphasis = false;
 
-            var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
+            var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
             var spgrmL = dspOutputL.AmplitudeSpectrogram;
 
-            var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
+            var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, doPreemphasis, frameSize, frameStep);
             var spgrmR = dspOutputR.AmplitudeSpectrogram;
 
             double similarityIndex = 0;