From dd5e5e1f980133345454a455bb77a714bb21fc77 Mon Sep 17 00:00:00 2001 From: Michael Towsey Date: Wed, 16 Jun 2021 13:18:39 +1000 Subject: [PATCH] Issue #471 check formant gap (#498) * Adds test cases for harmonic algoirthm Also adds some test assertions for validating objects. Related to #471 * Update CrossCorrelation.cs Issue #471 In order to condition the array of DCT coefficient values nicely, I have removed the linear trend. This helps to lower the long-wavelenth coefficients making it more likely to find the correct maximum. * Fill in the gaps in the harmonic score array. Issue #471 When searching for harmonic events, sometimes small gaps appear in the harmonic score array which means the events are too short and therefore rejected. This hack fills in gaps of one or two frames. This problem arises because the maximum DCT coefficient is sometimes of a longer wavelength (lower array index) than the the expected wavelength. * Update HarmonicAlgorithmTests.cs Issue #471 Reworked these test. They now both pass. The problem lies in the return DCT coefficients being unduly sensitive to noise. The harmonic recognizer still appears to be sensitive to the minHertz and maxHertz parameter values in ways that I would not expect. THis remains to be checked. * Create a new method to detect harmonic events Issue #471 Create a new method to detect harmonic events in a score array. Create new property in the HarmonicEvent class. * Add comments on the harmonic detection and DCT Issue #471 Add more detailed comments on the harmonic detection and DCT. Also clean out some unused code. * Update MFCCStuff.cs Issue #471 More explanatory comments. * Rework two test classes Issue #471 The Harmonic Algorithm tests are now working as one would expect. * Refactor image method Issue #471 Refactor the method that draws a matrix as an image so that the method returns the image. This had side effect on the Oscillations2010 class. * Create a new unit test Issue #471 Create a unit test for the method that draws a matrix of cosine basis functions. * Update GenericRecognizerTests.cs Issue #471 Adjust this unit test following all previous changes to the methods that find harmonic events. * Update CrossCorrelation.cs Issue #471 Removed the only useful method from this class and placed in HarmonicPArameters.cs. THis CrossCorrelations.cs class is now redundant and could possibly be removed except that it contains methods previously used in recognition of crow calls and human speech. However I doubt they of use any longer. * Update HarmonicParameters.cs Issue #471 Shifted method DetectHarmonicsInSpectrogramData() to a location where it better belongs. * Update src/AudioAnalysisTools/Events/Types/HarmonicEvent.cs Co-authored-by: Anthony Truskinger * Make spectrogram smoothing accessible to user Issue #471 Make spectrogram smoothing accessible to user. The default is no spectrogram smoothing. * Adjust expected values in unit tests Issue $471 Adjust expected values in unit tests. Returned frequency bounds to previous values. Added in the HarmonicInterval as a value to be checked. * Update GenericRecognizerTests.cs Issue #471 Remove hard coded path. * Small changes Issue #471 Small changes requested by Anthony. * Update HarmonicParameters.md Issue #471 Editing of the md file for Harmonics, taking account of recent changes. * Update HarmonicEvent.cs Restored Value in xml doc Co-authored-by: Anthony Truskinger --- docs/technical/apidoc/HarmonicParameters.md | 30 +- .../Extensions/EnumerableExtensions.cs | 5 +- .../Recognizers/GenericRecognizer.cs | 7 +- src/AudioAnalysisTools/CrossCorrelation.cs | 186 ++--------- src/AudioAnalysisTools/DSP/MFCCStuff.cs | 20 +- .../Events/Types/HarmonicEvent.cs | 10 +- .../Harmonics/HarmonicParameters.cs | 292 ++++++++++++++++-- .../Ocillations/Oscillations2010.cs | 4 +- .../Ocillations/Oscillations2012.cs | 8 - src/TowseyLibrary/ImageTools.cs | 38 ++- src/TowseyLibrary/MatrixTools.cs | 5 + tests/Acoustics.Test/Acoustics.Test.csproj | 1 + .../Recognizers/GenericRecognizerTests.cs | 35 ++- .../HarmonicAlgorithmTests.cs | 154 +++++++++ .../Extensions/EnumerableExtensionsTests.cs | 9 + .../Acoustics.Test/TestHelpers/Assertions.cs | 42 +++ .../TestHelpers/GeneratedImageTest.cs | 14 +- .../TestHelpers/OutputDirectoryTest.cs | 21 ++ tests/Fixtures/harmonic.wav | 3 + 19 files changed, 605 insertions(+), 279 deletions(-) create mode 100644 tests/Acoustics.Test/AudioAnalysisTools/HarmonicAnalysis/HarmonicAlgorithmTests.cs create mode 100644 tests/Fixtures/harmonic.wav diff --git a/docs/technical/apidoc/HarmonicParameters.md b/docs/technical/apidoc/HarmonicParameters.md index c3b271910..eee3d1650 100644 --- a/docs/technical/apidoc/HarmonicParameters.md +++ b/docs/technical/apidoc/HarmonicParameters.md @@ -4,7 +4,7 @@ uid: AnalysisPrograms.Recognizers.Base.HarmonicParameters ## Harmonic Event detection -The algorithm to find harmonic events uses a `discrete cosine transform` or *DCT*. Setting the correct DCT for the target syllable requires additional parameters. +The algorithm to find harmonic events uses a `discrete cosine transform` or *DCT* to find a stack of harmonics or formants. Setting the correct DCT for the target syllable requires additional parameters. Note that for our purposes here, the terms `harmonic` and `formant` are taken as equivalent. The algorithm to find harmonic events can be visualized as similar to the [oscillations algorithm]](xref:AnalysisPrograms.Recognizers.Base.OscillationParameters), @@ -16,6 +16,7 @@ Profiles: Speech: !HarmonicParameters FrameSize: 512 FrameStep: 512 + SmoothingWindow: 3 # The search band MinHertz: 500 MaxHertz: 5000 @@ -23,7 +24,7 @@ Profiles: MinDuration: 0.2 MaxDuration: 1.0 DecibelThreshold: 2.0 - # Min & max Hertz gap between harmonics + # Min & max Hertz interval between harmonics. MinFormantGap: 400 MaxFormantGap: 1200 DctThreshold: 0.15 @@ -32,24 +33,21 @@ Profiles: ``` > [!NOTE] -> The first parameters are common to all events—see +> Some of these parameters are common to all events, that is, those that determine the search band, the allowable event duration and +> the decibel threshold —see > . -> These parameters determine the search band, the allowable event duration and -> the decibel threshold. > > The remaining parameters are unique to the harmonic algorithm and > determine the search for harmonics. -There are only two parameters that are specific to `Harmonics`, -`MinFormantGap` and `MaxFormantGap`. These specify the minimum and maximum -allowed gap (measured in Hertz) between adjacent formants/harmonics. Note that -for these purposes the terms `harmonic` and `formant` are equivalent. -By default, the DCT is calculated over all bins in the search band. +There are four parameters specific to `Harmonics`: `SmoothingWindow`, +`MinFormantGap`, `MaxFormantGap` and `DctThreshold`. `SmoothingWindow` sets the window size of a moving average filter that smoothes the frequency bin values in the spectrogram prior to running the DCT. This can be useful when the formants of interest are broken by noise or interrupted. `MinFormantGap` and `MaxFormantGap` specify the minimum and maximum +allowed interval (measured in Hertz) between adjacent formants/harmonics. +By default, the DCT is calculated over all frequency bins in the search band. +`DctThreshold` is a value between 0.0 and 1.0 which sets the sensitivity of the search. A lower value of `DctThreshold` will detect more harmonic events. The output from a DCT operation is an array of coefficients (taking values in -`[0, 1]`). The index into the array is the gap between formants and the value -at that index is the formant amplitude. The index with largest amplitude -indicates the likely formant gap, but `DctThreshold` sets the minimum -acceptable amplitude value. Lowering `DctThreshold` increases the likelihood -that random noise will be accepted as a true set of formants; increasing -`DctThreshold` increases the likelihood that a target set of formants is rejected. +`[0, 1]`). The index into the array indicates a particular harmonic interval and the array value at that index indicates magnitude of that interval. The index with largest amplitude +indicates the likely interval between each of the formants. However, if the maximum coefficient is less than the `DctThreshold`, a stack of formants is consider not to be present. Lowering the `DctThreshold` increases the likelihood that random noise will be accepted as a true set of formants; increasing the `DctThreshold` increases the likelihood that a target set of formants is rejected. + +Note that to reduce the chances of the DCT algorithm producing an erroneous result, a minimum of three harmonics/formants is required, that is, the fundamental and two higher harmonics. Another way to think of this is that at least two harmonic intervals are required to constitute a stack of harmonics. Despite this precaution, the DCT algorithm is sensitive to noise and you made need to experiment to get the optimum parameter values. diff --git a/src/Acoustics.Shared/Extensions/EnumerableExtensions.cs b/src/Acoustics.Shared/Extensions/EnumerableExtensions.cs index 65ca3ae78..e559d14e5 100644 --- a/src/Acoustics.Shared/Extensions/EnumerableExtensions.cs +++ b/src/Acoustics.Shared/Extensions/EnumerableExtensions.cs @@ -315,12 +315,11 @@ public static string Join(this IEnumerable items, string delimiter = " ", var result = new StringBuilder(); foreach (var item in items) { - result.Append(item); - result.Append(delimiter); + result.AppendJoin(string.Empty, prefix, item, suffix, delimiter); } // return one delimiter length less because we always add a delimiter on the end - return result.ToString(0, result.Length - delimiter.Length); + return result.ToString(0, Math.Max(0, result.Length - delimiter.Length)); } public static string JoinFormatted(this IEnumerable items, string formatString = "{0:f2}", string delimiter = " ") => diff --git a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs index cc9dcb8b1..b26872ac0 100644 --- a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs +++ b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs @@ -6,6 +6,7 @@ namespace AnalysisPrograms.Recognizers { using System; using System.Collections.Generic; + using System.ComponentModel.DataAnnotations; using System.IO; using System.Linq; using System.Reflection; @@ -72,10 +73,10 @@ public static void ValidateProfileTagsMatchAlgorithms(Dictionary { if (profile is CommonParameters c) { - var checks = c.Validate(null).Where(v => v is not null); - if (checks.Any()) + List failures = new(); + if (!Validator.TryValidateObject(c, new ValidationContext(c), failures)) { - throw new ConfigFileException(checks, file) { ProfileName = profileName }; + throw new ConfigFileException(failures, file) { ProfileName = profileName }; } } diff --git a/src/AudioAnalysisTools/CrossCorrelation.cs b/src/AudioAnalysisTools/CrossCorrelation.cs index 4a0624628..aad5c3b45 100644 --- a/src/AudioAnalysisTools/CrossCorrelation.cs +++ b/src/AudioAnalysisTools/CrossCorrelation.cs @@ -5,107 +5,34 @@ namespace AudioAnalysisTools { using System; - using Accord.Math; - using AudioAnalysisTools.DSP; using TowseyLibrary; + /// + /// This class contains two methods that could eventually be deleted. + /// The methods are only called by call recognizers that have not been maintained in a long time. + /// public class CrossCorrelation { + // THESE KEYS COMMENTED 2021 June 13 as they appear to be unused. //these keys are used to define a cross-correlation event in a sonogram. - public const string key_COUNT = "count"; - public const string key_START_FRAME = "startFrame"; - public const string key_END_FRAME = "endFrame"; - public const string key_FRAME_COUNT = "frameCount"; - public const string key_START_SECOND = "startSecond"; - public const string key_END_SECOND = "endSecond"; - public const string key_MIN_FREQBIN = "minFreqBin"; - public const string key_MAX_FREQBIN = "maxFreqBin"; - public const string key_MIN_FREQ = "minFreq"; - public const string key_MAX_FREQ = "maxFreq"; - public const string key_SCORE = "score"; - public const string key_PERIODICITY = "periodicity"; - //public const string key_COUNT = "count"; - - /* - public static Tuple DetectBarsUsingXcorrelation(double[,] m, int rowStep, int rowWidth, int colStep, int colWidth, - double intensityThreshold, int zeroBinCount) - { - bool doNoiseremoval = true; - - //intensityThreshold = 0.3; - - int rowCount = m.GetLength(0); - int colCount = m.GetLength(1); - int numberOfColSteps = colCount / colStep; - int numberOfRowSteps = rowCount / rowStep; - - var intensityMatrix = new double[numberOfRowSteps, numberOfColSteps]; - var periodicityMatrix = new double[numberOfRowSteps, numberOfColSteps]; - var hitsMatrix = new double[rowCount, colCount]; - double[] array2return = null; - - for (int b = 0; b < numberOfColSteps; b++) - { - int minCol = b * colStep; - int maxCol = minCol + colWidth - 1; - - double[,] subMatrix = MatrixTools.Submatrix(m, 0, minCol, rowCount - 1, maxCol); - double[] amplitudeArray = MatrixTools.GetRowAverages(subMatrix); - - if (doNoiseremoval) - { - double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction - SNR.BackgroundNoise bgn = SNR.SubtractBackgroundNoiseFromSignal(amplitudeArray, StandardDeviationCount); - amplitudeArray = bgn.NoiseReducedSignal; - } - - //double noiseThreshold = 0.005; - //for (int i = 1; i < amplitudeArray.Length - 1; i++) - //{ - // if ((amplitudeArray[i - 1] < noiseThreshold) && (amplitudeArray[i + 1] < noiseThreshold)) amplitudeArray[i] = 0.0; - //} - //DataTools.writeBarGraph(amplitudeArray); - if (b == 2) - { - array2return = amplitudeArray; //returned for debugging purposes only - } - - //ii: DETECT HARMONICS - var results = AutoAndCrossCorrelation.DetectPeriodicityInLongArray(amplitudeArray, rowStep, rowWidth, zeroBinCount); - double[] intensity = results.Item1; //an array of periodicity scores - double[] periodicity = results.Item2; - - //transfer periodicity info to a matrices. - for (int rs = 0; rs < numberOfRowSteps; rs++) - { - intensityMatrix[rs, b] = intensity[rs]; - periodicityMatrix[rs, b] = periodicity[rs]; - - //mark up the hits matrix - //double relativePeriod = periodicity[rs] / rowWidth / 2; - if (intensity[rs] > intensityThreshold) - { - int minRow = rs * rowStep; - int maxRow = minRow + rowStep - 1; - for (int r = minRow; r < maxRow; r++) - { - for (int c = minCol; c < maxCol; c++) - { - //hitsMatrix[r, c] = relativePeriod; - hitsMatrix[r, c] = periodicity[rs]; - } - } - } // if() - } // for loop over numberOfRowSteps - } // for loop over numberOfColSteps - - return Tuple.Create(intensityMatrix, periodicityMatrix, hitsMatrix, array2return); - } - */ + //public const string key_START_FRAME = "startFrame"; + //public const string key_END_FRAME = "endFrame"; + //public const string key_FRAME_COUNT = "frameCount"; + //public const string key_START_SECOND = "startSecond"; + //public const string key_END_SECOND = "endSecond"; + //public const string key_MIN_FREQBIN = "minFreqBin"; + //public const string key_MAX_FREQBIN = "maxFreqBin"; + //public const string key_MIN_FREQ = "minFreq"; + //public const string key_MAX_FREQ = "maxFreq"; + //public const string key_SCORE = "score"; + //public const string key_PERIODICITY = "periodicity"; /// - /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram. + /// TODO THis method could eventually be deleted. It has been replaced by the other methods below. + /// Amongst other things, the term "periodicity" is used incorrectly in this method. + /// It actually refers to the "harmonic interval". + /// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral timeframes of a spectrogram. /// /// public static Tuple DetectBarsInTheRowsOfaMatrix(double[,] m, double threshold, int zeroBinCount) @@ -150,6 +77,8 @@ public static Tuple DetectBarsInTheRowsOfaMatrix(double[,] m } //DetectBarsInTheRowsOfaMatrix() /// + /// TODO TODO this method could be deleted. It is called only by a method to detect crow calls. + /// THis is long since superceded. /// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram. /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram. /// Was first developed for crow calls. @@ -201,76 +130,5 @@ public static Tuple DetectHarmonicsInSonogramMatri return Tuple.Create(dBArray, intensity, periodicity); } - - /// - /// A METHOD TO DETECT HARMONICS IN THE sub-band of a spectrogram. - /// This method assume the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram. - /// Developed for GenericRecognizer of harmonics. - /// WARNING: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise. - /// But it requires that the frequency of any potential formants is not changing rapidly. - /// THis may not be suitable for detecting human speech. However can reduce the frame step. - /// - /// spectrogram data matrix. - /// Minimum sound level. - /// three arrays: dBArray, intensity, maxIndexArray. - public static Tuple DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold) - { - int rowCount = m.GetLength(0); - int colCount = m.GetLength(1); - var binCount = m.GetLength(1); - - //set up the cosine coefficients - double[,] cosines = MFCCStuff.Cosines(binCount, binCount); - - // set up arrays to store decibels, formant intensity and max index. - var dBArray = new double[rowCount]; - var intensity = new double[rowCount]; - var maxIndexArray = new int[rowCount]; - - // for all time frames - for (int t = 2; t < rowCount - 2; t++) - { - // get average of five adjacent frames - var frame1 = MatrixTools.GetRow(m, t - 2); - var frame2 = MatrixTools.GetRow(m, t - 1); - var frame3 = MatrixTools.GetRow(m, t); - var frame4 = MatrixTools.GetRow(m, t + 1); - var frame5 = MatrixTools.GetRow(m, t + 2); - var frame = new double[colCount]; - for (int i = 0; i < colCount; i++) - { - frame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5; - } - - double maxValue = frame.Max(); - dBArray[t] = maxValue; - if (maxValue < dBThreshold) - { - continue; - } - - double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame); - - // xr has twice length of frame and is symmetrical. - // Require only first half. - double[] normXr = new double[colCount]; - for (int i = 0; i < colCount; i++) - { - // Would normally normalise the xcorr values for overlap count. - // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values. - //normXr[i] = xr[i] / (colCount - i); - normXr[i] = xr[i]; - } - - // now do DCT across the auto cross xr - int lowerDctBound = 2; - var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound); - int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients); - intensity[t] = dctCoefficients[indexOfMaxValue]; - maxIndexArray[t] = indexOfMaxValue; - } - - return Tuple.Create(dBArray, intensity, maxIndexArray); - } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/DSP/MFCCStuff.cs b/src/AudioAnalysisTools/DSP/MFCCStuff.cs index cfa6f2114..7fb618cb4 100644 --- a/src/AudioAnalysisTools/DSP/MFCCStuff.cs +++ b/src/AudioAnalysisTools/DSP/MFCCStuff.cs @@ -524,19 +524,25 @@ public static double InverseHerzTranform(double m, double c, double div) } /// - /// cosines. + /// Returns a matrix of cosine basis functions. + /// These are prepared prior to performing a DCT, Discrete Cosine Transform. + /// The rows k = 0 to coeffCount are the basis functions. + /// The columns, m = 0 to M where M = signalLength or the length of the required DCT. + /// The value of m/M ranges from 0 to 1.0. + /// The value of Pi*m/M ranges from 0 to Pi radians. + /// The value of k*Pi*m/M ranges from 0 to k*Pi radians. WHen k=2, 2Pi radians corresponds to one rotation. /// - /// Same as bin count or filter bank count ie length of spectrum = N. - /// count of coefficients. - public static double[,] Cosines(int spectrumLength, int coeffCount) + /// The length of the signal to be processed. e.g. the frequency bin count or filter bank count or ... + /// The number of basis funcitons = the rquired number of DCT coefficients. + public static double[,] Cosines(int signalLength, int coeffCount) { - double[,] cosines = new double[coeffCount + 1, spectrumLength]; //get an extra coefficient because do not want DC coeff + double[,] cosines = new double[coeffCount + 1, signalLength]; //get an extra coefficient because do not want DC coeff at [0]. for (int k = 0; k < coeffCount + 1; k++) { - double kPiOnM = k * Math.PI / spectrumLength; + double kPiOnM = k * Math.PI / signalLength; // for each spectral bin - for (int m = 0; m < spectrumLength; m++) + for (int m = 0; m < signalLength; m++) { cosines[k, m] = Math.Cos(kPiOnM * (m + 0.5)); //can also be Cos(kPiOnM * (m - 0.5) } diff --git a/src/AudioAnalysisTools/Events/Types/HarmonicEvent.cs b/src/AudioAnalysisTools/Events/Types/HarmonicEvent.cs index ce463facb..d10ecab54 100644 --- a/src/AudioAnalysisTools/Events/Types/HarmonicEvent.cs +++ b/src/AudioAnalysisTools/Events/Types/HarmonicEvent.cs @@ -11,6 +11,14 @@ namespace AudioAnalysisTools public class HarmonicEvent : SpectralEvent { + /// + /// Gets or sets the interval or spacing between harmonics/formants. + /// + /// + /// The calculated interval between formant peaks, in hertz. + /// - /// Gets or sets the bottom bound of the rectangle. Units are Hertz. + /// Gets or sets the bottom bound of the gap between formants. Units are Hertz. /// public int? MinFormantGap { get; set; } /// - /// Gets or sets the the top bound of the rectangle. Units are Hertz. + /// Gets or sets the top bound of gap between formants. Units are Hertz. /// public int? MaxFormantGap { get; set; } + /// + /// Gets or sets a smoothing window. + /// This is used to run a moving average filter along each of the frequency bins. + /// It can help to smooth over discontinuous formants. + /// If applied sensible values are 3, 5, or 7. + /// + public int SmoothingWindow { get; set; } = 0; + public static (List SpectralEvents, List DecibelPlots) GetComponentsWithHarmonics( SpectrogramStandard spectrogram, HarmonicParameters hp, @@ -42,16 +52,18 @@ public static (List SpectralEvents, List DecibelPlots) GetCom TimeSpan segmentStartOffset, string profileName) { + // a window to smooth the frequency bins + var spectralEvents = new List(); var plots = new List(); double[] decibelMaxArray; double[] harmonicIntensityScores; - (spectralEvents, decibelMaxArray, harmonicIntensityScores) = HarmonicParameters.GetComponentsWithHarmonics( + (spectralEvents, decibelMaxArray, harmonicIntensityScores) = GetHarmonicEvents( spectrogram, hp.MinHertz.Value, hp.MaxHertz.Value, - spectrogram.NyquistFrequency, + hp.SmoothingWindow, decibelThreshold.Value, hp.DctThreshold.Value, hp.MinDuration.Value, @@ -67,11 +79,11 @@ public static (List SpectralEvents, List DecibelPlots) GetCom return (spectralEvents, plots); } - public static (List SpectralEvents, double[] AmplitudeArray, double[] HarmonicIntensityScores) GetComponentsWithHarmonics( + public static (List SpectralEvents, double[] AmplitudeArray, double[] HarmonicIntensityScores) GetHarmonicEvents( SpectrogramStandard spectrogram, int minHz, int maxHz, - int nyquist, + int smoothingWindow, double decibelThreshold, double dctThreshold, double minDuration, @@ -80,23 +92,32 @@ public static (List SpectralEvents, double[] AmplitudeArray, double int maxFormantGap, TimeSpan segmentStartOffset) { + int nyquist = spectrogram.NyquistFrequency; var sonogramData = spectrogram.Data; int frameCount = sonogramData.GetLength(0); int binCount = sonogramData.GetLength(1); + // get the min and max bin of the freq-band of interest. double freqBinWidth = nyquist / (double)binCount; int minBin = (int)Math.Round(minHz / freqBinWidth); int maxBin = (int)Math.Round(maxHz / freqBinWidth); + int bandBinCount = maxBin - minBin + 1; + + // create a unit converter + var converter = new UnitConverters( + segmentStartOffset: segmentStartOffset.TotalSeconds, + sampleRate: spectrogram.SampleRate, + frameSize: spectrogram.Configuration.WindowSize, + frameOverlap: spectrogram.Configuration.WindowOverlap); - // extract the sub-band + // extract the sub-band of interest double[,] subMatrix = MatrixTools.Submatrix(spectrogram.Data, 0, minBin, frameCount - 1, maxBin); - //ii: DETECT HARMONICS - // now look for harmonics in search band using the Xcorrelation-DCT method. - var results = CrossCorrelation.DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold); + // DETECT HARMONICS in search band using the Xcorrelation-DCT method. + var results = DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold, smoothingWindow); // set up score arrays - double[] dBArray = results.Item1; + double[] dBArray = results.Item1; // this is not used currently. double[] harmonicIntensityScores = results.Item2; //an array of formant intesnity int[] maxIndexArray = results.Item3; @@ -104,47 +125,262 @@ public static (List SpectralEvents, double[] AmplitudeArray, double { if (harmonicIntensityScores[r] < dctThreshold) { + //ignore frames where DCT coefficient (proxy for formant intensity) is below threshold continue; } - //ignore locations with incorrect formant gap + //ignore frames with incorrect formant gap + // first get id of the maximum coefficient. int maxId = maxIndexArray[r]; - int bandBinCount = maxBin - minBin + 1; double freqBinGap = 2 * bandBinCount / (double)maxId; double formantGap = freqBinGap * freqBinWidth; + + // remove values where formantGap lies outside the expected range. if (formantGap < minFormantGap || formantGap > maxFormantGap) { harmonicIntensityScores[r] = 0.0; } } - // smooth the harmonicIntensityScores array to allow for brief gaps. - harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 3); + // fill in brief gaps of one or two frames. + var harmonicIntensityScores2 = new double[harmonicIntensityScores.Length]; + for (int r = 1; r < frameCount - 2; r++) + { + harmonicIntensityScores2[r] = harmonicIntensityScores[r]; + if (harmonicIntensityScores[r - 1] > dctThreshold && harmonicIntensityScores[r] < dctThreshold) + { + // we have arrived at a possible gap. Fill the gap. + harmonicIntensityScores2[r] = harmonicIntensityScores[r - 1]; + } + + //now check if the gap is two frames wide + if (harmonicIntensityScores[r + 1] < dctThreshold && harmonicIntensityScores[r + 2] > dctThreshold) + { + harmonicIntensityScores2[r + 1] = harmonicIntensityScores[r + 2]; + r += 1; + } + } //extract the events based on length and threshhold. // Note: This method does NOT do prior smoothing of the score array. - var harmonicEvents = AcousticEvent.ConvertScoreArray2Events( - harmonicIntensityScores, + var harmonicEvents = ConvertScoreArray2HarmonicEvents( + spectrogram, + harmonicIntensityScores2, + dBArray, + converter, + maxIndexArray, + minDuration, + maxDuration, minHz, maxHz, - spectrogram.FramesPerSecond, - spectrogram.FBinWidth, + bandBinCount, dctThreshold, - minDuration, - maxDuration, segmentStartOffset); - var spectralEvents = new List(); + return (harmonicEvents, dBArray, harmonicIntensityScores2); + } + + /// + /// A METHOD TO DETECT a set of stacked HARMONICS/FORMANTS in the sub-band of a spectrogram. + /// Developed for GenericRecognizer of harmonics. + /// NOTE 1: This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram. + /// NOTE 2: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise. + /// But it requires that the frequency of any potential formants is not changing rapidly. + /// A side-effect is that the edges of harmonic events become blurred. + /// This may not be suitable for detecting human speech. However can reduce the frame step. + /// NOTE 3: This method assumes that the minimum number of formants in a stack = 3. + /// This means that the first 4 values in the returned array of DCT coefficients are set = 0 (see below). + /// + /// data matrix derived from the subband of a spectrogram. + /// Minimum acceptable value to be considered part of a harmonic. + /// three arrays: dBArray, intensity, maxIndexArray. + public static Tuple DetectHarmonicsInSpectrogramData(double[,] m, double xThreshold, int smoothingWindow) + { + int rowCount = m.GetLength(0); + int colCount = m.GetLength(1); + var binCount = m.GetLength(1); + + //set up the cosine coefficients + double[,] cosines = MFCCStuff.Cosines(binCount, binCount); + + // set up time-frame arrays to store decibels, formant intensity and max index. + var dBArray = new double[rowCount]; + var intensity = new double[rowCount]; + var maxIndexArray = new int[rowCount]; + + // Run a moving average filter along each frequency bin. + // This may help to fill noise gaps in formants. Ignore values <3. + if (smoothingWindow > 2) + { + m = MatrixTools.SmoothColumns(m, smoothingWindow); + } + + // for all time-frames or spectra + for (int t = 0; t < rowCount; t++) + { + var avFrame = MatrixTools.GetRow(m, t); + + // ignore frame if its maximum decibel value is below the threshold. + double maxValue = avFrame.Max(); + dBArray[t] = maxValue; + if (maxValue < xThreshold) + { + continue; + } + + // do autocross-correlation prior to doing the DCT. + double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(avFrame); - // add in temporary names to the events. These can be altered later. - foreach (var he in harmonicEvents) + // xr has twice length of frame and is symmetrical. Require only first half. + double[] normXr = new double[colCount]; + for (int i = 0; i < colCount; i++) + { + // Typically normalise the xcorr values for overlap count. + // i.e. normXr[i] = xr[i] / (colCount - i); + // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values. + // Therefore just normalise by dividing values by the first, so first value = 1. + normXr[i] = xr[i] / xr[0]; + } + + // fit the x-correlation array to a line to remove first order trend. + // This will help in detecting the correct maximum DCT coefficient. + var xValues = new double[normXr.Length]; + for (int j = 0; j < xValues.Length; j++) + { + xValues[j] = j; + } + + // do linear detrend of the vector of coefficients. + // get the line of best fit and subtract to get deviation from the line. + Tuple values = MathNet.Numerics.Fit.Line(xValues, normXr); + var intercept = values.Item1; + var slope = values.Item2; + for (int j = 0; j < xValues.Length; j++) + { + var lineValue = (slope * j) + intercept; + normXr[j] -= lineValue; + } + + // now do DCT across the detrended auto-cross-correlation + // set the first four values in the returned DCT coefficients to 0. + // We require a minimum of three formants, that is, two harmonic intervals. + int lowerDctBound = 4; + var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound); + int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients); + intensity[t] = dctCoefficients[indexOfMaxValue]; + maxIndexArray[t] = indexOfMaxValue; + } + + return Tuple.Create(dBArray, intensity, maxIndexArray); + } + + /// + /// Finds harmonic events in an array harmonic scores. + /// NOTE: The score array is assumed to be temporal i.e. each element of the array is derived from a time frame. + /// The method uses the passed scoreThreshold in order to calculate a normalised score. + /// Max possible score := threshold * 5. + /// normalised score := score / maxPossibleScore. + /// + /// the array of harmonic scores. + /// the array of max index values derived from the DCT. Used to calculate the harmonic interval. + /// duration of event must exceed this to be a valid event. + /// duration of event must be less than this to be a valid event. + /// lower freq bound of the event. + /// upper freq bound of the event. + /// threshold. + /// the time offset from segment start to the recording start. + /// a list of acoustic events. + public static List ConvertScoreArray2HarmonicEvents( + SpectrogramStandard spectrogram, + double[] scores, + double[] dBArray, + UnitConverters converter, + int[] maxIndexArray, + double minDuration, + double maxDuration, + int minHz, + int maxHz, + int bandBinCount, + double scoreThreshold, + TimeSpan segmentStartOffset) + { + double framesPerSec = spectrogram.FramesPerSecond; + double freqBinWidth = spectrogram.FBinWidth; + bool isHit = false; + double frameOffset = 1 / framesPerSec; + int startFrame = 0; + int frameCount = scores.Length; + + // use this to calculate a normalised score between 0 - 1.0 + double maxPossibleScore = 5 * scoreThreshold; + var scoreRange = new Interval(0, maxPossibleScore); + + var events = new List(); + + // pass over all time frames + for (int i = 0; i < frameCount; i++) { - var se = EventConverters.ConvertAcousticEventToSpectralEvent(he); - spectralEvents.Add(se); - se.Name = "Harmonics"; + if (isHit == false && scores[i] >= scoreThreshold) + { + //start of an event + isHit = true; + startFrame = i; + } + else // check for the end of an event + if (isHit && scores[i] <= scoreThreshold) + { + // this is end of an event, so initialise it + isHit = false; + int eventFrameLength = i - startFrame; + double duration = eventFrameLength * frameOffset; + + if (duration < minDuration || duration > maxDuration) + { + //skip events with duration shorter than threshold + continue; + } + + // obtain an average score and harmonic interval for the duration of the potential event. + double avScore = 0.0; + double avIndex = 0; + for (int n = startFrame; n <= i; n++) + { + avScore += scores[n]; + avIndex += maxIndexArray[n]; + } + + // calculate average event score + avScore /= eventFrameLength; + avIndex /= eventFrameLength; + double freqBinGap = 2 * bandBinCount / avIndex; + double harmonicInterval = freqBinGap * freqBinWidth; + + // calculate start and end time of this event relative to start of segment. + var eventStartWrtSegment = startFrame * frameOffset; + var eventEndWrtSegment = eventStartWrtSegment + duration; + + // Initialize the event. + var ev = new HarmonicEvent() + { + SegmentStartSeconds = segmentStartOffset.TotalSeconds, + SegmentDurationSeconds = frameCount * converter.SecondsPerFrameStep, + Name = "Stacked harmonics", + ResultStartSeconds = segmentStartOffset.TotalSeconds + eventStartWrtSegment, + EventStartSeconds = segmentStartOffset.TotalSeconds + eventStartWrtSegment, + EventEndSeconds = segmentStartOffset.TotalSeconds + eventEndWrtSegment, + LowFrequencyHertz = minHz, + HighFrequencyHertz = maxHz, + Score = avScore, + ScoreRange = scoreRange, + HarmonicInterval = harmonicInterval, + //DecibelDetectionThreshold, + }; + + events.Add(ev); + } } - return (spectralEvents, dBArray, harmonicIntensityScores); + return events; } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/Ocillations/Oscillations2010.cs b/src/AudioAnalysisTools/Ocillations/Oscillations2010.cs index ed8b008aa..a174e5d49 100644 --- a/src/AudioAnalysisTools/Ocillations/Oscillations2010.cs +++ b/src/AudioAnalysisTools/Ocillations/Oscillations2010.cs @@ -8,6 +8,7 @@ namespace AudioAnalysisTools using System.Collections.Generic; using AudioAnalysisTools.DSP; using AudioAnalysisTools.StandardSpectrograms; + using SixLabors.ImageSharp; using TowseyLibrary; public static class Oscillations2010 @@ -169,7 +170,8 @@ public static void Execute(SpectrogramStandard sonogram, int minHz, int maxHz, //following two lines write bmp image of cos values for checking. string fPath = @"C:\SensorNetworks\Output\cosines.bmp"; - ImageTools.DrawMatrix(cosines, fPath, true); + var image = ImageTools.DrawMatrix(cosines, true); + image.Save(fPath); foreach (AcousticEvent av in events) { diff --git a/src/AudioAnalysisTools/Ocillations/Oscillations2012.cs b/src/AudioAnalysisTools/Ocillations/Oscillations2012.cs index e34643971..c54923f25 100644 --- a/src/AudioAnalysisTools/Ocillations/Oscillations2012.cs +++ b/src/AudioAnalysisTools/Ocillations/Oscillations2012.cs @@ -187,14 +187,6 @@ public static void Execute( double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients - //following two lines write matrix of cos values for checking. - //string txtPath = @"C:\SensorNetworks\Output\cosines.txt"; - //FileTools.WriteMatrix2File_Formatted(cosines, txtPath, "F3"); - - //following two lines write bmp image of cos values for checking. - //string bmpPath = @"C:\SensorNetworks\Output\cosines.png"; - //ImageTools.DrawMatrix(cosines, bmpPath, true); - //traverse columns - skip DC column for (int c = minBin; c <= maxBin; c++) { diff --git a/src/TowseyLibrary/ImageTools.cs b/src/TowseyLibrary/ImageTools.cs index bbcaf8f74..30db670f6 100644 --- a/src/TowseyLibrary/ImageTools.cs +++ b/src/TowseyLibrary/ImageTools.cs @@ -3409,38 +3409,36 @@ public static Image DrawXaxisScale(Image image, int scaleHeight, d /// Draws matrix but automatically determines the scale to fit 1000x1000 pixel image. /// /// the data. - public static void DrawMatrix(double[,] matrix, string pathName, bool doScale) + public static Image DrawMatrix(double[,] matrix, bool doScale) { int rows = matrix.GetLength(0); int cols = matrix.GetLength(1); - - int maxYpixels = rows; int maxXpixels = cols; - int YpixelsPerCell = 1; - int XpixelsPerCell = 1; + int yPixelsPerCell = 1; + int xPixelsPerCell = 1; if (doScale) { - maxYpixels = 1000; + var maxYpixels = 1000; maxXpixels = 2500; - YpixelsPerCell = maxYpixels / rows; - XpixelsPerCell = maxXpixels / cols; - if (YpixelsPerCell == 0) + yPixelsPerCell = maxYpixels / rows; + xPixelsPerCell = maxXpixels / cols; + if (yPixelsPerCell == 0) { - YpixelsPerCell = 1; + yPixelsPerCell = 1; } - if (XpixelsPerCell == 0) + if (xPixelsPerCell == 0) { - XpixelsPerCell = 1; + xPixelsPerCell = 1; } } - int Ypixels = YpixelsPerCell * rows; - int Xpixels = XpixelsPerCell * cols; + int yPixels = yPixelsPerCell * rows; + int xPixels = xPixelsPerCell * cols; Color[] grayScale = GrayScale(); - var bmp = new Image(Xpixels, Ypixels); + var bmp = new Image(xPixels, yPixels); double[,] norm = DataTools.normalise(matrix); for (int r = 0; r < rows; r++) @@ -3453,11 +3451,11 @@ public static void DrawMatrix(double[,] matrix, string pathName, bool doScale) greyId = 0; } - int xOffset = XpixelsPerCell * c; - int yOffset = YpixelsPerCell * r; - for (int x = 0; x < XpixelsPerCell; x++) + int xOffset = xPixelsPerCell * c; + int yOffset = yPixelsPerCell * r; + for (int x = 0; x < xPixelsPerCell; x++) { - for (int y = 0; y < YpixelsPerCell; y++) + for (int y = 0; y < yPixelsPerCell; y++) { bmp[xOffset + x, yOffset + y] = grayScale[greyId]; } @@ -3465,7 +3463,7 @@ public static void DrawMatrix(double[,] matrix, string pathName, bool doScale) } } - bmp.Save(pathName); + return bmp; } public static void DrawMatrixInColour(double[,] matrix, string pathName, bool doScale) diff --git a/src/TowseyLibrary/MatrixTools.cs b/src/TowseyLibrary/MatrixTools.cs index 4d15e81c9..c40a0767d 100644 --- a/src/TowseyLibrary/MatrixTools.cs +++ b/src/TowseyLibrary/MatrixTools.cs @@ -2278,6 +2278,11 @@ public static void MinMax(int[,] data, out int min, out int max) //============================================================================= + /// + /// This method smooths the columns of a matrix using a moving average filter. + /// It is useful for smoothing the freqeuncy bins of a spectrogram + /// where it is assumed that the matrix columns are the frequency bins. + /// public static double[,] SmoothColumns(double[,] matrix, int window) { int rows = matrix.GetLength(0); diff --git a/tests/Acoustics.Test/Acoustics.Test.csproj b/tests/Acoustics.Test/Acoustics.Test.csproj index 42d872d6d..bd01233ad 100644 --- a/tests/Acoustics.Test/Acoustics.Test.csproj +++ b/tests/Acoustics.Test/Acoustics.Test.csproj @@ -38,6 +38,7 @@ + diff --git a/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs b/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs index 37e3ee6e7..c505d6460 100644 --- a/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs +++ b/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs @@ -310,10 +310,8 @@ public void TestWhistleAlgorithm() var results = recognizer.Recognize(recording, config, 100.Seconds(), null, this.TestOutputDirectory, null); - // add next two lines because cannot find spectrogram included with the test results. // Used for debugging only - var image = SpectrogramTools.GetSonogramPlusCharts(results.Sonogram, results.NewEvents, results.Plots, null); - image.SaveAsPng("C:/temp/image.png"); + this.SaveTestOutput(outputDirectory => GenericRecognizer.SaveDebugSpectrogram(results, config, outputDirectory, "TestWhistle")); Assert.AreEqual(4, results.NewEvents.Count); var @event = (SpectralEvent)results.NewEvents[0]; @@ -369,6 +367,9 @@ public void TestHarmonicsAlgorithm() var maxDuration = 1.1; var decibelThreshold = 2.0; + // no need to use a smoothing window on this artificial example. + int smoothingWindow = 0; + //Set up the virtual recording. int samplerate = 22050; double signalDuration = 13.0; //seconds @@ -395,11 +396,11 @@ public void TestHarmonicsAlgorithm() //get the array of intensity values minus intensity in side/buffer bands. var segmentStartOffset = TimeSpan.Zero; var plots = new List(); - var (acousticEvents, dBArray, harmonicIntensityScores) = HarmonicParameters.GetComponentsWithHarmonics( + var (acousticEvents, dBArray, harmonicIntensityScores) = HarmonicParameters.GetHarmonicEvents( spectrogram, minHertz, maxHertz, - spectrogram.NyquistFrequency, + smoothingWindow, decibelThreshold, dctThreshold, minDuration, @@ -431,30 +432,34 @@ public void TestHarmonicsAlgorithm() // effectively keeps only the *last* sonogram produced allResults.Sonogram = spectrogram; - // DEBUG PURPOSES COMMENT NEXT LINE - //GenericRecognizer.SaveDebugSpectrogram(allResults, null, outputDirectory, "name"); + // SAVE IMAGE FOR DEBUG PURPOSES + this.SaveTestOutput(outputDirectory => GenericRecognizer.SaveDebugSpectrogram(allResults, null, outputDirectory, "HarmonicEvent")); Assert.AreEqual(4, allResults.NewEvents.Count); - var @event = (SpectralEvent)allResults.NewEvents[0]; - Assert.AreEqual("Harmonics", @event.Name); - Assert.AreEqual("SpectralEvent", @event.ComponentName); + var @event = (HarmonicEvent)allResults.NewEvents[0]; + Assert.AreEqual("Stacked harmonics", @event.Name); + Assert.AreEqual("HarmonicEvent", @event.ComponentName); Assert.AreEqual(3.0, @event.EventStartSeconds, 0.1); Assert.AreEqual(4.0, @event.EventEndSeconds, 0.1); Assert.AreEqual(500, @event.LowFrequencyHertz); Assert.AreEqual(5000, @event.HighFrequencyHertz); + Assert.AreEqual(630, @event.HarmonicInterval, 10); - @event = (SpectralEvent)allResults.NewEvents[1]; - Assert.AreEqual(5.2, @event.EventStartSeconds, 0.1); + @event = (HarmonicEvent)allResults.NewEvents[1]; + Assert.AreEqual(5.0, @event.EventStartSeconds, 0.1); Assert.AreEqual(5.5, @event.EventEndSeconds, 0.1); + Assert.AreEqual(680, @event.HarmonicInterval, 10); - @event = (SpectralEvent)allResults.NewEvents[2]; + @event = (HarmonicEvent)allResults.NewEvents[2]; Assert.AreEqual(7.0, @event.EventStartSeconds, 0.1); Assert.AreEqual(8.0, @event.EventEndSeconds, 0.1); + Assert.AreEqual(1050, @event.HarmonicInterval, 10); - @event = (SpectralEvent)allResults.NewEvents[3]; + @event = (HarmonicEvent)allResults.NewEvents[3]; Assert.AreEqual(11.3, @event.EventStartSeconds, 0.1); - Assert.AreEqual(11.6, @event.EventEndSeconds, 0.1); + Assert.AreEqual(11.7, @event.EventEndSeconds, 0.1); + Assert.AreEqual(630, @event.HarmonicInterval, 10); } [TestMethod] diff --git a/tests/Acoustics.Test/AudioAnalysisTools/HarmonicAnalysis/HarmonicAlgorithmTests.cs b/tests/Acoustics.Test/AudioAnalysisTools/HarmonicAnalysis/HarmonicAlgorithmTests.cs new file mode 100644 index 000000000..a92078748 --- /dev/null +++ b/tests/Acoustics.Test/AudioAnalysisTools/HarmonicAnalysis/HarmonicAlgorithmTests.cs @@ -0,0 +1,154 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace Acoustics.Test.AudioAnalysisTools.HarmonicAnalysis +{ + using System; + using System.IO; + using System.Linq; + using Acoustics.Shared.Csv; + using Acoustics.Test.TestHelpers; + using global::AnalysisPrograms.Recognizers.Base; + using global::AudioAnalysisTools; + using global::AudioAnalysisTools.DSP; + using global::AudioAnalysisTools.Events; + using global::AudioAnalysisTools.StandardSpectrograms; + using global::AudioAnalysisTools.WavTools; + using global::TowseyLibrary; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + [TestClass] + public class HarmonicAlgorithmTests : OutputDirectoryTest + { + private static readonly FileInfo TestAsset = PathHelper.ResolveAsset("harmonic.wav"); + private readonly SpectrogramStandard spectrogram; + + private readonly double decibelThreshold = 6.0; + private readonly NoiseReductionType noiseReductionType = NoiseReductionType.Standard; + + // can also try these parameters when testing. + //private readonly double decibelThreshold = -80; + //private readonly NoiseReductionType noiseReductionType = NoiseReductionType.None; + + public HarmonicAlgorithmTests() + { + var recording = new AudioRecording(TestAsset); + this.spectrogram = new SpectrogramStandard( + new SonogramConfig + { + WindowSize = 512, + WindowStep = 512, + WindowOverlap = 0, + NoiseReductionType = this.noiseReductionType, + NoiseReductionParameter = 0.0, + Duration = recording.Duration, + SampleRate = recording.SampleRate, + }, + recording.WavReader); + } + + [TestMethod] + public void TestHarmonicsAlgorithmOn440HertzHarmonic() + { + var parameters = new HarmonicParameters + { + // Here is option to smooth the frequency bins. Can help with harmonic detection. + SmoothingWindow = 5, + MinHertz = 400, + MaxHertz = 5500, + MaxFormantGap = 470, + MinFormantGap = 420, + + //Need to make allowance for a longer than actual duration. + //because of smoothing of the spectrogram frames prior to the auto-crosscorrelation. + MinDuration = 1.0, + MaxDuration = 1.16, + DecibelThresholds = new double?[] { this.decibelThreshold }, + DctThreshold = 0.5, + }; + Assert.That.IsValid(parameters); + + var (events, plots) = HarmonicParameters.GetComponentsWithHarmonics( + this.spectrogram, + parameters, + this.decibelThreshold, + TimeSpan.Zero, + "440_harmonic"); + + this.SaveImage( + SpectrogramTools.GetSonogramPlusCharts(this.spectrogram, events, plots, null)); + + Assert.AreEqual(1, events.Count); + Assert.IsInstanceOfType(events.First(), typeof(HarmonicEvent)); + + // first harmonic is 440Hz fundamental, with 12 harmonics, stopping at 5280 Hz + var actual = events.First() as HarmonicEvent; + + //The actual bounds are not exact due to smoothing of the frames prior to the auto-crosscorrelation + // that occurs in CrossCorrelation.DetectHarmonicsInSpectrogramData() + Assert.AreEqual(1.0, actual.EventStartSeconds, 0.1); + Assert.AreEqual(2.0, actual.EventEndSeconds, 0.1); + Assert.AreEqual(400, actual.LowFrequencyHertz); + Assert.AreEqual(5500, actual.HighFrequencyHertz); + Assert.AreEqual(440, actual.HarmonicInterval, 30); + } + + [TestMethod] + public void TestHarmonicsAlgorithmOn1000HertzHarmonic() + { + var parameters = new HarmonicParameters + { + MinHertz = 400, + MaxHertz = 5500, + MaxFormantGap = 1100, + MinFormantGap = 950, + MinDuration = 0.95, + MaxDuration = 1.2, + DecibelThresholds = new double?[] { this.decibelThreshold }, + DctThreshold = 0.3, + }; + Assert.That.IsValid(parameters); + + var (events, plots) = HarmonicParameters.GetComponentsWithHarmonics( + this.spectrogram, + parameters, + this.decibelThreshold, + TimeSpan.Zero, + "1000_harmonic"); + + this.SaveImage( + SpectrogramTools.GetSonogramPlusCharts(this.spectrogram, events, plots, null)); + + Assert.AreEqual(1, events.Count); + Assert.IsInstanceOfType(events.First(), typeof(HarmonicEvent)); + + // second harmonic is 1000 Hz fundamental, with 4 harmonics, stopping at 5000 Hz + var actual = events.First() as HarmonicEvent; + Assert.AreEqual(3.0, actual.EventStartSeconds, 0.1); + Assert.AreEqual(4.0, actual.EventEndSeconds, 0.1); + Assert.AreEqual(400, actual.LowFrequencyHertz); + Assert.AreEqual(5500, actual.HighFrequencyHertz); + Assert.AreEqual(1000, actual.HarmonicInterval, 80); + } + + [TestMethod] + public void TestCosinesMatrixForDct() + { + // get an 8 x 8 matrix. + double[,] cosineBasisFunctions = MFCCStuff.Cosines(8, 8); + + //following line writes matrix of cos values for checking. + var outputDir = new FileInfo(Path.Join(this.TestOutputDirectory.FullName, "Cosines.csv")); + Csv.WriteMatrixToCsv(outputDir, cosineBasisFunctions); + + //following line writes bmp image of cos values for checking. + var image = ImageTools.DrawMatrix(cosineBasisFunctions, true); + this.SaveImage(image, "Cosines.png"); + + Assert.AreEqual(9, cosineBasisFunctions.GetLength(0)); + Assert.AreEqual(8, cosineBasisFunctions.GetLength(1)); + Assert.AreEqual(0.70710678118654768, cosineBasisFunctions[4, 4]); + } + } +} \ No newline at end of file diff --git a/tests/Acoustics.Test/Shared/Extensions/EnumerableExtensionsTests.cs b/tests/Acoustics.Test/Shared/Extensions/EnumerableExtensionsTests.cs index b72cb6df4..0d07351e4 100644 --- a/tests/Acoustics.Test/Shared/Extensions/EnumerableExtensionsTests.cs +++ b/tests/Acoustics.Test/Shared/Extensions/EnumerableExtensionsTests.cs @@ -28,6 +28,15 @@ public void TestJoinCustomDelimiter() Assert.AreEqual("0,-,1,-,2,-,3,-,4", actual); } + [TestMethod] + public void TestJoinCustomDelimiterWithPrefixAndSuffix() + { + var items = new[] { 0, 1, 2, 3, 4 }; + var actual = items.Join("/", "`", "~"); + + Assert.AreEqual("`0~/`1~/`2~/`3~/`4~", actual); + } + [TestMethod] public void TestJoinNonGeneric() { diff --git a/tests/Acoustics.Test/TestHelpers/Assertions.cs b/tests/Acoustics.Test/TestHelpers/Assertions.cs index e4e255de9..0da8a2f89 100644 --- a/tests/Acoustics.Test/TestHelpers/Assertions.cs +++ b/tests/Acoustics.Test/TestHelpers/Assertions.cs @@ -6,6 +6,7 @@ namespace Acoustics.Test.TestHelpers { using System; using System.Collections.Generic; + using System.ComponentModel.DataAnnotations; using System.Diagnostics; using System.Globalization; using System.IO; @@ -154,6 +155,47 @@ public static void AreEqual( } } + public static void IsEmpty( + this CollectionAssert _, + IEnumerable actual, + bool printItems = false, + string message = "") + { + if (actual == null) + { + Assert.Fail("actual is null, expected none"); + } + + using var actualEnum = actual.GetEnumerator(); + + if (actualEnum.MoveNext()) + { + var items = printItems ? actual.FormatList() : string.Empty; + Assert.Fail($"Actual had {actual.Count()} items and we expected none.\n{items}\n{message}"); + } + } + + public static void IsValid( + this Assert _, + T actual, + ValidationContext context = null, + string message = "") + where T : IValidatableObject + { + if (actual == null) + { + Assert.Fail("actual is null, expected an object to validate"); + } + + List failures = new(); + context ??= new ValidationContext(actual); + if (!Validator.TryValidateObject(actual, context, failures)) + { + var items = failures.Select(x => x.MemberNames.Join(", ") + " => " + x.ToString()).FormatList(); + Assert.Fail($"Actual was not valid. Returned validation failures:\n{items}\n{message}"); + } + } + public static void AreEqual( this CollectionAssert collectionAssert, double[,] expected, diff --git a/tests/Acoustics.Test/TestHelpers/GeneratedImageTest.cs b/tests/Acoustics.Test/TestHelpers/GeneratedImageTest.cs index e8cb13704..d88ddbad8 100644 --- a/tests/Acoustics.Test/TestHelpers/GeneratedImageTest.cs +++ b/tests/Acoustics.Test/TestHelpers/GeneratedImageTest.cs @@ -116,19 +116,7 @@ private void SaveImage(string typeToken, Image image) { var extra = this.ExtraName.IsNullOrEmpty() ? string.Empty : "_" + this.ExtraName; - var outName = $"{this.TestContext.TestName}{extra}_{typeToken}.png"; - if (image == null) - { - this.TestContext.WriteLine($"Skipping writing expected image `{outName}` because it is null"); - return; - } - - this.SaveTestOutput(output => - { - var path = output.CombinePath(outName); - image.Save(path); - return path; - }); + this.SaveImage(image, $"{extra}_{typeToken}"); } private bool ShouldWrite(WriteTestOutput should) => diff --git a/tests/Acoustics.Test/TestHelpers/OutputDirectoryTest.cs b/tests/Acoustics.Test/TestHelpers/OutputDirectoryTest.cs index d5b268333..8782443a9 100644 --- a/tests/Acoustics.Test/TestHelpers/OutputDirectoryTest.cs +++ b/tests/Acoustics.Test/TestHelpers/OutputDirectoryTest.cs @@ -7,6 +7,8 @@ namespace Acoustics.Test.TestHelpers using System; using System.IO; using Microsoft.VisualStudio.TestTools.UnitTesting; + using SixLabors.ImageSharp; + using SixLabors.ImageSharp.PixelFormats; [TestClass] public class OutputDirectoryTest @@ -69,6 +71,25 @@ protected FileInfo SaveTestOutput(Func callback) return savedFile; } + protected void SaveImage(Image image, params string[] tokens) + { + var token = tokens.Join("_"); + + var outName = $"{this.TestContext.TestName}{token}.png"; + if (image == null) + { + this.TestContext.WriteLine($"Skipping writing expected image `{outName}` because it is null"); + return; + } + + this.SaveTestOutput(output => + { + var path = output.CombinePath(outName); + image.Save(path); + return path; + }); + } + /// /// Save a test result. /// Also saves copies of test results to daily output directories. diff --git a/tests/Fixtures/harmonic.wav b/tests/Fixtures/harmonic.wav new file mode 100644 index 000000000..41fbcba7a --- /dev/null +++ b/tests/Fixtures/harmonic.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f00906279a9a10675461cf07d09bfd2e9542e83f6b92ecc11ae4f527b9b04fe +size 304954