Skip to content

Commit

Permalink
Make spectrogram smoothing accessible to user
Browse files Browse the repository at this point in the history
Issue #471 Make spectrogram smoothing accessible to user. The default is no spectrogram smoothing.
  • Loading branch information
towsey committed Jun 15, 2021
1 parent f3bc0f8 commit 0a7d356
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 45 deletions.
94 changes: 49 additions & 45 deletions src/AudioAnalysisTools/Harmonics/HarmonicParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,26 @@ public class HarmonicParameters : CommonParameters
public int? MinFormantGap { get; set; }

/// <summary>
/// Gets or sets the the top bound of gap between formants. Units are Hertz.
/// Gets or sets the top bound of gap between formants. Units are Hertz.
/// </summary>
public int? MaxFormantGap { get; set; }

/// <summary>
/// Gets or sets the width of the smoothing window.
/// This is used to run a moving average filter along each of the frequency bins.
/// It can help to smooth over discontinuous formants.
/// The default is 0 (no smoothing); window values less than 3 are ignored.
/// </summary>
public int? SmoothingWindow { get; set; } = 0;

public static (List<EventCommon> SpectralEvents, List<Plot> DecibelPlots) GetComponentsWithHarmonics(
SpectrogramStandard spectrogram,
HarmonicParameters hp,
double? decibelThreshold,
TimeSpan segmentStartOffset,
string profileName)
{
// a window to smooth the frequency bins

var spectralEvents = new List<EventCommon>();
var plots = new List<Plot>();

Expand All @@ -53,6 +62,7 @@ public static (List<EventCommon> SpectralEvents, List<Plot> DecibelPlots) GetCom
spectrogram,
hp.MinHertz.Value,
hp.MaxHertz.Value,
hp.SmoothingWindow.Value,
decibelThreshold.Value,
hp.DctThreshold.Value,
hp.MinDuration.Value,
Expand All @@ -72,6 +82,7 @@ public static (List<EventCommon> SpectralEvents, double[] AmplitudeArray, double
SpectrogramStandard spectrogram,
int minHz,
int maxHz,
int smoothingWindow,
double decibelThreshold,
double dctThreshold,
double minDuration,
Expand All @@ -91,12 +102,18 @@ public static (List<EventCommon> SpectralEvents, double[] AmplitudeArray, double
int maxBin = (int)Math.Round(maxHz / freqBinWidth);
int bandBinCount = maxBin - minBin + 1;

// create a unit converter
var converter = new UnitConverters(
segmentStartOffset: segmentStartOffset.TotalSeconds,
sampleRate: spectrogram.SampleRate,
frameSize: spectrogram.Configuration.WindowSize,
frameOverlap: spectrogram.Configuration.WindowOverlap);

// extract the sub-band of interest
double[,] subMatrix = MatrixTools.Submatrix(spectrogram.Data, 0, minBin, frameCount - 1, maxBin);

//ii: DETECT HARMONICS
// now look for harmonics in search band using the Xcorrelation-DCT method.
var results = DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold);
// DETECT HARMONICS in search band using the Xcorrelation-DCT method.
var results = DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold, smoothingWindow);

// set up score arrays
double[] dBArray = results.Item1; // this is not used currently.
Expand Down Expand Up @@ -145,10 +162,11 @@ public static (List<EventCommon> SpectralEvents, double[] AmplitudeArray, double

//extract the events based on length and threshold.
// Note: This method does NOT do prior smoothing of the score array.
var harmonicEvents = ConvertScoreArray2Events(
var harmonicEvents = ConvertScoreArray2HarmonicEvents(
spectrogram,
harmonicIntensityScores2,
dBArray,
converter,
maxIndexArray,
minDuration,
maxDuration,
Expand All @@ -175,7 +193,7 @@ public static (List<EventCommon> SpectralEvents, double[] AmplitudeArray, double
/// <param name="m">data matrix derived from the subband of a spectrogram.</param>
/// <param name="xThreshold">Minimum acceptable value to be considered part of a harmonic.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double xThreshold)
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double xThreshold, int smoothingWindow)
{
int rowCount = m.GetLength(0);
int colCount = m.GetLength(1);
Expand All @@ -189,23 +207,17 @@ public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(
var intensity = new double[rowCount];
var maxIndexArray = new int[rowCount];

// for all time frames
for (int t = 2; t < rowCount - 2; t++)
// Run a moving average filter along each frequency bin.
// This may help to fill noise gaps in formants. Ignore values <3.
if (smoothingWindow > 2)
{
// Smooth the frame values by taking the average of five adjacent frames
var frame1 = MatrixTools.GetRow(m, t - 2);
var frame2 = MatrixTools.GetRow(m, t - 1);
var frame3 = MatrixTools.GetRow(m, t);
var frame4 = MatrixTools.GetRow(m, t + 1);
var frame5 = MatrixTools.GetRow(m, t + 2);

// set up a frame of average db values.
var avFrame = new double[colCount];
for (int i = 0; i < colCount; i++)
{
//avFrame[i] = (frame2[i] + frame3[i] + frame4[i]) / 3;
avFrame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5;
}
m = MatrixTools.SmoothColumns(m, smoothingWindow);
}

// for all time-frames or spectra
for (int t = 0; t < rowCount; t++)
{
var avFrame = MatrixTools.GetRow(m, t);

// ignore frame if its maximum decibel value is below the threshold.
double maxValue = avFrame.Max();
Expand All @@ -215,7 +227,7 @@ public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(
continue;
}

// do the autocross-correlation prior to doing the DCT.
// do autocross-correlation prior to doing the DCT.
double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(avFrame);

// xr has twice length of frame and is symmetrical. Require only first half.
Expand All @@ -229,8 +241,6 @@ public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(
normXr[i] = xr[i] / xr[0];
}

//normXr = DataTools.DiffFromMean(normXr);

// fit the x-correlation array to a line to remove first order trend.
// This will help in detecting the correct maximum DCT coefficient.
var xValues = new double[normXr.Length];
Expand All @@ -239,6 +249,7 @@ public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(
xValues[j] = j;
}

// do linear detrend of the vector of coefficients.
// get the line of best fit and subtract to get deviation from the line.
Tuple<double, double> values = MathNet.Numerics.Fit.Line(xValues, normXr);
var intercept = values.Item1;
Expand All @@ -251,7 +262,7 @@ public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(

// now do DCT across the detrended auto-cross-correlation
// set the first four values in the returned DCT coefficients to 0.
// We require a minimum of three formants, that is two gaps.
// We require a minimum of three formants, that is, two harmonic intervals.
int lowerDctBound = 4;
var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
Expand All @@ -278,10 +289,11 @@ public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(
/// <param name="scoreThreshold">threshold.</param>
/// <param name="segmentStartOffset">the time offset from segment start to the recording start.</param>
/// <returns>a list of acoustic events.</returns>
public static List<EventCommon> ConvertScoreArray2Events(
public static List<EventCommon> ConvertScoreArray2HarmonicEvents(
SpectrogramStandard spectrogram,
double[] scores,
double[] dBArray,
UnitConverters converter,
int[] maxIndexArray,
double minDuration,
double maxDuration,
Expand All @@ -293,23 +305,15 @@ public static List<EventCommon> ConvertScoreArray2Events(
{
double framesPerSec = spectrogram.FramesPerSecond;
double freqBinWidth = spectrogram.FBinWidth;

// create a unit converter
var converter = new UnitConverters(
segmentStartOffset: segmentStartOffset.TotalSeconds,
sampleRate: spectrogram.SampleRate,
frameSize: spectrogram.Configuration.WindowSize,
frameOverlap: spectrogram.Configuration.WindowOverlap);

// used this to calculate a normalised score between 0 - 1.0
double maxPossibleScore = 5 * scoreThreshold;
var scoreRange = new Interval<double>(0, maxPossibleScore);

bool isHit = false;
double frameOffset = 1 / framesPerSec;
int startFrame = 0;
int frameCount = scores.Length;

// use this to calculate a normalised score between 0 - 1.0
double maxPossibleScore = 5 * scoreThreshold;
var scoreRange = new Interval<double>(0, maxPossibleScore);

var events = new List<EventCommon>();

// pass over all time frames
Expand All @@ -319,14 +323,15 @@ public static List<EventCommon> ConvertScoreArray2Events(
{
//start of an event
isHit = true;
startFrame = i + 2;
startFrame = i;
}
else // check for the end of an event
if (isHit && scores[i] <= scoreThreshold)
{
// this is end of an event, so initialise it
isHit = false;
double duration = (i - 1 - startFrame) * frameOffset;
int eventFrameLength = i - startFrame;
double duration = eventFrameLength * frameOffset;

if (duration < minDuration || duration > maxDuration)
{
Expand All @@ -344,15 +349,14 @@ public static List<EventCommon> ConvertScoreArray2Events(
}

// calculate average event score
int eventLength = i - startFrame;
avScore /= eventLength;
avIndex /= eventLength;
avScore /= eventFrameLength;
avIndex /= eventFrameLength;
double freqBinGap = 2 * bandBinCount / avIndex;
double harmonicInterval = freqBinGap * freqBinWidth;

// calculate start and end time of this event relative to start of segment.
var eventStartWrtSegment = startFrame * frameOffset;
var eventEndWrtSegment = (i - 1) * frameOffset;
var eventEndWrtSegment = eventStartWrtSegment + duration;

// Initialize the event.
var ev = new HarmonicEvent()
Expand Down
5 changes: 5 additions & 0 deletions src/TowseyLibrary/MatrixTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2278,6 +2278,11 @@ public static void MinMax(int[,] data, out int min, out int max)

//=============================================================================

/// <summary>
/// This method smooths the columns of a matrix using a moving average filter.
/// It is useful for smoothing the frequency bins of a spectrogram
/// where it is assumed that the matrix columns are the frequency bins.
/// </summary>
public static double[,] SmoothColumns(double[,] matrix, int window)
{
int rows = matrix.GetLength(0);
Expand Down

0 comments on commit 0a7d356

Please sign in to comment.