From da1e0e2a19d3a9378030d9219d12dbd9d6a11be6 Mon Sep 17 00:00:00 2001 From: Anthony Truskinger Date: Mon, 8 Mar 2021 21:33:31 +1000 Subject: [PATCH] Adds two new post-processing options - Adds the ability to not post-process events within their decibel group. There are two schools of thought here: either you want to post-process within the decibel threshold group, and then use RemoveEnclosedEvents to select the largest bounding event, or, you're making a recogniser out of multiple component events detected at different thresholds. In the latter case you _want_ to group events together from different decibel groups and processing them by group removes any chance of making this work. Also adds a combine events vertically post-processing option. This acts much the same as the combine syllables post-processing step but instead allows us to combine multiple events detected individually at different frequencies into one tall event. A poor imitation of a harmonic algorithm, but useful when the component events are non-harmonic. --- .../Recognizers/GenericRecognizer.cs | 33 +++++++--- .../Events/Types/CompositeEvent.cs | 39 ++++++++++++ .../Events/Types/EventPostProcessing.cs | 61 ++++++++++++++++--- 3 files changed, 113 insertions(+), 20 deletions(-) diff --git a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs index fa3cdc078..cc9dcb8b1 100644 --- a/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs +++ b/src/AnalysisPrograms/Recognizers/GenericRecognizer.cs @@ -154,27 +154,39 @@ public override RecognizerResults Recognize( if (postprocessingConfig is not null) { var postEvents = new List(); - var groups = results.NewEvents.GroupBy(x => x.DecibelDetectionThreshold); - foreach (var group in groups) + if (Log.Choice(postprocessingConfig.PostProcessInDecibelGroups ?? 
true, "Post-processing in decibel groups?")) { - var key = group.Key; - List events = group.ToList(); - var ppEvents = EventPostProcessing.PostProcessingOfSpectralEvents( - events, + foreach (var group in results.NewEvents.GroupBy(x => x.DecibelDetectionThreshold)) + { + var decibelThreshold = group.Key; + var events = group.ToList(); + + Log.Debug($"TOTAL EVENTS detected by profiles at {decibelThreshold:F0} dB threshold = {events.Count}"); + + var ppEvents = PostProcessingOfSpectralEvents( + events, + postprocessingConfig, + results.Sonogram, + segmentStartOffset); + + postEvents.AddRange(ppEvents); + } + } + else + { + postEvents = PostProcessingOfSpectralEvents( + results.NewEvents, postprocessingConfig, - key.Value, results.Sonogram, segmentStartOffset); - - postEvents.AddRange(ppEvents); } // Running profiles with multiple dB thresholds can produce enclosed/nested events. // Remove all but the outermost events. if (configuration.PostProcessing.RemoveEnclosedEvents) { - Log.Debug($"\nREMOVE ENCLOSED EVENTS."); + Log.Debug($"\nREMOVE EVENTS ENCLOSED BY OTHER EVENTS."); Log.Debug($"Event count BEFORE removing enclosed events = {postEvents.Count}."); results.NewEvents = CompositeEvent.RemoveEnclosedEvents(postEvents); Log.Debug($"Event count AFTER removing enclosed events = {postEvents.Count}."); @@ -197,6 +209,7 @@ public override RecognizerResults Recognize( } } } + return results; } diff --git a/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs b/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs index 143eced68..07326b6d7 100644 --- a/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs +++ b/src/AudioAnalysisTools/Events/Types/CompositeEvent.cs @@ -283,6 +283,45 @@ public static List CombineStackedEvents(List events, return events.Cast().ToList(); } + /// + /// Combines events that are possible stacked harmonics or formants. 
+ /// Two conditions apply: + /// (1) the events are coincident (have similar start and end times) + /// (2) the events are stacked (their minima and maxima are within the passed frequency gap). + /// NOTE: The difference between this method and CombineProximalEvents() is that stacked events should have similar start AND similar end times. + /// Having similar start and end times means the events are "stacked" (like pancakes!) in the spectrogram. + /// How closely stacked is determined by the hertzDifference argument. Typicaly, the formant spacing is not large, ~100-200Hz. + /// + public static List CombineVerticalEvents(List events, TimeSpan timeDifference, int hertzDifference) + { + if (events.Count < 2) + { + return events.Cast().ToList(); + } + + for (int i = events.Count - 1; i >= 0; i--) + { + for (int j = i - 1; j >= 0; j--) + { + bool eventsStartTogether = Math.Abs(events[i].EventStartSeconds - events[j].EventStartSeconds) < timeDifference.TotalSeconds; + bool eventsEndTogether = Math.Abs(events[i].EventEndSeconds - events[j].EventEndSeconds) < timeDifference.TotalSeconds; + bool eventsAreCoincident = eventsStartTogether && eventsEndTogether; + bool eventMinimaAreSimilar = Math.Abs(events[i].HighFrequencyHertz - events[j].LowFrequencyHertz) < hertzDifference; + bool eventMaximaAreSimilar = Math.Abs(events[j].HighFrequencyHertz - events[i].LowFrequencyHertz) < hertzDifference; + if (eventsAreCoincident && (eventMinimaAreSimilar || eventMaximaAreSimilar)) + { + var compositeEvent = CombineTwoEvents(events[i], events[j]); + events[j] = compositeEvent; + events.RemoveAt(i); + break; + } + } + } + + return events.Cast().ToList(); + } + + /// /// Merges two spectral events into one event. 
/// diff --git a/src/AudioAnalysisTools/Events/Types/EventPostProcessing.cs b/src/AudioAnalysisTools/Events/Types/EventPostProcessing.cs index 79f4e83f0..ee556d37c 100644 --- a/src/AudioAnalysisTools/Events/Types/EventPostProcessing.cs +++ b/src/AudioAnalysisTools/Events/Types/EventPostProcessing.cs @@ -29,7 +29,6 @@ public static class EventPostProcessing public static List PostProcessingOfSpectralEvents( List newEvents, PostProcessingConfig postprocessingConfig, - double decibelThreshold, BaseSonogram spectrogram, TimeSpan segmentStartOffset) { @@ -40,8 +39,7 @@ public static List PostProcessingOfSpectralEvents( // Step 4: Remove events whose bandwidth is too small or large. // Step 5: Remove events that have excessive noise in their side-bands. - Log.Debug($"\nBEFORE post-processing."); - Log.Debug($"TOTAL EVENTS detected by profiles at {decibelThreshold:F0} dB threshold = {newEvents.Count}"); + Log.DebugFormat($"\nBEFORE post-processing, event count: {0}.", newEvents.Count); // 1: Combine overlapping events. // This will be necessary where many small events have been found - possibly because the dB threshold is set low. 
@@ -81,9 +79,9 @@ public static List PostProcessingOfSpectralEvents( Log.Debug($" Expected Syllable Sequence: max={maxComponentCount}, Period: av={periodAv}s, sd={periodSd:F3} min={minPeriod:F3}s, max={maxPeriod:F3}s"); if (minPeriod <= 0.0) { - Log.Error($"Expected period={periodAv};sd={periodSd:F3} => min={minPeriod:F3}s;max={maxPeriod:F3}", + Log.Error( + $"Expected period={periodAv};sd={periodSd:F3} => min={minPeriod:F3}s;max={maxPeriod:F3}", new Exception("FATAL ERROR: This combination of values is invalid => negative minimum value.")); - System.Environment.Exit(1); } newEvents = EventFilters.FilterEventsOnSyllableCountAndPeriodicity(newEvents, maxComponentCount, periodAv, periodSd); @@ -91,6 +89,15 @@ public static List PostProcessingOfSpectralEvents( } } + if (Log.Choice(postprocessingConfig.CombineVerticalSyllables is not null, "Combine syllables vertically?")) + { + newEvents = CompositeEvent.CombineVerticalEvents( + newEvents.Cast().ToList(), + postprocessingConfig.CombineVerticalSyllables.MaxDifferenceSeconds.Seconds(), + (int)postprocessingConfig.CombineVerticalSyllables.MaxGapHertz); + Log.Debug($" Event count after combining vertical events = {newEvents.Count}"); + } + // 3: Filter the events for time duration (seconds) if ((postprocessingConfig.Duration != null) && (newEvents.Count > 0)) { @@ -160,6 +167,14 @@ public class PostProcessingConfig /// public SyllableSequenceConfig SyllableSequence { get; set; } + /// + /// Gets or sets the parameters required to combine syllables vertically. + /// + /// + /// Useful for when you have two different profiles for detecting a lower and upper portion of an event. + /// + public SyllableStackConfig CombineVerticalSyllables { get; set; } + /// /// Gets or sets the parameters required to filter events on their duration. /// @@ -182,6 +197,16 @@ public class PostProcessingConfig /// Setting this boolean true removes all but the outermost of any set of encloseed events. 
/// public bool RemoveEnclosedEvents { get; set; } + + /// + /// If true (the default) post-processing of events will be done in groups based on their decibel threshold detection value. + /// For example, all events at 3dB will be post-processed seperately from all events at 6dB. + /// If false will process ebents as if they were all detected at the same decibel threshold. + /// + /// + /// true will enable grouping, false will disable grouping. Defaults to true. + /// + public bool? PostProcessInDecibelGroups { get; set; } } /// @@ -262,11 +287,6 @@ public class SidebandConfig public class SyllableSequenceConfig { - /// - /// Gets or sets a value indicating Whether or not to combine events that constitute a sequence of the same strophe. - /// - public bool CombinePossibleSyllableSequence { get; set; } - /// /// Gets or sets a value indicating the maximum allowable start time gap (seconds) between events within the same strophe. /// The gap between successive syllables is the "period" of the sequence. @@ -318,5 +338,26 @@ public double PeriodStandardDeviation get => (this.SyllableStartDifference - this.ExpectedPeriod) / 3; } } + + /// + /// These parameters define the limits for combining stacked events - that is, events that are above or + /// below each other with some spectral gap between them. + /// + public class SyllableStackConfig + { + /// + /// The maximum allowed gap between the top of the lower event and the bottom of the higher event. + /// Any events that are closer than this gap will be combined. + /// + /// The allowable gap in Hertz. + public double MaxGapHertz {get; set; } + + /// + /// Controls how much variance is allowed in the temporal bounds of the event. + /// If the events are part of the same large event, then they should start and end at the same time. + /// + /// The allowable difference in seconds + public double MaxDifferenceSeconds {get; set; } + } } }