- Redesigned the installation pages so that they're simpler and have sub-pages
docs/theory/spectrograms.md diff --git a/.vscode/settings.json b/.vscode/settings.json index 654f1670a..cd912638c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,16 +1,26 @@ { "cSpell.words": [ - "Arial", - "Ecoacoustics", - "Hanning", - "Roboto", - "Tahoma", - "Towsey", - "Truskinger", - "choco", - "docfx", - "nyquist", - "pwsh", - "rects" - ] + "Arial", + "Ecoacoustics", + "Hanning", + "Roboto", + "Tahoma", + "Towsey", + "Truskinger", + "choco", + "convolutional", + "docfx", + "formants", + "nyquist", + "pwsh", + "rects", + "resample", + "resampling", + "wingbeats" + ], + "triggerTaskOnSave.tasks": { + "docs-save-build": [ + "docs/**" + ] + } } \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 9a06da3c0..ad7e1c6f1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -37,6 +37,19 @@ "/consoleloggerparameters:NoSummary" ], "problemMatcher": "$msCompile" + }, + { + "label": "docs-save-build", + "command": "docfx", + "args": ["build"], + "type": "process", + "options": { + "cwd": "docs" + }, + "runOptions": { + "instanceLimit": 1, + "runOn": "default" + } } ] } \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3361388fb..77ae77146 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,7 +24,7 @@ - Dates and durations: 1. **ONLY** format dates in an ISO8601 format - a modified ISO8601 format with all punctuation removed is acceptable - for file/folder names. Example format string: yyyyMMddTHHmmssZ + for file/folder names. Example format string: `yyyyMMddTHHmmssZ` 1. **ALWAYS** format dates with UTC offset information if available 1. **PREFER** formatting dates in the UTC timezone 1. **AVOID** exposing `TimeSpan`s to user facing fields (use seconds instead) @@ -36,8 +36,6 @@ and improve on this. 
[![codecov](https://codecov.io/gh/QutEcoacoustics/audio-analysis/branch/master/graph/badge.svg)](https://codecov.io/gh/QutEcoacoustics/audio-analysis) - - ## Required Software The **required** software for developing new code (not running the program) includes: diff --git a/docs/basics/assetChooser.html b/docs/basics/advanced/assetChooser.html similarity index 100% rename from docs/basics/assetChooser.html rename to docs/basics/advanced/assetChooser.html diff --git a/docs/basics/advanced/manual_install.md b/docs/basics/advanced/manual_install.md new file mode 100644 index 000000000..39a9889cb --- /dev/null +++ b/docs/basics/advanced/manual_install.md @@ -0,0 +1,80 @@ +--- +title: Manual Install +uid: basics-manual-install +--- + + +## Manual Install + +1. Go to our [releases](https://github.com/QutEcoacoustics/audio-analysis/releases) page +2. Select the version you want to download + - Choose the _Latest release_ unless you specifically need features that are + available in a _Pre-release_ +3. Scroll down to the assets section for your chosen release +4. Download the version of AnalysisPrograms suitable for your computer (see [Choosing the asset](#choosing-the-asset)) +5. Extract the folder + - It can be installed in any directory + - We typically extract to a directory named `~\AP` or `~/.local/share/AP` on Linux +6. Make sure any [Prerequisites](#prerequisites) are installed +7. [Optional] Add the install directory to your `PATH` environment variable + - Instructions in the [Path](./path.md) document. +8. Finally, check the install by running: + +Run the following command: + +### [Windows Check](#tab/windows) + +```powershell +C:\AP\AnalysisPrograms.exe CheckEnvironment +``` + +### [Linux Check](#tab/linux) + +```bash +/AP/AnalysisPrograms CheckEnvironment +``` + +### [MacOSX Check](#tab/osx) + +```bash +/AP/AnalysisPrograms CheckEnvironment +``` + +*** + +### Choosing the asset + +[!include[](<./assetChooser.html>)] + +## Prerequisites + +### Windows + +None. 
The `PATH` variable is a system wide variable (known as an environment variable)
Find where AP is installed on your computer. This will be a directory (folder) + where _AnalysisPrograms.exe_ resides. +2. Open your _System Environment Variables_. You can type some of +_Edit environment variables for your account_ in the Start Menu search box +to find the settings. +3. Choose _Environment Variables..._ in the windows that popped up +4. In the _user variables_ section, find the _Path_ variable, select it, and then hit the _Edit_ button +5. Add the directory from step 1 to the end + - Ensure a semi-colon (`;`) delimits the new directory from the previous ones, if you're using an older version of Windows +6. Then click _OK_ or close all windows. +7. You will have to restart any programs for which you want to see the new value + +### [Linux](#tab/linux) + +1. Find where AP is installed on your computer. This will be a directory (folder) + where _AnalysisPrograms.exe_ resides. +2. Open or create your `~/.profile` file +3. Add the following line to the end: + + ```bash + PATH=$PATH: + ``` + + where you replace the `` with the directory from step 1. +4. Close and save the file +5. Run the same command in your current shell (`PATH=$PATH:`) to + see the change take effect immediately (or restart your shell). + +### [MacOSX](#tab/osx) + +1. Find where AP is installed on your computer. This will be a directory (folder) + where _AnalysisPrograms.exe_ resides. +2. Open or create your `~/.profile` file +3. Add the following line to the end: + + ```bash + PATH=$PATH: + ``` + + where you replace the `` with the directory from step 1. +4. Close and save the file +5. Run the same command in your current shell (`PATH=$PATH:`) to + see the change take effect immediately (or restart your shell). + +*** + +## Aliasing _AnalysisPrograms.exe_ to _AP_ + +_AnalysisPrograms.exe_ is a long name. Tiring to type, prone to errors. +It also isn't a good name for a cross-platform program; on Linux and Mac OS it +is simply _AnalysisPrograms_. 
You'll need to ensure `` is on `PATH`.
+- Typically, you will only need to adjust a subset of the available parameters +- Most parameters have default values You can find an introduction to YAML here: @@ -19,6 +25,13 @@ You can find an introduction to YAML here: You can validate YAML files (to check for syntax errors) here: +When editing YAML files follow these rules: + +- Use a good editor like [Visual Studio Code](https://code.visualstudio.com/) which will detect mistakes and highlight + different parts of the file with different colours for you +- Always indent lines with four (4) spaces (Space Bar) +- **Never** use the Tab ā†¹ key or tab (`\t`) character to indent lines + ## Location All config files are packaged with _AP.exe_ releases. Inside the package you will @@ -26,7 +39,7 @@ find a `ConfigFiles` folder that contains all the config files. **IMPORTANT**: Avoid editing these files directly. If you want to change a value: -1. Copy the file to another directory +1. Copy the file to another directory (a personal folder) 1. Rename the file to describe the changes - e.g. `Towsey.Acoustic.yml` might become `Towsey.Acoustic.HighResolution.yml` - See the [Naming] section below for naming rules for the config file @@ -40,25 +53,112 @@ the naming format of the config files is now important. We use the name to deter For any config file used by `audio2csv`/`AnalyzeLongRecording` the name of the config file must follow this format: -``` -.[.].yml +```ebnf +.[.]*.yml ``` -If you find a config file that does not match this format, it will likely produce an error. -If your config file must be named in a different format the `--analysis-name` argument can be used to -disambiguate the analysis type you want to use. +The `author` and `analysis` name sections are mandatory. The `tag` section is optional, ignored by _AP_, and can be repeated. 
`IndexProperties.yml` needs no particular naming format to be valid.
  Each different type of syllable detection we want to use in a recognizer is added into a
If they don't, then +please let us know! + +For more information on constructing generic recognizers see . diff --git a/docs/basics/installing.md b/docs/basics/installing.md index 9c2d46c96..7faee2063 100644 --- a/docs/basics/installing.md +++ b/docs/basics/installing.md @@ -6,6 +6,8 @@ uid: basics-installing If you're new to using _AP.exe_ we recommend following the instructions in the practical. +You can choose to use our automated installer script or do a [manual install](./advanced/manual_install.md) + ## Supported Platforms - Any of the following platforms: @@ -74,6 +76,7 @@ Or, to install the prerelease version: *** +
> _AP_ is an alias for _AnalysisPrograms.exe_ that we made to make AP easier to use.
[Optional] Add the install directory to your PATH environment variable -8. Finally, check the install by running: - -Run the following command: - -### [Windows Check](#tab/windows) - -```powershell -C:\AP\AnalysisPrograms.exe CheckEnvironment -``` - -### [Linux Check](#tab/linux) - -```bash -/AP/AnalysisPrograms CheckEnvironment -``` - -### [MacOSX Check](#tab/osx) - -```bash -/AP/AnalysisPrograms CheckEnvironment -``` - -*** - -### Choosing the asset - -[!include[](<./assetChooser.html>)] - -## Prerequisites - -### Windows - -None. Self contained download. - -### MacOSX - -None. Self contained download. - -### Linux/Unix - -The following additional dependencies may be required for Linux/Unix machines: - -- **MAYBE**: ffmpeg - - a packaged version with AP.exe should work for all platforms except ARM and ARM64 -- **MAYBE**: wavpack -- libsox-fmt-all, sox -- libav-tools (on some distros only, not needed in Ubuntu 18) - -## Build Packages - -There are two variants of AP.exe: - -1. The **Stable** release is well tested used by QUT Ecoacoustics on our servers - and is usually a few months old -2. The **Prerelease** release is automatically built weekly, every Monday. It has more - features and bug fixes than the stable release but it also could have more - bugs. - -You should use the **Stable** release unless there is a recent -feature implemented or bug fix in the prerelease version that you need. 
diff --git a/docs/basics/introduction.md b/docs/basics/introduction.md index 83638a7cc..0a2a291b1 100644 --- a/docs/basics/introduction.md +++ b/docs/basics/introduction.md @@ -1,3 +1,9 @@ +--- +title: Introduction to AnalysisPrograms.exe +uid: basics-introduction +--- + + # Introduction to AnalysisPrograms.exe AnalysisPrograms.exe (AP.exe) is a software package that analyses recordings diff --git a/docs/basics/path.md b/docs/basics/path.md deleted file mode 100644 index e1319880c..000000000 --- a/docs/basics/path.md +++ /dev/null @@ -1,5 +0,0 @@ -# Adding AP.exe to `PATH` - -Doing this makes using AP.exe easier. - -[TODO] diff --git a/docs/basics/toc.yml b/docs/basics/toc.yml index 9ac276332..55dd5e39c 100644 --- a/docs/basics/toc.yml +++ b/docs/basics/toc.yml @@ -1,7 +1,13 @@ - name: Introduction href: introduction.md + - name: Installing href: installing.md + items: + - name: Manual install + href: advanced/manual_install.md + - name: Adding to PATH + href: advanced/path.md - name: Command Line Interface href: cli.md @@ -22,7 +28,6 @@ href: versioning.md - name: Logs href: logs.md -- name: Adding to PATH - href: path.md + - name: Reporting bugs href: bug_report.md diff --git a/docs/guides/DIY_CallRecognizersUsingAP.md b/docs/guides/generic_recognizers.md similarity index 73% rename from docs/guides/DIY_CallRecognizersUsingAP.md rename to docs/guides/generic_recognizers.md index 0a0ccae34..368ee9578 100644 --- a/docs/guides/DIY_CallRecognizersUsingAP.md +++ b/docs/guides/generic_recognizers.md @@ -1,158 +1,174 @@ -`File: DIY_CallRecognizersUsingAP.md` -# Welcome to "DIY Call Recognizer" +--- +title: DIY Call Recognizer +uid: guides-generic-recognizers +--- - -> **DIY Call Recognizer** is a utility within **Analysis Programs**, a command line program that analyses long-duration audio-recordings of the environment. 
`AnalysisPrograms.exe` (abbreviated from here to `APexe`) can execute several different utlities or functions, one of which is the ability to write your own call recognizer. This manual describes how to write a **DIY call recognizer**. Refer to other manuals [here](https://github.com/QutEcoacoustics/audio-analysis/blob/master/README.md) for other utilities. +# DIY Call Recognizers -## Contents ## -1. Why bother with a DIY call recognizer? -2. Calls, syllables, harmonics -3. Acoustic events -4. Detecting acoustic events -5. Configuration files -6. Parameter names and values -7. An efficient strategy to tune parameters -8. Seven stages to building a DIY call recognizer -9. The command line -10. Building a larger data set +A **DIY Call Recognizer** is a utility within [_Analysis Programs_](xref:basics-introduction) which allows you to write +your own call recognizers. -============================================================== +A **DIY call recognizer** uses our _generic recognizer_ tools. This guide will help you make your own +_generic recognizer_. The generic recognizer allows a user to generically reuse and parametrize our syllable detectors. +Once you can detect new syllables those syllables can be combined to form new call recognizers. -NOTE: -- Incomplete parts of the manual are indicated by _**TODO**_. -- Features not yet implemented are marked with a construction emoji (šŸš§). +> [!NOTE] +> +> - Incomplete parts of the manual are indicated by _**TODO**_. +> - Features not yet implemented are marked with a construction emoji (šŸš§). -============================================================== - - -. - - -## 1. Why bother with a DIY call recognizer? +## 1. Why make a DIY call recognizer? There are three levels of sophistication in automated call recognizers: -- The simplist is the handcrafted template. + +- The simplest is the handcrafted template. - More powerful is a _machine learned_ model. 
- The current cutting edge of call recognizers is *deep-learning* using a convolutional neural network. A comparison of these recognizer types is shown in the following table and explained further in the subsequent paragraph. - ### **TABLE. A comparison of three different kinds of call recognizer** - -| Type of Recognizer | Who does the feature extraction? | Required dataset | Skill level | Accuracy | -|:---:|:---:|:---:|:---:|:---:| -|Template matching | User | Small (even 1!) | Least | Sometimes good | -|Supervised machine learning | User | Moderate (50-100s) | Some | Better | -|CNN | Part of CNN learning | Very large (10k to 1M) | A lot! | Best? | -|||| +
+| Type of Recognizer | Who does the feature extraction? | Required dataset | Skill level | Accuracy | +|:---------------------------:|:--------------------------------:|:----------------------:|:-----------:|:--------------:| +| Template matching | User | Small (even 1!) | Least | Sometimes good | +| Supervised machine learning | User | Moderate (50-100s) | Some | Better | +| CNN | Part of CNN learning | Very large (10k to 1M) | A lot! | Best? | -Hand-crafted, *rule-based* templates can be built using just one or a few examples of the target call. But like any rule-based *AI* system, they are *brittle*, that is, they break easily if the target call falls even slightly outside the bounds of the rules. A supervised machine-learning model, for example an SVM or Random Forest, is far more resilient to slight changes in the range of the target call but they require many more training examples, on the order of 100 training examples. Finally, the convolutional neural network (CNN) is the most powerful learning machine available today (2021) but this power is achieved only by supplying thousands of examples of the each target call. +
A comparison of three different kinds of call recognizer
+
-> **Note**: The following two rules apply to the preparation of training/test datasets, regardless of the recognizer type. +Hand-crafted, *rule-based* templates can be built using just one or a few examples of the target call. But like any +ule-based *AI* system, they are *brittle*, that is, they break easily if the target call falls even slightly outside +the bounds of the rules. -> - **Rule 1.** Rubbush in => rubbish out!! That is, think carefully about your chosen training/test examples. +A supervised machine-learning model, for example an SVM or Random Forest, is far more resilient to slight changes in the +range of the target call but they require many more training examples, on the order of 100 training examples. -> - **Rule 2.** Training and test sets should be representative (in some loose statistical sense) of the intended operational environment. +Finally, the convolutional neural network (CNN) is the most powerful learning machine available today (2021) but this +power is achieved only by supplying thousands of examples of the each target call. +> [!TIP] +> The following two rules apply to the preparation of training/test datasets, regardless of the recognizer type. +> +> - **Rule 1.** Rubbish in āž” rubbish out! +> That is, think carefully about your chosen training/test examples. +> - **Rule 2.** Training and test sets should be representative (in some loose statistical sense) of the intended +> operational environment. -To summarise (and at the risk of over-simplification), a hand-crafted template has low cost and low benefit; a machine-learned model has medium cost and medium benefit, while a deep-learned model has high cost and high benefit. The cost/benefit ratio in each case is similar but here is the catch - the cost must be paid _before_ you get the benefit! Furthermore, in a typical ecological study, a bird species is of interest precisely because it is threatened or cryptic. 
power is achieved only by supplying thousands of examples of each target call.
structured. A *call* is taken to be any sound of animal origin (whether for communication purposes or not) and includes
For example they may +consist of a temporal sequence of two or more *syllables* (with "gaps" in between) or a set of simultaneous *harmonics* +or *formants*. (The distinction between harmonics and formants does not concern us here.) +A **DIY Call Recognizer** attempts to recognize calls in a noise-reduced [spectrogram](xref:theory-spectrograms). ## 3. Acoustic events -An *acoustic event* is defined as a contiguous set of spectrogram cells/pixels whose decibel values exceed some user defined threshold. In the ideal case, an acoustic event should encompass a discrete component of acoustic energy within a call, syllable or harmonic. It will be separated from other acoustic events by intervening pixels having decibel values *below* the user defined threshold. **DIY Call Recognizer** contains algorithms to recognize seven different kinds of _"generic"_ acoustic event based on their shape in the spectrogram. We describe these in turn. +An [_acoustic event_](xref:theory-acoustic-events) is defined as a contiguous set of spectrogram cells/pixels whose decibel values exceed some user +defined threshold. In the ideal case, an acoustic event should encompass a discrete component of acoustic energy within +a call, syllable or harmonic. It will be separated from other acoustic events by gaps having decibel values *below* + the user defined threshold. -### 3.1. Shreik -This is a diffuse acoustic event that is extended in both time and frequency. While a shriek may have some internal structure, it is treated by **DIY Call Recognizer** as a "blob" of acoustic energy. A typical example is a parrot shriek. +**DIY Call Recognizer** contains algorithms to recognize seven different kinds of _generic_ acoustic events based on +their shape in the spectrogram. -### 3.2. Whistle -This is a narrow band, "pure" tone having duration over several to many time frames but having very restricted bandwidth. 
In theory a pure tone occupies a single frequency bin, but in practice bird whistles can occupy several freqeuncy bins and appear as a horizontal *spectral track* in the spectrogram.
+There are seven types of acoustic events:
 
-### 3.3. Chirp
-This sounds like a whistle whose frequency increases or decreases over time. A chirp is said to be a *frqeuency modulated* tone. It appears in the spectrogram as a gently ascending or descending *spectral track*.
+1. [Shrieks](xref:theory-acoustic-events#shrieks):
+   diffuse events treated as "blobs" of acoustic energy. A typical example is a parrot shriek.
+2. [Whistles](xref:theory-acoustic-events#whistles):
+   "pure" tones (often imperfect) appearing as horizontal lines on a spectrogram
+3. [Chirps](xref:theory-acoustic-events#chirps):
+   whistle-like events that increase in frequency over time. They appear as sloping lines in a spectrogram.
+4. [Whips](xref:theory-acoustic-events#whips):
+   sound like a "whip crack". They appear as steeply ascending or descending *spectral tracks* in the spectrogram.
+5. [Clicks](xref:theory-acoustic-events#clicks):
+   appear as a single vertical line in a spectrogram and sound, as the name suggests, like a very brief click.
+6. [Oscillations](xref:theory-acoustic-events#oscillations):
+   An oscillation is the same (or nearly the same) syllable (typically whips or clicks) repeated at a fixed periodicity over several to many time-frames.
+7. [Harmonics](xref:theory-acoustic-events#harmonics):
+   Harmonics are the same/similar shaped *whistle* or *chirp* repeated simultaneously at multiple intervals of frequency. Typically, the frequency intervals are similar as one ascends the stack of harmonics.
 
-### 3.4. Whip
-A *whip* is like a *chirp* except that the frequency modulation can be extremely rapid so that it sounds like a "whip crack". It has the appearance of a steeply ascending or descending *spectral track* in the spectrogram.
An archetypal whip is the final component in the whistle-whip of the Australian whip-bird. Within the DIY Recognizer software, the distinction between a chirp and a whip is not sharp. That is, a *spectral track* that is ascending diagonally (cell-wise) at 45 degrees in the spectrogram will be detected by both the *chirp* and the *whip* algorithms. +For more detail on event types see [_acoustic events_](xref:theory-acoustic-events). -### 3.5. Click -The *click* appears as a single vertical line in a spectrogram and sounds, like the name suggests, as a very brief click. In practice, depending on spectrogram configuration settings, a *click* may occupy two or more adjacent time-frames. +
-Note that each of the above five acoustic events are "simple" events. The remaining two kinds of acoustic event are said to be composite, that is, they are composed of more than one acoustic event but the detection algorithm is designed to pick them up as a single event. +![Seven Kinds Of Acoustic Event](../images/SevenKindsAcousticEvent.jpg) -### 3.6. Oscillations -An oscillation is the same (or nearly the same) syllable (typically whips or clicks) repeated at a fixed periodicity over several to many time-frames. +
The seven kinds of generic acoustic event
+
-### 3.7. Harmonics -Harmonics are the same/similar shaped *whistle* or *chirp* repeated simultaneously at multiple intervals of frequency. Typically, the frequency intervals are similar as one ascends the stack of harmonics. +## 4. Detecting acoustic events -**Figure. The seven kinds of generic acoustic event** -![Seven Kinds Of Acoustic Event](./Images/SevenKindsAcousticEvent.jpg) +A **DIY Call Recognizer** detects or recognizes target calls in an audio recording using a sequence of steps: -. +1. Preprocessingā€”steps to prepare the recording for subsequent analysis. + 1. Input audio is broken up into 1-minute chunks + 2. Audio resampling +2. Processingā€”steps to identify target syllables as _"generic"_ acoustic events + 1. Spectrogram preparation + 1. Call syllable detection +3. Postprocessingā€”steps which simplify the output combining related acoustic events and filtering events to remove false-positives + 1. Combining syllable events into calls + 1. Syllable/call filtering +4. Saving Results +To execute these detection steps, suitable _parameter values_ must be placed into a [_configuration file_](xref:basics-config-files). -## 4. Detecting acoustic events -**DIY Call Recognizer** detects or recognizes target calls in an audio recording using a sequence of seven steps: -1. Audio segmentation -2. Audio resampling -3. Spectrogram preparation -4. Call syllable detection -5. Combining syllable events into calls -6. Syllable/call filtering -7. Saving Results +## 5. Configuration files -It helps to group these detection steps into four parts: -- Steps 1 and 2: _Pre-processing_ steps to prepare the recording for subsequent analysis. -- Steps 3 and 4: _Processing_ steps to identify target syllables as _"generic"_ acoustic events. -- Steps 5 and 6: _Post-processing_ steps which simplify the output from step 4 by combining related acoustic events and filtering events to remove false-positives. -- Step 7: The final step is to save those events which remain. 
+All analyses in _AP_ require a [_configuration file_](xref:basics-config-files) (henceforth, _config_ file) in order to tune the analysis.
 
-To execute these seven detection steps correctly, you must enter suitable _parameter values_ into a _configuration file_.
+It is no different for a generic recognizer. To find calls of interest in a recording, _AP_ reads the config file
+which contains _parameters_ and then executes the detection steps accordingly.
 
+> [!IMPORTANT]
+> If you're not familiar with _AP_'s config files, please review the [config files](xref:basics-config-files) page.
 
+### Naming
 
-.
-
-## 5. Configuration files
-### The structure of the config file name
-**DIY Call Recognizer** is a command line tool. It requires a _configuration file_ (henceforth, _config_ file) in order to find calls of interest in a recording. The name of the config file is included as a command line argument. `APexe` reads the file containing a list of _parameters_ and then executes the detection steps accordingly. The command line will be described in a subsequent section.
+Configuration files must be named in a certain format.
 
-> NOTE: The config filename must have the correct structure in order to be recognized by `APexe`. For example, given a config file with the name `AuthorId.GenericRecognizer.NinoxBoobook.yml`:
+> NOTE: The config filename must have the correct structure in order to be recognized by _AP_. For example, given a config file with the name `AuthorId.GenericRecognizer.NinoxBoobook.yml`:
 > - `AuthorId` is simply to keep track of the origins of the config.
-> - `GenericRecognizer` tells `APexe` that this is a call recognition task and to parse the config file accordingly. Note this must be in second place in the file name.
-> - `NinoxBoobook` (the Boobook owl) is an optional species name. `APexe` does not read/use this info but note that there must be no spaces in the file name.
-> - `.yml` informs `APexe` what syntax to expect, in this case YAML.
+> - `GenericRecognizer` tells _AP_ that this is a call recognition task and to parse the config file accordingly. Note this must be in second place in the file name. +> - `NinoxBoobook` (the Boobook owl) is an optional species name. _AP_ does not read/use this info but note that there must be no spaces in the file name. +> - `.yml` informs _AP_ what syntax to expect, in this case YAML. **_TODO_** need to check with Anthony re changes to structure of the config file name. -`APexe` config files must be written in a language called YAML. For an introduction to YAML syntax please see this article: https://sweetohm.net/article/introduction-yaml.en.html. +_AP_ config files must be written in a language called YAML. For an introduction to YAML syntax please see this article: https://sweetohm.net/article/introduction-yaml.en.html. We highly recommend using Notepad++ or Visual Studio Code to edit your YAML config files. Both are free, and both come with built in syntax highlighting for YAML files. ### Parameters @@ -200,7 +216,7 @@ Profiles: MaxDuration: 1.2 ``` -This artificial example illustrates three profiles (i.e. syllables or acoustic events) under the key word `Profiles`. Each profile has a user defined name (eg. BoobookSyllable3) and type. The `!` following the colon should be read as "of event type". Each profile in this example has four parameters. (The lines starting with `#` are comments and ignored by the yaml interpreter.) All three profiles have the same values for `MinHertz` and `MaxHertz` but different values for their time duration. Each profile is processed separately by `APexe`. +This artificial example illustrates three profiles (i.e. syllables or acoustic events) under the key word `Profiles`. Each profile has a user defined name (eg. BoobookSyllable3) and type. The `!` following the colon should be read as "of event type". Each profile in this example has four parameters. (The lines starting with `#` are comments and ignored by the yaml interpreter.) 
All three profiles have the same values for `MinHertz` and `MaxHertz` but different values for their time duration. Each profile is processed separately by _AP_. > *IMPORTANT NOTE ABOUT INDENTATION: In YAML syntax, the levels of a hierarchy are distinguished by indentation alone. It is extremely important that the indentation is retained or the config file will not be read correctly. Use four spaces for indentation, not the TAB key. @@ -271,7 +287,7 @@ Profiles: > The default value for *WindowFunction* is `HANNING`. There should never be a need to change this but you might like to try a `HAMMING` window if you are not satisfied with the appearance of your spectrograms. -> The "Bg" in *BgNoiseThreshold* means *background*. This parameter determines the degree of severity of noise removal from the spectrogram. The units are decibels. Zero sets the least severe noise removal. It is the safest default value and probably does not need to be changed. Increasing the value to say 3-4 decibels increases the likelihood that you will lose some important components of your target calls. For more on the noise removal algorithm used by `APexe` see [Towsey, Michael W. (2013) Noise removal from wave-forms and spectrograms derived from natural recordings of the environment.](https://eprints.qut.edu.au/61399/). +> The "Bg" in *BgNoiseThreshold* means *background*. This parameter determines the degree of severity of noise removal from the spectrogram. The units are decibels. Zero sets the least severe noise removal. It is the safest default value and probably does not need to be changed. Increasing the value to say 3-4 decibels increases the likelihood that you will lose some important components of your target calls. For more on the noise removal algorithm used by _AP_ see [Towsey, Michael W. (2013) Noise removal from wave-forms and spectrograms derived from natural recordings of the environment.](https://eprints.qut.edu.au/61399/). ### Step 4. 
Call syllable detection @@ -542,7 +558,7 @@ We described above the various steps required to tune the parameter values in a > **Step 6.** Repeat steps 3, 4 and 5 until you appear to have achieved the best possible accuracy. In order to minimise the number of iterations of stages 3 to 5, it is best to tune the configuration parameters in the sequence described in the previous section. -> **Step 7.** At this point you should have a recognizer that performs "as accurately as possible" on your training examples. The next step is to test your recognizer on one or a few examples that it has not seen before. That is, repeat steps 3, 4, 5 and 6 adding in a new example each time as they become available. It is also useful at this stage to accumulate a set of recordings that do *not* contain the target call. See Section 10 for more suggestions on building datasets. +> **Step 7.** At this point you should have a recognizer that performs "as accurately as possible" on your training examples. The next step is to test your recognizer on one or a few examples that it has not seen before. That is, repeat steps 3, 4, 5 and 6 adding in a new example each time as they become available. It is also useful at this stage to accumulate a set of recordings that do *not* contain the target call. See Section 10 for more suggestions on building datasets. > **Step 8:** At some point you are ready to use your recognizer on recordings obtained from the operational environment. @@ -551,22 +567,22 @@ We described above the various steps required to tune the parameter values in a ## 9. The DIY Call Recognizer command line -`APexe` performs several functions or actions, each one requiring a different command line. In its most general form, the command line takes the form: +_AP_ performs several functions or actions, each one requiring a different command line. 
In its most general form, the command line takes the form:
 
 >`AnalysisPrograms.exe action arguments options`
 
 In this section we only describe the command line for the _call recognizer_ action where:
 - action = "audio2csv".
 - arguments = three file paths, to an audio file, a config file and an output directory.
-- options = short strings beginning with a single or double hyphen (`-` or `--`) that influence `APexe`'s execution.
+- options = short strings beginning with a single or double hyphen (`-` or `--`) that influence _AP_'s execution.
 
-Refer to other manuals [here](https://github.com/QutEcoacoustics/audio-analysis/blob/master/README.md) for a more complete description of `APexe`'s functionality. Note that the three file arguments must be in the order shown, that is: audio file, config file, output directory.
+Refer to other manuals [here](https://github.com/QutEcoacoustics/audio-analysis/blob/master/README.md) for a more complete description of _AP_'s functionality. Note that the three file arguments must be in the order shown, that is: audio file, config file, output directory.
 
 **Options:** There are three frequently useful options:
 1. The debug/no-debug options: Use "-d" for debug or "-n" for no debugging.
 2. The verbosity options: "--quiet", "-v", "-vv", "-vvv" for different levels of verbosity.
- 3. The analysis-identifier option: Use "-a" or "--analysis-identifier" followed by the , which in the case of DIY call recognizers is "NameId.GenericRecognizer". This is a useful addition to the command line because it informs `APexe` that this as a call recognition task in case the config file is not named correctly.
+ 3. The analysis-identifier option: Use "-a" or "--analysis-identifier" followed by the analysis identifier, which in the case of DIY call recognizers is "NameId.GenericRecognizer". This is a useful addition to the command line because it informs _AP_ that this is a call recognition task in case the config file is not named correctly.
For other possible options, see the above referenced manual. @@ -594,7 +610,7 @@ In the above command line, the options are no-debugging and minimal logging. As indicated at Step 7 in Section 8 (*Eight steps to building a DIY Call Recognizer*), it is useful to accumulate a set of recordings, some of which contain the target call and some of which *do not*. The *negative* examples should include acoustic events that have previously been detected as FPs. You now have two sets of recordings, one set containing the target call(s) and one set containing previous FPs and other possible confusing acoustic events. The idea is to tune parameter values, while carefully watching for what effect the changes have on both data sets. Eventually, these two labelled data sets can be used for machine learning purposes. -In order to facilitate the determination of recognizer performance on labelled datasets, `APexe` can be run from the `Egret` software. `Egret` can greatly speed up the preparation of labelled datasets and can greatly improve the performance of a recognizer by more careful selection of positive and negative examples. `Egret` is available from [https://github.com/QutEcoacoustics/egret](https://github.com/QutEcoacoustics/egret). +In order to facilitate the determination of recognizer performance on labelled datasets, _AP_ can be run from the `Egret` software. `Egret` can greatly speed up the preparation of labelled datasets and can greatly improve the performance of a recognizer by more careful selection of positive and negative examples. `Egret` is available from [https://github.com/QutEcoacoustics/egret](https://github.com/QutEcoacoustics/egret). 
================================================================== diff --git a/docs/guides/toc.yml b/docs/guides/toc.yml index f4912e980..39e15d9c6 100644 --- a/docs/guides/toc.yml +++ b/docs/guides/toc.yml @@ -8,4 +8,4 @@ href: using_r.md - name: DIY Call Recognizer - href: DIY_CallRecognizersUsingAP.md + href: generic_recognizers.md diff --git a/docs/guides/using_r.md b/docs/guides/using_r.md index cdc631e0e..76cb14f8a 100644 --- a/docs/guides/using_r.md +++ b/docs/guides/using_r.md @@ -57,6 +57,7 @@ for(file in files) { ## Script explained ### Set the directory containing the files + Assign using the left arrow operator `<- `the folder where the audio files are located to the variable `directory `, like this: diff --git a/docs/guides/Images/SevenKindsAcousticEvent.jpg b/docs/images/SevenKindsAcousticEvent.jpg similarity index 100% rename from docs/guides/Images/SevenKindsAcousticEvent.jpg rename to docs/images/SevenKindsAcousticEvent.jpg diff --git a/docs/images/installer_screenshot.png b/docs/images/installer_screenshot.png new file mode 100644 index 000000000..e58bdc7c5 --- /dev/null +++ b/docs/images/installer_screenshot.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dcf26eab64dbd799e676fd410e55771789c247bdc79b312a2b3b2ac66c138e0 +size 126238 diff --git a/docs/templates/AP/partials/scripts.tmpl.partial b/docs/templates/AP/partials/scripts.tmpl.partial index fe34698f6..d44d0cee3 100644 --- a/docs/templates/AP/partials/scripts.tmpl.partial +++ b/docs/templates/AP/partials/scripts.tmpl.partial @@ -14,4 +14,4 @@ mermaid.init({ securityLevel: 'loose' }, document.querySelectorAll("code.lang-mermaid")); - \ No newline at end of file + diff --git a/docs/templates/AP/styles/main.css b/docs/templates/AP/styles/main.css index 27fd20e0c..7caa28ef8 100644 --- a/docs/templates/AP/styles/main.css +++ b/docs/templates/AP/styles/main.css @@ -13,4 +13,13 @@ } .index-toc .nav li{ display: revert; +} + +figure { + padding-bottom: 1em; +} + 
+figure>figcaption { + font-weight: bold; + text-align: center; } \ No newline at end of file diff --git a/docs/theory/acoustic_events.md b/docs/theory/acoustic_events.md new file mode 100644 index 000000000..6956536fb --- /dev/null +++ b/docs/theory/acoustic_events.md @@ -0,0 +1,64 @@ +--- +title: Acoustic Events +uid: theory-acoustic-events +--- + +# Acoustic Events + +An *acoustic event* is defined as an interval of acoustic energy, above background noise level, emitted from a single source. + +For recognition purposes, it can also be defined as a contiguous set of spectrogram cells/pixels whose decibel values +exceed some user defined threshold. + +In the ideal case, an acoustic event should encompass a discrete component of acoustic energy within a call, syllable +or harmonic. It will be separated from other acoustic events by intervening pixels having decibel values *below* the user defined threshold. + +![Seven Kinds Of Acoustic Event](../images/SevenKindsAcousticEvent.jpg) + +## Simple Events + +### Shrieks + +This is a diffuse acoustic event that is extended in both time and frequency. While a shriek may have some internal +structure, it is often treated by as a "blob" of acoustic energy. A typical example is a parrot shriek. + +### Whistles + +This is a narrow band, "pure" tone having duration over several to many time frames but having very restricted bandwidth. +In theory a pure tone occupies a single frequency bin, but in practice bird whistles can occupy several frequency bins +and appear as a horizontal *spectral track* in the spectrogram. + +### Chirps + +This sounds like a whistle whose frequency increases or decreases over time. A chirp is said to be a *frequency modulated* +tone. It appears in the spectrogram as a gently ascending or descending *spectral track*. + +### Whips + +A *whip* is like a *chirp* except that the frequency modulation can be extremely rapid so that it sounds like a +"whip crack". 
It has the appearance of a steeply ascending or descending *spectral track* in the spectrogram. +An archetypal whip is the final component in the whistle-whip of the Australian whip-bird. + +In _AP_, the distinction between a chirp and a whip is not sharp. That is, a *spectral track* that is ascending +diagonally (cell-wise) at 45 degrees in the spectrogram will be detected by both the *chirp* and the *whip* algorithms. + +### Clicks + +The *click* appears as a single vertical line in a spectrogram and sounds, like the name suggests, as a very brief click. +In practice, depending on spectrogram configuration settings, a *click* may occupy two or more adjacent time-frames. + +Note that each of the above five acoustic events are "simple" events. The remaining two kinds of acoustic event are said +to be composite, that is, they are composed of more than one acoustic event but the detection algorithm is designed to +pick them up as a single event. + +## Complex Events + +### Oscillations + +An oscillation is the same (or nearly the same) syllable (typically whips or clicks) repeated at a fixed periodicity over +several to many time-frames. + +### Harmonics + +Harmonics are the same/similar shaped *whistle* or *chirp* repeated simultaneously at multiple intervals of frequency. +Typically, the frequency intervals are similar as one ascends the stack of harmonics. diff --git a/docs/theory/glossary.md b/docs/theory/glossary.md index e69de29bb..cd0b790d6 100644 --- a/docs/theory/glossary.md +++ b/docs/theory/glossary.md @@ -0,0 +1,12 @@ +--- +title: Glossary +uid: theory-glossary +--- + +## Call + +A *call* is taken to be any sound of animal origin (whether for communication purposes or not) and includes bird +songs/calls, animal vocalizations of any kind, the stridulation of insects, the wingbeats of birds and bats and the +various sounds produced by aquatic animals. Calls typically have temporal and spectral structure. 
For example they +may consist of a temporal sequence of two or more *syllables* (with "gaps" in between) or a set of simultaneous +*harmonics* or *formants* diff --git a/docs/theory/spectrograms.md b/docs/theory/spectrograms.md new file mode 100644 index 000000000..75b506476 --- /dev/null +++ b/docs/theory/spectrograms.md @@ -0,0 +1,12 @@ +--- +title: Spectrogram +uid: theory-spectrograms +--- + +# Spectrograms + +A spectrogram is processed as a matrix of real values but visualized as a grey-scale image. Each row of pixels is a frequency bin and each column of pixels is a time-frame. The value in each spectrogram/matrix cell (represented visually by one image pixel) is the acoustic intensity in decibels with respect to the background noise baseline. Note that the decibel values in a noise-reduced spectrogram are always positive. + +## Noise reduction + +[TODO] \ No newline at end of file diff --git a/docs/theory/toc.yml b/docs/theory/toc.yml index 0b8e1e189..06436974e 100644 --- a/docs/theory/toc.yml +++ b/docs/theory/toc.yml @@ -1,4 +1,11 @@ +- name: Spectrograms + href: xref:theory-spectrograms + +- name: Acoustic Events + href: xref:theory-acoustic-events + - name: Acoustic Indices href: indices.md + - name: Glossary href: glossary.md diff --git a/src/AnalysisConfigFiles/Towsey.Acoustic.yml b/src/AnalysisConfigFiles/Towsey.Acoustic.yml index 1d153225d..42d9730bf 100644 --- a/src/AnalysisConfigFiles/Towsey.Acoustic.yml +++ b/src/AnalysisConfigFiles/Towsey.Acoustic.yml @@ -10,12 +10,12 @@ #The AnalysisName property is no longer required. Analysis type is now identified by config file name. #AnalysisName: Towsey.Acoustic -#SegmentDuration: units=seconds; +# SegmentDuration: units=seconds; # Long duration recordings are cut into short segments for more efficient processing. Default segment length = 60 seconds. # WARNING: You should not change this property!! 
SegmentDuration: 60 -#SegmentOverlap: units=seconds; +# SegmentOverlap: units=seconds; # WARNING: You should not change this property!! SegmentOverlap: 0 @@ -121,12 +121,12 @@ LdSpectrogramConfig: # When filterCoeff =-1.0, small values are maximally de-emphasized, i.e. y=x^2. # Generally usage suggests that a value of -0.25 is suitable. i.e. a slight de-emphasis. ColourFilter: -0.25 - + # The third index in the color map is always mapped to blue. The eye is less sensitive to blue and it can be difficult to see in dark background. # Therefore we enhance the blue by making it brighter, but only when the red and green values are low. # This could be done better but can be a helpful! The intention is to create a more visible light blue color. # The default value for BlueEnhanceParameter = 0.0 i.e. do no enhancement. - # Suggested value is 0.4 when want to enhance visualisation of the "blue" index. + # Suggested value is 0.4 when want to enhance visualisation of the "blue" index. BlueEnhanceParameter: 0.4 # minutes x-axis scale