Skip to content

Commit

Permalink
Improved input syntax for frequency_differencing operation (#1106)
Browse files Browse the repository at this point in the history
* Improved input syntax of `frequency_differencing`

* fixed minor changes

* fixed minor bug in utils tests

* fixed wrong freqAB issue
  • Loading branch information
praneethratna authored Aug 31, 2023
1 parent fcb41c4 commit 5abca87
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 188 deletions.
164 changes: 21 additions & 143 deletions echopype/mask/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from ..utils.io import validate_source_ds_da
from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level
from .freq_diff import _check_freq_diff_source_Sv, _parse_freq_diff_eq

# lookup table with key string operator and value as corresponding Python operator
str2ops = {
Expand Down Expand Up @@ -366,128 +367,11 @@ def get_ch_shape(da):
return output_ds


def _check_freq_diff_non_data_inputs(
freqAB: Optional[List[float]] = None,
chanAB: Optional[List[str]] = None,
operator: str = ">",
diff: Union[float, int] = None,
) -> None:
"""
Checks that the non-data related inputs of ``frequency_differencing`` (i.e. ``freqAB``,
``chanAB``, ``operator``, ``diff``) were correctly provided.
Parameters
----------
freqAB: list of float, optional
The pair of nominal frequencies to be used for frequency-differencing, where
the first element corresponds to ``freqA`` and the second element corresponds
to ``freqB``
chanAB: list of float, optional
The pair of channels that will be used to select the nominal frequencies to be
used for frequency-differencing, where the first element corresponds to ``freqA``
and the second element corresponds to ``freqB``
operator: {">", "<", "<=", ">=", "=="}
The operator for the frequency-differencing
diff: float or int
The threshold of Sv difference between frequencies
"""

# check that either freqAB or chanAB are provided and they are a list of length 2
if (freqAB is None) and (chanAB is None):
raise ValueError("Either freqAB or chanAB must be given!")
elif (freqAB is not None) and (chanAB is not None):
raise ValueError("Only freqAB or chanAB must be given, but not both!")
elif freqAB is not None:
if not isinstance(freqAB, list):
raise TypeError("freqAB must be a list!")
elif len(set(freqAB)) != 2:
raise ValueError("freqAB must be a list of length 2 with unique elements!")
else:
if not isinstance(chanAB, list):
raise TypeError("chanAB must be a list!")
elif len(set(chanAB)) != 2:
raise ValueError("chanAB must be a list of length 2 with unique elements!")

# check that operator is a string and a valid operator
if not isinstance(operator, str):
raise TypeError("operator must be a string!")
else:
if operator not in [">", "<", "<=", ">=", "=="]:
raise ValueError("Invalid operator!")

# ensure that diff is a float or an int
if not isinstance(diff, (float, int)):
raise TypeError("diff must be a float or int!")


def _check_source_Sv_freq_diff(
    source_Sv: xr.Dataset,
    freqAB: Optional[List[float]] = None,
    chanAB: Optional[List[str]] = None,
) -> None:
    """
    Validate ``source_Sv`` against the requested frequency-differencing pair.

    The Dataset must expose ``channel`` as a coordinate and ``frequency_nominal``
    as a variable, neither of them may hold repeated values, and every element
    of the provided pair (``freqAB`` or ``chanAB``) must be present in
    ``frequency_nominal`` or ``channel``, respectively.

    Parameters
    ----------
    source_Sv: xr.Dataset
        A Dataset that contains the Sv data to create a mask for
    freqAB: list of float, optional
        The pair of nominal frequencies to be used for frequency-differencing, where
        the first element corresponds to ``freqA`` and the second element corresponds
        to ``freqB``
    chanAB: list of str, optional
        The pair of channels that will be used to select the nominal frequencies to be
        used for frequency-differencing, where the first element corresponds to ``freqA``
        and the second element corresponds to ``freqB``

    Raises
    ------
    ValueError
        If any of the conditions described above is violated
    """

    # structural requirements: channel coordinate and frequency_nominal variable
    if "channel" not in source_Sv.coords:
        raise ValueError("The Dataset defined by source_Sv must have channel as a coordinate!")
    if "frequency_nominal" not in source_Sv.variables:
        raise ValueError(
            "The Dataset defined by source_Sv must have frequency_nominal as a variable!"
        )

    # channel values must be unique
    channels = source_Sv.channel
    unique_channel_count = len(set(channels.values))
    if unique_channel_count < channels.size:
        raise ValueError(
            "The provided source_Sv contains repeated channel values, this is not allowed!"
        )

    # frequency_nominal values must be unique
    frequencies = source_Sv.frequency_nominal
    unique_frequency_count = len(set(frequencies.values))
    if unique_frequency_count < frequencies.size:
        raise ValueError(
            "The provided source_Sv contains repeated frequency_nominal "
            "values, this is not allowed!"
        )

    # every requested frequency must exist in frequency_nominal
    if freqAB is not None:
        freq_present = [freq in frequencies for freq in freqAB]
        if not all(freq_present):
            raise ValueError(
                "The provided list input freqAB contains values that "
                "are not in the frequency_nominal variable!"
            )

    # every requested channel must exist in channel
    if chanAB is not None:
        chan_present = [chan in channels for chan in chanAB]
        if not all(chan_present):
            raise ValueError(
                "The provided list input chanAB contains values that are "
                "not in the channel coordinate!"
            )


def frequency_differencing(
source_Sv: Union[xr.Dataset, str, pathlib.Path],
storage_options: Optional[dict] = {},
freqAB: Optional[List[float]] = None,
chanAB: Optional[List[str]] = None,
operator: str = ">",
diff: Union[float, int] = None,
freqABEq: Optional[str] = None,
chanABEq: Optional[str] = None,
) -> xr.DataArray:
"""
Create a mask based on the differences of Sv values using a pair of
Expand All @@ -504,19 +388,13 @@ def frequency_differencing(
storage_options: dict, optional
Any additional parameters for the storage backend, corresponding to the
path provided for ``source_Sv``
freqAB: list of float, optional
The pair of nominal frequencies to be used for frequency-differencing, where
the first element corresponds to ``freqA`` and the second element corresponds
to ``freqB``. Only one of ``freqAB`` and ``chanAB`` should be provided, and not both.
chanAB: list of strings, optional
The pair of channels that will be used to select the nominal frequencies to be
used for frequency-differencing, where the first element corresponds to ``freqA``
and the second element corresponds to ``freqB``. Only one of ``freqAB`` and ``chanAB``
freqABEq: string, optional
The frequency differencing criteria.
Only one of ``freqABEq`` and ``chanABEq`` should be provided, and not both.
chanABEq: string, optional
The frequency differencing criteria in terms of channel names where channel names
in the criteria are enclosed in double quotes. Only one of ``freqABEq`` and ``chanABEq``
should be provided, and not both.
operator: {">", "<", "<=", ">=", "=="}
The operator for the frequency-differencing
diff: float or int
The threshold of Sv difference between frequencies
Returns
-------
Expand All @@ -527,24 +405,24 @@ def frequency_differencing(
Raises
------
ValueError
If neither ``freqAB`` or ``chanAB`` are given
If neither ``freqABEq`` or ``chanABEq`` are given
ValueError
If both ``freqAB`` and ``chanAB`` are given
If both ``freqABEq`` and ``chanABEq`` are given
TypeError
If any input is not of the correct type
ValueError
If either ``freqAB`` or ``chanAB`` are provided and the list
does not contain 2 distinct elements
If either ``freqABEq`` or ``chanABEq`` are provided and the extracted
``freqAB`` or ``chanAB`` does not contain 2 distinct elements
ValueError
If ``freqAB`` contains values that are not contained in ``frequency_nominal``
If ``freqABEq`` contains values that are not contained in ``frequency_nominal``
ValueError
If ``chanAB`` contains values that not contained in ``channel``
If ``chanABEq`` contains values that are not contained in ``channel``
ValueError
If ``operator`` is not one of the following: ``">", "<", "<=", ">=", "=="``
ValueError
If the path provided for ``source_Sv`` is not a valid path
ValueError
If ``freqAB`` or ``chanAB`` is provided and the Dataset produced by ``source_Sv``
If ``freqABEq`` or ``chanABEq`` is provided and the Dataset produced by ``source_Sv``
does not contain the coordinate ``channel`` and variable ``frequency_nominal``
Notes
Expand Down Expand Up @@ -573,9 +451,8 @@ def frequency_differencing(
>>> Sv_ds = xr.Dataset(data_vars={"Sv": Sv_da, "frequency_nominal": freq_nom})
...
>>> # compute frequency-differencing mask using channel names
>>> echopype.mask.frequency_differencing(source_Sv=mock_Sv_ds, storage_options={}, freqAB=None,
... chanAB = ['chan1', 'chan2'],
... operator = ">=", diff=10.0)
>>> echopype.mask.frequency_differencing(source_Sv=mock_Sv_ds, storage_options={},
... freqABEq=None, chanABEq = '"chan1" - "chan2">=10.0dB')
<xarray.DataArray 'mask' (ping_time: 5, range_sample: 5)>
array([[False, False, False, False, False],
[False, False, False, False, False],
Expand All @@ -588,7 +465,8 @@ def frequency_differencing(
"""

# check that non-data related inputs were correctly provided
_check_freq_diff_non_data_inputs(freqAB, chanAB, operator, diff)
# _check_freq_diff_non_data_inputs(freqAB, chanAB, operator, diff)
freqAB, chanAB, operator, diff = _parse_freq_diff_eq(freqABEq, chanABEq)

# validate the source_Sv type or path (if it is provided)
source_Sv, file_type = validate_source_ds_da(source_Sv, storage_options)
Expand All @@ -598,7 +476,7 @@ def frequency_differencing(
source_Sv = xr.open_dataset(source_Sv, engine=file_type, chunks={}, **storage_options)

# check the source_Sv with respect to channel and frequency_nominal
_check_source_Sv_freq_diff(source_Sv, freqAB, chanAB)
_check_freq_diff_source_Sv(source_Sv, freqAB, chanAB)

# determine chanA and chanB
if freqAB is not None:
Expand Down
149 changes: 149 additions & 0 deletions echopype/mask/freq_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import re
from typing import List, Optional, Union

import xarray as xr


def _parse_freq_diff_eq(
freqABEq: Optional[str] = None,
chanABEq: Optional[str] = None,
) -> List[Union[List[float], List[str], str, Union[float, int]]]:
"""
Checks if either `freqABEq` or `chanABEq` is provided and parse the arguments accordingly
from the frequency diffrencing criteria.
Parameters
----------
freqABEq : str, optional
The equation for frequency-differencing using frequency values.
chanABEq : str, optional
The equation for frequency-differencing using channel names.
Returns
-------
List[Union[List[float], List[str], str, Union[float, int]]]
A list containing the parsed arguments for frequency-differencing, where the first element
corresponds to `freqAB`, the second element corresponds to `chanAB`, the third element
corresponds to `operator`, the fourth element corresponds to `diff`.
Raises
------
ValueError
If `operator` is not a valid operator.
If both `freqABEq` and `chanABEq` are provided.
If neither `freqABEq` nor `chanABEq` is provided.
If `freqAB` or `chanAB` is not a list of length 2 with unique elements.
TypeError
If `diff` is not a float or an int.
If `freqABEq` or `chanABEq` is not a valid equation.
"""

if (freqABEq is None) and (chanABEq is None):
raise ValueError("Either freqAB or chanAB must be given!")
elif (freqABEq is not None) and (chanABEq is not None):
raise ValueError("Only one of freqAB or chanAB should be given, but not both!")
elif freqABEq is not None:
freqAPattern = r"(?P<freqA>\d*\.\d+)\s*(?P<unitA>\w?)Hz"
freqBPattern = r"(?P<freqB>\d*\.\d+)\s*(?P<unitB>\w?)Hz"
operatorPattern = r"\s*(?P<cmp>\S*?)\s*"
rhsPattern = r"(?P<db>\d*\.?\d+)\s*dB"
diffMatcher = re.compile(
freqAPattern + r"\s*-\s*" + freqBPattern + operatorPattern + rhsPattern
)
eqMatched = diffMatcher.match(freqABEq)
if eqMatched is None:
raise TypeError("Invalid freqAB Equation!")
operator = eqMatched["cmp"]
if operator not in [">", "<", "<=", ">=", "=="]:
raise ValueError("Invalid operator!")
freqMultiplier = {"": 1, "k": 1e3, "M": 1e6, "G": 1e9}
freqA = float(eqMatched["freqA"]) * freqMultiplier[eqMatched["unitA"]]
freqB = float(eqMatched["freqB"]) * freqMultiplier[eqMatched["unitB"]]
freqAB = [freqA, freqB]
if len(set(freqAB)) != 2:
raise ValueError("freqAB must be a list of length 2 with unique elements!")
diff = float(eqMatched["db"])
return [freqAB, None, operator, diff]
elif chanABEq is not None:
chanAPattern = r"(?P<chanA>\".+\")\s*"
chanBPattern = r"(?P<chanB>\".+\")\s*"
operatorPattern = r"\s*(?P<cmp>\S*?)\s*"
rhsPattern = r"(?P<db>\d*\.?\d+)\s*dB"
diffMatcher = re.compile(
chanAPattern + r"\s*-\s*" + chanBPattern + operatorPattern + rhsPattern
)
eqMatched = diffMatcher.match(chanABEq)
if eqMatched is None:
raise TypeError("Invalid chanAB Equation!")
operator = eqMatched["cmp"]
if operator not in [">", "<", "<=", ">=", "=="]:
raise ValueError("Invalid operator!")
chanAB = [eqMatched["chanA"][1:-1], eqMatched["chanB"][1:-1]]
if len(set(chanAB)) != 2:
raise ValueError("chanAB must be a list of length 2 with unique elements!")
diff = float(eqMatched["db"])
return [None, chanAB, operator, diff]


def _check_freq_diff_source_Sv(
    source_Sv: xr.Dataset,
    freqAB: Optional[List[float]] = None,
    chanAB: Optional[List[str]] = None,
) -> None:
    """
    Ensures that ``source_Sv`` contains ``channel`` as a coordinate and
    ``frequency_nominal`` as a variable, the provided list input
    (``freqAB`` or ``chanAB``) are contained in the coordinate ``channel``
    or variable ``frequency_nominal``, and ``source_Sv`` does not have
    repeated values for the one of ``channel`` and ``frequency_nominal``
    that is used for the selection.

    Parameters
    ----------
    source_Sv: xr.Dataset
        A Dataset that contains the Sv data to create a mask for
    freqAB: list of float, optional
        The pair of nominal frequencies to be used for frequency-differencing, where
        the first element corresponds to ``freqA`` and the second element corresponds
        to ``freqB``
    chanAB: list of str, optional
        The pair of channels that will be used to select the nominal frequencies to be
        used for frequency-differencing, where the first element corresponds to ``freqA``
        and the second element corresponds to ``freqB``

    Raises
    ------
    ValueError
        If ``source_Sv`` lacks the ``channel`` coordinate or the ``frequency_nominal``
        variable, if the values used for the selection are repeated in ``source_Sv``,
        or if an element of ``freqAB``/``chanAB`` is not present in ``source_Sv``
    """

    # check that channel and frequency nominal are in source_Sv
    if "channel" not in source_Sv.coords:
        raise ValueError("The Dataset defined by source_Sv must have channel as a coordinate!")
    elif "frequency_nominal" not in source_Sv.variables:
        raise ValueError(
            "The Dataset defined by source_Sv must have frequency_nominal as a variable!"
        )

    # make sure that the channel values are not repeated in source_Sv and
    # elements of chanAB are in channel
    if chanAB is not None:
        if len(set(source_Sv.channel.values)) < source_Sv.channel.size:
            raise ValueError(
                "The provided source_Sv contains repeated channel values, this is not allowed!"
            )
        if not all(chan in source_Sv.channel for chan in chanAB):
            raise ValueError(
                "The provided list input chanAB contains values that are "
                "not in the channel coordinate!"
            )

    # make sure that the frequency_nominal values are not repeated in source_Sv and
    # elements of freqAB are in frequency_nominal
    if freqAB is not None:
        if len(set(source_Sv.frequency_nominal.values)) < source_Sv.frequency_nominal.size:
            raise ValueError(
                "The provided source_Sv contains repeated "
                "frequency_nominal values, this is not allowed!"
            )

        if not all(freq in source_Sv.frequency_nominal for freq in freqAB):
            raise ValueError(
                "The provided list input freqAB contains values that "
                "are not in the frequency_nominal variable!"
            )
Loading

0 comments on commit 5abca87

Please sign in to comment.