Skip to content

Commit

Permalink
Temporarily remove compute_NASC (#1136)
Browse files Browse the repository at this point in the history
* comment out nasc-related sections

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove import

* fix pre commit problems

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
leewujung and pre-commit-ci[bot] authored Aug 27, 2023
1 parent 3407147 commit d2ffc3b
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 153 deletions.
3 changes: 2 additions & 1 deletion docs/source/data-proc-func.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
" - Quantities that can be computed from data stored in the `EchoData` object, e.g., for EK80 broadband data, split-beam angle can be computed from the complex samples\n",
" - External dataset, e.g., AZFP echosounders do not record geospatial data, but typically there is a companion dataset with GPS information\n",
"- [`clean`](echopype.clean): Reduce variabilities in backscatter data by perform noise removal operations. Currently contains only a simple noise removal function implementing the algorithm in {cite}`DeRobertis2007_noise`.\n",
"- [`commongrid`](echopype.commongrid): Enhance the spatial and temporal coherence of data. Currently contain functions to compute mean volume backscattering strength (MVBS) and nautical areal backscattering coefficients (NASC) that both result in gridded data at uniform spatial and temporal intervals.\n",
"- [`commongrid`](echopype.commongrid): Enhance the spatial and temporal coherence of data. Currently contains functions to compute mean volume backscattering strength (MVBS) that result in gridded data at uniform spatial and temporal intervals based on either number of indices or label values (phyiscal units).\n",
" <!-- Currently contain functions to compute mean volume backscattering strength (MVBS) and nautical areal backscattering coefficients (NASC) that both result in gridded data at uniform spatial and temporal intervals. -->\n",
"- [`qc`](echopype.qc): Handle unexpected irregularities in the data. Currently contains only functions to handle timestamp reversal in EK60/EK80 raw data.\n",
"- [`mask`](echopype.mask): Create or apply mask to segment data\n",
"- [`metrics`](echopype.metrics): Calculate simple summary statistics from data\n",
Expand Down
3 changes: 1 addition & 2 deletions echopype/commongrid/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from .api import compute_MVBS, compute_MVBS_index_binning, compute_NASC
from .api import compute_MVBS, compute_MVBS_index_binning

__all__ = [
"compute_MVBS",
"compute_MVBS_index_binning",
"compute_NASC",
]
258 changes: 125 additions & 133 deletions echopype/commongrid/api.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
"""
Functions for enhancing the spatial and temporal coherence of data.
"""
from typing import Union

import numpy as np
import pandas as pd
import xarray as xr

from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level
from .mvbs import get_MVBS_along_channels
from .nasc import (
check_identical_depth,
get_depth_bin_info,
get_dist_bin_info,
get_distance_from_latlon,
)


def _set_var_attrs(da, long_name, units, round_digits, standard_name=None):
Expand Down Expand Up @@ -255,131 +247,131 @@ def compute_MVBS_index_binning(ds_Sv, range_sample_num=100, ping_num=100):
return ds_MVBS


def compute_NASC(
ds_Sv: xr.Dataset,
cell_dist: Union[int, float], # TODO: allow xr.DataArray
cell_depth: Union[int, float], # TODO: allow xr.DataArray
) -> xr.Dataset:
"""
Compute Nautical Areal Scattering Coefficient (NASC) from an Sv dataset.
Parameters
----------
ds_Sv : xr.Dataset
A dataset containing Sv data.
The Sv dataset must contain ``latitude``, ``longitude``, and ``depth`` as data variables.
cell_dist: int, float
The horizontal size of each NASC cell, in nautical miles [nmi]
cell_depth: int, float
The vertical size of each NASC cell, in meters [m]
Returns
-------
xr.Dataset
A dataset containing NASC
Notes
-----
The NASC computation implemented here corresponds to the Echoview algorithm PRC_NASC
https://support.echoview.com/WebHelp/Reference/Algorithms/Analysis_Variables/PRC_ABC_and_PRC_NASC.htm#PRC_NASC # noqa
The difference is that since in echopype masking of the Sv dataset is done explicitly using
functions in the ``mask`` subpackage so the computation only involves computing the
mean Sv and the mean height within each cell.
In addition, here the binning of pings into individual cells is based on the actual horizontal
distance computed from the latitude and longitude coordinates of each ping in the Sv dataset.
Therefore, both regular and irregular horizontal distance in the Sv dataset are allowed.
This is different from Echoview's assumption of constant ping rate, vessel speed, and sample
thickness when computing mean Sv.
"""
# Check Sv contains lat/lon
if "latitude" not in ds_Sv or "longitude" not in ds_Sv:
raise ValueError("Both 'latitude' and 'longitude' must exist in the input Sv dataset.")

# Check if depth vectors are identical within each channel
if not ds_Sv["depth"].groupby("channel").map(check_identical_depth).all():
raise ValueError(
"Only Sv data with identical depth vectors across all pings "
"are allowed in the current compute_NASC implementation."
)

# Get distance from lat/lon in nautical miles
dist_nmi = get_distance_from_latlon(ds_Sv)

# Find binning indices along distance
bin_num_dist, dist_bin_idx = get_dist_bin_info(dist_nmi, cell_dist) # dist_bin_idx is 1-based

# Find binning indices along depth: channel-dependent
bin_num_depth, depth_bin_idx = get_depth_bin_info(ds_Sv, cell_depth) # depth_bin_idx is 1-based

# Compute mean sv (volume backscattering coefficient, linear scale)
# This is essentially to compute MVBS over a the cell defined here,
# which are typically larger than those used for MVBS.
# The implementation below is brute force looping, but can be optimized
# by experimenting with different delayed schemes.
# The optimized routines can then be used here and
# in commongrid.compute_MVBS and clean.estimate_noise
sv_mean = []
for ch_seq in np.arange(ds_Sv["channel"].size):
# TODO: .compute each channel sequentially?
# dask.delay within each channel?
ds_Sv_ch = ds_Sv["Sv"].isel(channel=ch_seq).data # preserve the underlying type

sv_mean_dist_depth = []
for dist_idx in np.arange(bin_num_dist) + 1: # along ping_time
sv_mean_depth = []
for depth_idx in np.arange(bin_num_depth) + 1: # along depth
# Sv dim: ping_time x depth
Sv_cut = ds_Sv_ch[dist_idx == dist_bin_idx, :][
:, depth_idx == depth_bin_idx[ch_seq]
]
sv_mean_depth.append(np.nanmean(10 ** (Sv_cut / 10)))
sv_mean_dist_depth.append(sv_mean_depth)

sv_mean.append(sv_mean_dist_depth)

# Compute mean height
# For data with uniform depth step size, mean height = vertical size of cell
height_mean = cell_depth
# TODO: generalize to variable depth step size

ds_NASC = xr.DataArray(
np.array(sv_mean) * height_mean,
dims=["channel", "distance", "depth"],
coords={
"channel": ds_Sv["channel"].values,
"distance": np.arange(bin_num_dist) * cell_dist,
"depth": np.arange(bin_num_depth) * cell_depth,
},
name="NASC",
).to_dataset()

ds_NASC["frequency_nominal"] = ds_Sv["frequency_nominal"] # re-attach frequency_nominal

# Attach attributes
_set_var_attrs(
ds_NASC["NASC"],
long_name="Nautical Areal Scattering Coefficient (NASC, m2 nmi-2)",
units="m2 nmi-2",
round_digits=3,
)
_set_var_attrs(ds_NASC["distance"], "Cumulative distance", "m", 3)
_set_var_attrs(ds_NASC["depth"], "Cell depth", "m", 3, standard_name="depth")

# Calculate and add ACDD bounding box global attributes
ds_NASC.attrs["Conventions"] = "CF-1.7,ACDD-1.3"
ds_NASC.attrs["time_coverage_start"] = np.datetime_as_string(
ds_Sv["ping_time"].min().values, timezone="UTC"
)
ds_NASC.attrs["time_coverage_end"] = np.datetime_as_string(
ds_Sv["ping_time"].max().values, timezone="UTC"
)
ds_NASC.attrs["geospatial_lat_min"] = round(float(ds_Sv["latitude"].min().values), 5)
ds_NASC.attrs["geospatial_lat_max"] = round(float(ds_Sv["latitude"].max().values), 5)
ds_NASC.attrs["geospatial_lon_min"] = round(float(ds_Sv["longitude"].min().values), 5)
ds_NASC.attrs["geospatial_lon_max"] = round(float(ds_Sv["longitude"].max().values), 5)

return ds_NASC
# def compute_NASC(
# ds_Sv: xr.Dataset,
# cell_dist: Union[int, float], # TODO: allow xr.DataArray
# cell_depth: Union[int, float], # TODO: allow xr.DataArray
# ) -> xr.Dataset:
# """
# Compute Nautical Areal Scattering Coefficient (NASC) from an Sv dataset.

# Parameters
# ----------
# ds_Sv : xr.Dataset
# A dataset containing Sv data.
# The Sv dataset must contain ``latitude``, ``longitude``, and ``depth`` as data variables.
# cell_dist: int, float
# The horizontal size of each NASC cell, in nautical miles [nmi]
# cell_depth: int, float
# The vertical size of each NASC cell, in meters [m]

# Returns
# -------
# xr.Dataset
# A dataset containing NASC

# Notes
# -----
# The NASC computation implemented here corresponds to the Echoview algorithm PRC_NASC
# https://support.echoview.com/WebHelp/Reference/Algorithms/Analysis_Variables/PRC_ABC_and_PRC_NASC.htm#PRC_NASC # noqa
# The difference is that since in echopype masking of the Sv dataset is done explicitly using
# functions in the ``mask`` subpackage so the computation only involves computing the
# mean Sv and the mean height within each cell.

# In addition, here the binning of pings into individual cells is based on the actual horizontal
# distance computed from the latitude and longitude coordinates of each ping in the Sv dataset.
# Therefore, both regular and irregular horizontal distance in the Sv dataset are allowed.
# This is different from Echoview's assumption of constant ping rate, vessel speed, and sample
# thickness when computing mean Sv.
# """
# # Check Sv contains lat/lon
# if "latitude" not in ds_Sv or "longitude" not in ds_Sv:
# raise ValueError("Both 'latitude' and 'longitude' must exist in the input Sv dataset.")

# # Check if depth vectors are identical within each channel
# if not ds_Sv["depth"].groupby("channel").map(check_identical_depth).all():
# raise ValueError(
# "Only Sv data with identical depth vectors across all pings "
# "are allowed in the current compute_NASC implementation."
# )

# # Get distance from lat/lon in nautical miles
# dist_nmi = get_distance_from_latlon(ds_Sv)

# # Find binning indices along distance
# bin_num_dist, dist_bin_idx = get_dist_bin_info(dist_nmi, cell_dist) # dist_bin_idx is 1-based

# # Find binning indices along depth: channel-dependent
# bin_num_depth, depth_bin_idx = get_depth_bin_info(ds_Sv, cell_depth) # depth_bin_idx is 1-based # noqa

# # Compute mean sv (volume backscattering coefficient, linear scale)
# # This is essentially to compute MVBS over a the cell defined here,
# # which are typically larger than those used for MVBS.
# # The implementation below is brute force looping, but can be optimized
# # by experimenting with different delayed schemes.
# # The optimized routines can then be used here and
# # in commongrid.compute_MVBS and clean.estimate_noise
# sv_mean = []
# for ch_seq in np.arange(ds_Sv["channel"].size):
# # TODO: .compute each channel sequentially?
# # dask.delay within each channel?
# ds_Sv_ch = ds_Sv["Sv"].isel(channel=ch_seq).data # preserve the underlying type

# sv_mean_dist_depth = []
# for dist_idx in np.arange(bin_num_dist) + 1: # along ping_time
# sv_mean_depth = []
# for depth_idx in np.arange(bin_num_depth) + 1: # along depth
# # Sv dim: ping_time x depth
# Sv_cut = ds_Sv_ch[dist_idx == dist_bin_idx, :][
# :, depth_idx == depth_bin_idx[ch_seq]
# ]
# sv_mean_depth.append(np.nanmean(10 ** (Sv_cut / 10)))
# sv_mean_dist_depth.append(sv_mean_depth)

# sv_mean.append(sv_mean_dist_depth)

# # Compute mean height
# # For data with uniform depth step size, mean height = vertical size of cell
# height_mean = cell_depth
# # TODO: generalize to variable depth step size

# ds_NASC = xr.DataArray(
# np.array(sv_mean) * height_mean,
# dims=["channel", "distance", "depth"],
# coords={
# "channel": ds_Sv["channel"].values,
# "distance": np.arange(bin_num_dist) * cell_dist,
# "depth": np.arange(bin_num_depth) * cell_depth,
# },
# name="NASC",
# ).to_dataset()

# ds_NASC["frequency_nominal"] = ds_Sv["frequency_nominal"] # re-attach frequency_nominal

# # Attach attributes
# _set_var_attrs(
# ds_NASC["NASC"],
# long_name="Nautical Areal Scattering Coefficient (NASC, m2 nmi-2)",
# units="m2 nmi-2",
# round_digits=3,
# )
# _set_var_attrs(ds_NASC["distance"], "Cumulative distance", "m", 3)
# _set_var_attrs(ds_NASC["depth"], "Cell depth", "m", 3, standard_name="depth")

# # Calculate and add ACDD bounding box global attributes
# ds_NASC.attrs["Conventions"] = "CF-1.7,ACDD-1.3"
# ds_NASC.attrs["time_coverage_start"] = np.datetime_as_string(
# ds_Sv["ping_time"].min().values, timezone="UTC"
# )
# ds_NASC.attrs["time_coverage_end"] = np.datetime_as_string(
# ds_Sv["ping_time"].max().values, timezone="UTC"
# )
# ds_NASC.attrs["geospatial_lat_min"] = round(float(ds_Sv["latitude"].min().values), 5)
# ds_NASC.attrs["geospatial_lat_max"] = round(float(ds_Sv["latitude"].max().values), 5)
# ds_NASC.attrs["geospatial_lon_min"] = round(float(ds_Sv["longitude"].min().values), 5)
# ds_NASC.attrs["geospatial_lon_max"] = round(float(ds_Sv["longitude"].max().values), 5)

# return ds_NASC


def regrid():
Expand Down
5 changes: 5 additions & 0 deletions echopype/commongrid/nasc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
"""
An overhaul is required for the below compute_NASC implementation, to:
- increase the computational efficiency
- debug the current code of any discrepancy against Echoview implementation
- potentially provide an alternative based on ping-by-ping Sv
This script contains functions used by commongrid.compute_NASC,
but a subset of these overlap with operations needed
for commongrid.compute_MVBS and clean.estimate_noise.
Expand Down
34 changes: 17 additions & 17 deletions echopype/tests/commongrid/test_nasc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ def ek60_path(test_path):
return test_path['EK60']


def test_compute_NASC(ek60_path):
raw_path = ek60_path / "ncei-wcsd/Summer2017-D20170620-T011027.raw"

ed = open_raw(raw_path, sonar_model="EK60")
ds_Sv = add_depth(add_location(compute_Sv(ed), ed, nmea_sentence="GGA"))
cell_dist = 0.1
cell_depth = 20
ds_NASC = compute_NASC(ds_Sv, cell_dist, cell_depth)

dist_nmi = get_distance_from_latlon(ds_Sv)

# Check dimensions
da_NASC = ds_NASC["NASC"]
assert da_NASC.dims == ("channel", "distance", "depth")
assert np.all(ds_NASC["channel"].values == ds_Sv["channel"].values)
assert da_NASC["depth"].size == np.ceil(ds_Sv["depth"].max() / cell_depth)
assert da_NASC["distance"].size == np.ceil(dist_nmi.max() / cell_dist)
# def test_compute_NASC(ek60_path):
# raw_path = ek60_path / "ncei-wcsd/Summer2017-D20170620-T011027.raw"

# ed = open_raw(raw_path, sonar_model="EK60")
# ds_Sv = add_depth(add_location(compute_Sv(ed), ed, nmea_sentence="GGA"))
# cell_dist = 0.1
# cell_depth = 20
# ds_NASC = compute_NASC(ds_Sv, cell_dist, cell_depth)

# dist_nmi = get_distance_from_latlon(ds_Sv)

# # Check dimensions
# da_NASC = ds_NASC["NASC"]
# assert da_NASC.dims == ("channel", "distance", "depth")
# assert np.all(ds_NASC["channel"].values == ds_Sv["channel"].values)
# assert da_NASC["depth"].size == np.ceil(ds_Sv["depth"].max() / cell_depth)
# assert da_NASC["distance"].size == np.ceil(dist_nmi.max() / cell_dist)

0 comments on commit d2ffc3b

Please sign in to comment.