From d2ffc3b7daa777a5d03f020f09e829b7907fbca9 Mon Sep 17 00:00:00 2001 From: Wu-Jung Lee Date: Sun, 27 Aug 2023 10:39:46 -0400 Subject: [PATCH] Temporarily remove `compute_NASC` (#1136) * comment out nasc-related sections * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove import * fix pre commit problems --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/data-proc-func.ipynb | 3 +- echopype/commongrid/__init__.py | 3 +- echopype/commongrid/api.py | 258 ++++++++++++------------- echopype/commongrid/nasc.py | 5 + echopype/tests/commongrid/test_nasc.py | 34 ++-- 5 files changed, 150 insertions(+), 153 deletions(-) diff --git a/docs/source/data-proc-func.ipynb b/docs/source/data-proc-func.ipynb index c48aa3459..97c3ee426 100644 --- a/docs/source/data-proc-func.ipynb +++ b/docs/source/data-proc-func.ipynb @@ -24,7 +24,8 @@ " - Quantities that can be computed from data stored in the `EchoData` object, e.g., for EK80 broadband data, split-beam angle can be computed from the complex samples\n", " - External dataset, e.g., AZFP echosounders do not record geospatial data, but typically there is a companion dataset with GPS information\n", "- [`clean`](echopype.clean): Reduce variabilities in backscatter data by perform noise removal operations. Currently contains only a simple noise removal function implementing the algorithm in {cite}`DeRobertis2007_noise`.\n", - "- [`commongrid`](echopype.commongrid): Enhance the spatial and temporal coherence of data. Currently contain functions to compute mean volume backscattering strength (MVBS) and nautical areal backscattering coefficients (NASC) that both result in gridded data at uniform spatial and temporal intervals.\n", + "- [`commongrid`](echopype.commongrid): Enhance the spatial and temporal coherence of data. Currently contains functions to compute mean volume backscattering strength (MVBS) that result in gridded data at uniform spatial and temporal intervals based on either number of indices or label values (phyiscal units).\n", + " \n", "- [`qc`](echopype.qc): Handle unexpected irregularities in the data. Currently contains only functions to handle timestamp reversal in EK60/EK80 raw data.\n", "- [`mask`](echopype.mask): Create or apply mask to segment data\n", "- [`metrics`](echopype.metrics): Calculate simple summary statistics from data\n", diff --git a/echopype/commongrid/__init__.py b/echopype/commongrid/__init__.py index cabbf5df7..642434353 100644 --- a/echopype/commongrid/__init__.py +++ b/echopype/commongrid/__init__.py @@ -1,7 +1,6 @@ -from .api import compute_MVBS, compute_MVBS_index_binning, compute_NASC +from .api import compute_MVBS, compute_MVBS_index_binning __all__ = [ "compute_MVBS", "compute_MVBS_index_binning", - "compute_NASC", ] diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index f05cdb96b..ee71c12ec 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -1,20 +1,12 @@ """ Functions for enhancing the spatial and temporal coherence of data. """ -from typing import Union - import numpy as np import pandas as pd import xarray as xr from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level from .mvbs import get_MVBS_along_channels -from .nasc import ( - check_identical_depth, - get_depth_bin_info, - get_dist_bin_info, - get_distance_from_latlon, -) def _set_var_attrs(da, long_name, units, round_digits, standard_name=None): @@ -255,131 +247,131 @@ def compute_MVBS_index_binning(ds_Sv, range_sample_num=100, ping_num=100): return ds_MVBS -def compute_NASC( - ds_Sv: xr.Dataset, - cell_dist: Union[int, float], # TODO: allow xr.DataArray - cell_depth: Union[int, float], # TODO: allow xr.DataArray -) -> xr.Dataset: - """ - Compute Nautical Areal Scattering Coefficient (NASC) from an Sv dataset. - - Parameters - ---------- - ds_Sv : xr.Dataset - A dataset containing Sv data. - The Sv dataset must contain ``latitude``, ``longitude``, and ``depth`` as data variables. - cell_dist: int, float - The horizontal size of each NASC cell, in nautical miles [nmi] - cell_depth: int, float - The vertical size of each NASC cell, in meters [m] - - Returns - ------- - xr.Dataset - A dataset containing NASC - - Notes - ----- - The NASC computation implemented here corresponds to the Echoview algorithm PRC_NASC - https://support.echoview.com/WebHelp/Reference/Algorithms/Analysis_Variables/PRC_ABC_and_PRC_NASC.htm#PRC_NASC # noqa - The difference is that since in echopype masking of the Sv dataset is done explicitly using - functions in the ``mask`` subpackage so the computation only involves computing the - mean Sv and the mean height within each cell. - - In addition, here the binning of pings into individual cells is based on the actual horizontal - distance computed from the latitude and longitude coordinates of each ping in the Sv dataset. - Therefore, both regular and irregular horizontal distance in the Sv dataset are allowed. - This is different from Echoview's assumption of constant ping rate, vessel speed, and sample - thickness when computing mean Sv. - """ - # Check Sv contains lat/lon - if "latitude" not in ds_Sv or "longitude" not in ds_Sv: - raise ValueError("Both 'latitude' and 'longitude' must exist in the input Sv dataset.") - - # Check if depth vectors are identical within each channel - if not ds_Sv["depth"].groupby("channel").map(check_identical_depth).all(): - raise ValueError( - "Only Sv data with identical depth vectors across all pings " - "are allowed in the current compute_NASC implementation." - ) - - # Get distance from lat/lon in nautical miles - dist_nmi = get_distance_from_latlon(ds_Sv) - - # Find binning indices along distance - bin_num_dist, dist_bin_idx = get_dist_bin_info(dist_nmi, cell_dist) # dist_bin_idx is 1-based - - # Find binning indices along depth: channel-dependent - bin_num_depth, depth_bin_idx = get_depth_bin_info(ds_Sv, cell_depth) # depth_bin_idx is 1-based - - # Compute mean sv (volume backscattering coefficient, linear scale) - # This is essentially to compute MVBS over a the cell defined here, - # which are typically larger than those used for MVBS. - # The implementation below is brute force looping, but can be optimized - # by experimenting with different delayed schemes. - # The optimized routines can then be used here and - # in commongrid.compute_MVBS and clean.estimate_noise - sv_mean = [] - for ch_seq in np.arange(ds_Sv["channel"].size): - # TODO: .compute each channel sequentially? - # dask.delay within each channel? - ds_Sv_ch = ds_Sv["Sv"].isel(channel=ch_seq).data # preserve the underlying type - - sv_mean_dist_depth = [] - for dist_idx in np.arange(bin_num_dist) + 1: # along ping_time - sv_mean_depth = [] - for depth_idx in np.arange(bin_num_depth) + 1: # along depth - # Sv dim: ping_time x depth - Sv_cut = ds_Sv_ch[dist_idx == dist_bin_idx, :][ - :, depth_idx == depth_bin_idx[ch_seq] - ] - sv_mean_depth.append(np.nanmean(10 ** (Sv_cut / 10))) - sv_mean_dist_depth.append(sv_mean_depth) - - sv_mean.append(sv_mean_dist_depth) - - # Compute mean height - # For data with uniform depth step size, mean height = vertical size of cell - height_mean = cell_depth - # TODO: generalize to variable depth step size - - ds_NASC = xr.DataArray( - np.array(sv_mean) * height_mean, - dims=["channel", "distance", "depth"], - coords={ - "channel": ds_Sv["channel"].values, - "distance": np.arange(bin_num_dist) * cell_dist, - "depth": np.arange(bin_num_depth) * cell_depth, - }, - name="NASC", - ).to_dataset() - - ds_NASC["frequency_nominal"] = ds_Sv["frequency_nominal"] # re-attach frequency_nominal - - # Attach attributes - _set_var_attrs( - ds_NASC["NASC"], - long_name="Nautical Areal Scattering Coefficient (NASC, m2 nmi-2)", - units="m2 nmi-2", - round_digits=3, - ) - _set_var_attrs(ds_NASC["distance"], "Cumulative distance", "m", 3) - _set_var_attrs(ds_NASC["depth"], "Cell depth", "m", 3, standard_name="depth") - - # Calculate and add ACDD bounding box global attributes - ds_NASC.attrs["Conventions"] = "CF-1.7,ACDD-1.3" - ds_NASC.attrs["time_coverage_start"] = np.datetime_as_string( - ds_Sv["ping_time"].min().values, timezone="UTC" - ) - ds_NASC.attrs["time_coverage_end"] = np.datetime_as_string( - ds_Sv["ping_time"].max().values, timezone="UTC" - ) - ds_NASC.attrs["geospatial_lat_min"] = round(float(ds_Sv["latitude"].min().values), 5) - ds_NASC.attrs["geospatial_lat_max"] = round(float(ds_Sv["latitude"].max().values), 5) - ds_NASC.attrs["geospatial_lon_min"] = round(float(ds_Sv["longitude"].min().values), 5) - ds_NASC.attrs["geospatial_lon_max"] = round(float(ds_Sv["longitude"].max().values), 5) - - return ds_NASC +# def compute_NASC( +# ds_Sv: xr.Dataset, +# cell_dist: Union[int, float], # TODO: allow xr.DataArray +# cell_depth: Union[int, float], # TODO: allow xr.DataArray +# ) -> xr.Dataset: +# """ +# Compute Nautical Areal Scattering Coefficient (NASC) from an Sv dataset. + +# Parameters +# ---------- +# ds_Sv : xr.Dataset +# A dataset containing Sv data. +# The Sv dataset must contain ``latitude``, ``longitude``, and ``depth`` as data variables. +# cell_dist: int, float +# The horizontal size of each NASC cell, in nautical miles [nmi] +# cell_depth: int, float +# The vertical size of each NASC cell, in meters [m] + +# Returns +# ------- +# xr.Dataset +# A dataset containing NASC + +# Notes +# ----- +# The NASC computation implemented here corresponds to the Echoview algorithm PRC_NASC +# https://support.echoview.com/WebHelp/Reference/Algorithms/Analysis_Variables/PRC_ABC_and_PRC_NASC.htm#PRC_NASC # noqa +# The difference is that since in echopype masking of the Sv dataset is done explicitly using +# functions in the ``mask`` subpackage so the computation only involves computing the +# mean Sv and the mean height within each cell. + +# In addition, here the binning of pings into individual cells is based on the actual horizontal +# distance computed from the latitude and longitude coordinates of each ping in the Sv dataset. +# Therefore, both regular and irregular horizontal distance in the Sv dataset are allowed. +# This is different from Echoview's assumption of constant ping rate, vessel speed, and sample +# thickness when computing mean Sv. +# """ +# # Check Sv contains lat/lon +# if "latitude" not in ds_Sv or "longitude" not in ds_Sv: +# raise ValueError("Both 'latitude' and 'longitude' must exist in the input Sv dataset.") + +# # Check if depth vectors are identical within each channel +# if not ds_Sv["depth"].groupby("channel").map(check_identical_depth).all(): +# raise ValueError( +# "Only Sv data with identical depth vectors across all pings " +# "are allowed in the current compute_NASC implementation." +# ) + +# # Get distance from lat/lon in nautical miles +# dist_nmi = get_distance_from_latlon(ds_Sv) + +# # Find binning indices along distance +# bin_num_dist, dist_bin_idx = get_dist_bin_info(dist_nmi, cell_dist) # dist_bin_idx is 1-based + +# # Find binning indices along depth: channel-dependent +# bin_num_depth, depth_bin_idx = get_depth_bin_info(ds_Sv, cell_depth) # depth_bin_idx is 1-based # noqa + +# # Compute mean sv (volume backscattering coefficient, linear scale) +# # This is essentially to compute MVBS over a the cell defined here, +# # which are typically larger than those used for MVBS. +# # The implementation below is brute force looping, but can be optimized +# # by experimenting with different delayed schemes. +# # The optimized routines can then be used here and +# # in commongrid.compute_MVBS and clean.estimate_noise +# sv_mean = [] +# for ch_seq in np.arange(ds_Sv["channel"].size): +# # TODO: .compute each channel sequentially? +# # dask.delay within each channel? +# ds_Sv_ch = ds_Sv["Sv"].isel(channel=ch_seq).data # preserve the underlying type + +# sv_mean_dist_depth = [] +# for dist_idx in np.arange(bin_num_dist) + 1: # along ping_time +# sv_mean_depth = [] +# for depth_idx in np.arange(bin_num_depth) + 1: # along depth +# # Sv dim: ping_time x depth +# Sv_cut = ds_Sv_ch[dist_idx == dist_bin_idx, :][ +# :, depth_idx == depth_bin_idx[ch_seq] +# ] +# sv_mean_depth.append(np.nanmean(10 ** (Sv_cut / 10))) +# sv_mean_dist_depth.append(sv_mean_depth) + +# sv_mean.append(sv_mean_dist_depth) + +# # Compute mean height +# # For data with uniform depth step size, mean height = vertical size of cell +# height_mean = cell_depth +# # TODO: generalize to variable depth step size + +# ds_NASC = xr.DataArray( +# np.array(sv_mean) * height_mean, +# dims=["channel", "distance", "depth"], +# coords={ +# "channel": ds_Sv["channel"].values, +# "distance": np.arange(bin_num_dist) * cell_dist, +# "depth": np.arange(bin_num_depth) * cell_depth, +# }, +# name="NASC", +# ).to_dataset() + +# ds_NASC["frequency_nominal"] = ds_Sv["frequency_nominal"] # re-attach frequency_nominal + +# # Attach attributes +# _set_var_attrs( +# ds_NASC["NASC"], +# long_name="Nautical Areal Scattering Coefficient (NASC, m2 nmi-2)", +# units="m2 nmi-2", +# round_digits=3, +# ) +# _set_var_attrs(ds_NASC["distance"], "Cumulative distance", "m", 3) +# _set_var_attrs(ds_NASC["depth"], "Cell depth", "m", 3, standard_name="depth") + +# # Calculate and add ACDD bounding box global attributes +# ds_NASC.attrs["Conventions"] = "CF-1.7,ACDD-1.3" +# ds_NASC.attrs["time_coverage_start"] = np.datetime_as_string( +# ds_Sv["ping_time"].min().values, timezone="UTC" +# ) +# ds_NASC.attrs["time_coverage_end"] = np.datetime_as_string( +# ds_Sv["ping_time"].max().values, timezone="UTC" +# ) +# ds_NASC.attrs["geospatial_lat_min"] = round(float(ds_Sv["latitude"].min().values), 5) +# ds_NASC.attrs["geospatial_lat_max"] = round(float(ds_Sv["latitude"].max().values), 5) +# ds_NASC.attrs["geospatial_lon_min"] = round(float(ds_Sv["longitude"].min().values), 5) +# ds_NASC.attrs["geospatial_lon_max"] = round(float(ds_Sv["longitude"].max().values), 5) + +# return ds_NASC def regrid(): diff --git a/echopype/commongrid/nasc.py b/echopype/commongrid/nasc.py index b32708679..2656f2bc7 100644 --- a/echopype/commongrid/nasc.py +++ b/echopype/commongrid/nasc.py @@ -1,4 +1,9 @@ """ +An overhaul is required for the below compute_NASC implementation, to: +- increase the computational efficiency +- debug the current code of any discrepancy against Echoview implementation +- potentially provide an alternative based on ping-by-ping Sv + This script contains functions used by commongrid.compute_NASC, but a subset of these overlap with operations needed for commongrid.compute_MVBS and clean.estimate_noise. diff --git a/echopype/tests/commongrid/test_nasc.py b/echopype/tests/commongrid/test_nasc.py index 8f8a1ff41..b950e7fd0 100644 --- a/echopype/tests/commongrid/test_nasc.py +++ b/echopype/tests/commongrid/test_nasc.py @@ -19,20 +19,20 @@ def ek60_path(test_path): return test_path['EK60'] -def test_compute_NASC(ek60_path): - raw_path = ek60_path / "ncei-wcsd/Summer2017-D20170620-T011027.raw" - - ed = open_raw(raw_path, sonar_model="EK60") - ds_Sv = add_depth(add_location(compute_Sv(ed), ed, nmea_sentence="GGA")) - cell_dist = 0.1 - cell_depth = 20 - ds_NASC = compute_NASC(ds_Sv, cell_dist, cell_depth) - - dist_nmi = get_distance_from_latlon(ds_Sv) - - # Check dimensions - da_NASC = ds_NASC["NASC"] - assert da_NASC.dims == ("channel", "distance", "depth") - assert np.all(ds_NASC["channel"].values == ds_Sv["channel"].values) - assert da_NASC["depth"].size == np.ceil(ds_Sv["depth"].max() / cell_depth) - assert da_NASC["distance"].size == np.ceil(dist_nmi.max() / cell_dist) +# def test_compute_NASC(ek60_path): +# raw_path = ek60_path / "ncei-wcsd/Summer2017-D20170620-T011027.raw" + +# ed = open_raw(raw_path, sonar_model="EK60") +# ds_Sv = add_depth(add_location(compute_Sv(ed), ed, nmea_sentence="GGA")) +# cell_dist = 0.1 +# cell_depth = 20 +# ds_NASC = compute_NASC(ds_Sv, cell_dist, cell_depth) + +# dist_nmi = get_distance_from_latlon(ds_Sv) + +# # Check dimensions +# da_NASC = ds_NASC["NASC"] +# assert da_NASC.dims == ("channel", "distance", "depth") +# assert np.all(ds_NASC["channel"].values == ds_Sv["channel"].values) +# assert da_NASC["depth"].size == np.ceil(ds_Sv["depth"].max() / cell_depth) +# assert da_NASC["distance"].size == np.ceil(dist_nmi.max() / cell_dist)