Lightning-AI · Borda · Jun 22, 2021 · Jun 13, 2021 · Jun 13, 2021 · Jun 13, 2021
@@ -0,0 +1,16 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from torchmetrics.functional.audio.si_sdr import si_sdr
+from torchmetrics.functional.audio.si_snr import si_snr
+from torchmetrics.functional.audio.snr import snr
@@ -0,0 +1,60 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+
+def si_sdr(
+    target: torch.Tensor,
+    estimate: torch.Tensor,
+    zero_mean: bool = False,
+    EPS: float = 1e-8,
+) -> torch.Tensor:
+    """Compute the scale-invariant signal-to-distortion ratio (SI-SDR) in decibels.
+
+    The target is first rescaled by ``alpha = <estimate, target> / (<target, target> + EPS)``,
+    so that multiplying the estimate by a constant gain leaves the result unchanged (up to
+    ``EPS``). The part of the estimate that the rescaled target does not account for is
+    treated as distortion.
+
+    Args:
+        target: reference signal of shape ``[..., time]``
+        estimate: estimated signal with the same shape as ``target``
+        zero_mean: if ``True``, subtract the mean over the last dimension from both
+            ``target`` and ``estimate`` before computing the ratio
+        EPS: small constant added to denominators and to the argument of the logarithm
+            for numerical stability
+
+    Raises:
+        TypeError: if ``target`` and ``estimate`` have different shapes
+
+    Returns:
+        Tensor of shape ``[...]`` (the last dimension is reduced) holding the SI-SDR values
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.functional.audio import si_sdr
+        >>> target = torch.tensor([3.0, -0.5, 2.0, 7.0])
+        >>> estimate = torch.tensor([2.5, 0.0, 2.0, 8.0])
+        >>> si_sdr_val = si_sdr(target, estimate)
+        >>> si_sdr_val
+        tensor(18.4030)
+
+    References:
+        [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech
+         and Signal Processing (ICASSP) 2019.
+    """
+
+    if target.shape != estimate.shape:
+        raise TypeError(f"Inputs must be of shape [..., time], got {target.shape} and {estimate.shape} instead")
+
+    if zero_mean:
+        target = target - torch.mean(target, dim=-1, keepdim=True)
+        estimate = estimate - torch.mean(estimate, dim=-1, keepdim=True)
+
+    # Scale that projects the estimate onto the target; EPS guards against an all-zero target.
+    alpha = torch.sum(estimate * target, dim=-1, keepdim=True) / (torch.sum(target**2, dim=-1, keepdim=True) + EPS)
+    target_scaled = alpha * target
+
+    # Everything in the estimate that the rescaled target does not explain counts as distortion.
+    noise = target_scaled - estimate
+
+    # EPS is applied twice: once against a zero denominator, once against log10(0).
+    si_sdr_value = torch.sum(target_scaled**2, dim=-1) / (torch.sum(noise**2, dim=-1) + EPS)
+    si_sdr_value = 10 * torch.log10(si_sdr_value + EPS)
+
+    return si_sdr_value
@@ -0,0 +1,47 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+from .si_sdr import si_sdr
+
+
+def si_snr(target, estimate, EPS=1e-8):
+    """Compute the scale-invariant signal-to-noise ratio (SI-SNR).
+
+    The value is obtained by forwarding the inputs to ``si_sdr`` with ``zero_mean=True``,
+    i.e. SI-SNR is evaluated as SI-SDR on mean-removed signals.
+
+    Args:
+        target (Tensor): shape [..., time]
+        estimate (Tensor): shape [..., time]
+        EPS (float, optional): a small value for numerical stability, passed through to
+            ``si_sdr``. Defaults to 1e-8.
+
+    Raises:
+        TypeError: if target and estimate have a different shape
+
+    Returns:
+        Tensor: si-snr value with a shape of [...]
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.functional.audio import si_snr
+        >>> target = torch.tensor([3.0, -0.5, 2.0, 7.0])
+        >>> estimate = torch.tensor([2.5, 0.0, 2.0, 8.0])
+        >>> si_snr_val = si_snr(target, estimate)
+        >>> si_snr_val
+        tensor(15.0918)
+
+    References:
+        [1] Y. Luo and N. Mesgarani, "TaSNet: Time-Domain Audio Separation Network for Real-Time, Single-Channel Speech
+         Separation," 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018, pp.
+         696-700, doi: 10.1109/ICASSP.2018.8462116.
+    """
+
+    # Delegate to si_sdr with mean removal switched on.
+    return si_sdr(target, estimate, zero_mean=True, EPS=EPS)
@@ -0,0 +1,58 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import Tensor
+
+
+def snr(target, estimate, zero_mean=False, EPS=1e-8) -> Tensor:
+    """Compute the signal-to-noise ratio (SNR) in decibels.
+
+    The noise is taken to be ``target - estimate``; the result is
+    ``10 * log10(sum(target**2) / (sum(noise**2) + EPS) + EPS)`` with both sums running
+    over the last dimension.
+
+    Args:
+        target (Tensor): shape [..., time]
+        estimate (Tensor): shape [..., time]
+        zero_mean (bool): whether to subtract the mean over the last dimension from
+            target and estimate before computing the ratio
+        EPS (float, optional): a small value for numerical stability. Defaults to 1e-8.
+
+    Raises:
+        TypeError: if target and estimate have a different shape
+
+    Returns:
+        Tensor: snr value with a shape of [...]
+
+    Example:
+        >>> import torch
+        >>> from torchmetrics.functional.audio import snr
+        >>> target = torch.tensor([3.0, -0.5, 2.0, 7.0])
+        >>> estimate = torch.tensor([2.5, 0.0, 2.0, 8.0])
+        >>> snr_val = snr(target, estimate)
+        >>> snr_val
+        tensor(16.1805)
+
+    References:
+        [1] Le Roux, Jonathan, et al. "SDR half-baked or well done." IEEE International Conference on Acoustics, Speech
+         and Signal Processing (ICASSP) 2019.
+    """
+
+    if target.shape != estimate.shape:
+        raise TypeError(f"Inputs must be of shape [..., time], got {target.shape} and {estimate.shape} instead")
+
+    if zero_mean:
+        # Remove the per-signal offset along the time axis.
+        target = target - target.mean(dim=-1, keepdim=True)
+        estimate = estimate - estimate.mean(dim=-1, keepdim=True)
+
+    signal_power = (target**2).sum(dim=-1)
+    noise_power = ((target - estimate) ** 2).sum(dim=-1)
+
+    # EPS guards the division against zero noise and the logarithm against a zero ratio.
+    power_ratio = signal_power / (noise_power + EPS)
+    return 10 * torch.log10(power_ratio + EPS)