Merge pull request #202 from DHI/skill_df

Testable skill table
DHI · Jun 12, 2023 · 5b8bc1f · 5b8bc1f
2 parents b30ae7f + 2d85649
commit 5b8bc1f
Show file tree

Hide file tree

Showing 4 changed files with 155 additions and 31 deletions.
diff --git a/modelskill/metrics.py b/modelskill/metrics.py
@@ -55,7 +55,7 @@
 0.6666666666666666
 """
 import sys
-from typing import Set, Tuple
+from typing import Callable, Set, Tuple, Union
 import warnings
 import numpy as np
 
@@ -562,6 +562,42 @@ def _std_mod(obs: np.ndarray, model: np.ndarray) -> float:
     return model.std()
 
 
+METRICS_WITH_DIMENSION = {"urmse", "rmse", "bias", "mae"}  # TODO is this complete? only short names are listed - verify
+
+
+def metric_has_units(metric: Union[str, Callable]) -> bool:
+    """Check if a metric has units (dimension).
+
+    Some metrics are dimensionless, others have the same dimension as the observations.
+
+    Parameters
+    ----------
+    metric : str or callable
+        Metric name, or a metric function (looked up by its ``__name__``)
+
+    Returns
+    -------
+    bool
+        True if metric has a dimension, False otherwise
+
+    Raises
+    ------
+    ValueError
+        If the metric name is not in DEFINED_METRICS
+
+    Examples
+    --------
+    >>> metric_has_units("rmse")
+    True
+    >>> metric_has_units("kge")
+    False
+    """
+    name = metric.__name__ if callable(metric) else metric
+    if name not in DEFINED_METRICS:
+        raise ValueError(f"Metric {name} not defined. Choose from {DEFINED_METRICS}")
+    return name in METRICS_WITH_DIMENSION
+
+
 DEFINED_METRICS: Set[str] = set(
     [
         func

diff --git a/modelskill/plot.py b/modelskill/plot.py
@@ -1,8 +1,10 @@
-from typing import List, Tuple, Union, Optional, Sequence
+import math
+from typing import List, Tuple, Union, Optional, Optional, Sequence
 import warnings
 from matplotlib.axes import Axes
 import numpy as np
 import pandas as pd
+import pandas as pd
 from collections import namedtuple
 from scipy import interpolate
 
@@ -15,6 +17,8 @@
 from .plot_taylor import TaylorDiagram
 import modelskill.settings as settings
 from .settings import options, register_option
+from .observation import unit_display_name
+from .metrics import metric_has_units
 
 
 register_option("plot.scatter.points.size", 20, validator=settings.is_positive)
@@ -126,7 +130,7 @@ def quantiles_xy(
 
     Parameters
     ----------
-    x: np.ndarray, 1d        
+    x: np.ndarray, 1d
     y: np.ndarray, 1d
     q: int, Sequence[float]
         quantiles to calculate
@@ -147,10 +151,11 @@ def quantiles_xy(
 
     if not isinstance(quantiles, (int, Sequence)):
         raise TypeError("quantiles must be an int or sequence of floats")
-    
+
     q = np.linspace(0, 1, num=quantiles) if isinstance(quantiles, int) else quantiles
     return np.quantile(x, q=q), np.quantile(y, q=q)
 
+
 def _scatter_matplotlib(
     *,
     x,
@@ -241,11 +246,13 @@ def _scatter_matplotlib(
         max_cbar = ticks[-1]
         cbar.set_label("# points")
 
-    plt.title(title)
-    # Add skill table
-    if skill_df is not None:
-        _plot_summary_table(skill_df, units, max_cbar=max_cbar)
-    return ax
+        plt.title(title)
+        # Add skill table
+        if skill_df is not None:
+            df = skill_df.df
+            assert isinstance(df, pd.DataFrame)
+            _plot_summary_table(df, units, max_cbar=max_cbar)
+        return ax
 
 
 def _scatter_plotly(
@@ -475,7 +482,7 @@ def scatter(
     ymin, ymax = y.min(), y.max()
     xymin = min([xmin, ymin])
     xymax = max([xmax, ymax])
-    
+
     nbins_hist, binsize = _get_bins(bins, xymin=xymin, xymax=xymax)
 
     if xlim is None:
@@ -484,7 +491,6 @@ def scatter(
     if ylim is None:
         ylim = (xymin - binsize, xymax + binsize)
 
-
     x_trend = np.array([xlim[0], xlim[1]])
 
     if show_hist and show_density:
@@ -717,29 +723,51 @@ def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"):
     return Z_grid
 
 
-def _plot_summary_table(skill_df, units, max_cbar):
-    stats_with_units = ["bias", "rmse", "urmse", "mae"]
-    max_str_len = skill_df.columns.str.len().max()
-    lines = []
+def _format_skill_line(
+    series: pd.Series, units: str, precision: int, max_str_len: int
+) -> str:
 
-    for col in skill_df.columns:
-        if col == "model" or col == "variable":
-            continue
-        if col in stats_with_units:
+    name = series.name
+
+    item_unit = " "
+
+    if name == "n":
+        fvalue = series.values[0]
+    else:
+        if metric_has_units(metric=name):
             # if statistic has dimensions, then add units
-            item_unit = units
-        else:
-            # else, add empty space (for fomatting)
-            item_unit = " "
-        if col == "n":
-            # Number of samples, integer, else, 2 decimals
-            decimals = f".{0}f"
-        else:
-            decimals = f".{2}f"
-        lines.append(
-            f"{(col.ljust(max_str_len)).upper()} = {np.round(skill_df[col].values[0],2): {decimals}} {item_unit}"
-        )
+            item_unit = unit_display_name(units)
+
+        rounded_value = np.round(series.values[0], precision)
+        fmt = f".{precision}f"
+        fvalue = f"{rounded_value:{fmt}}"
+
+    name = series.name.ljust(max_str_len).upper()
+
+    return f"{name} =  {fvalue} {item_unit}"
+
+
+def format_skill_df(df: pd.DataFrame, units: str, precision: int = 2) -> List[str]:
+    """Format the first row of a skill table as one text line per column.
+
+    "model" and "variable" columns are skipped; the caller's ``df`` is left untouched.
+    """
+    # padding width is taken from all column names, before any are dropped
+    max_str_len = df.columns.str.len().max()
+
+    # drop() without inplace returns a new frame, so the input is not mutated
+    metrics_df = df.drop(columns=["model", "variable"], errors="ignore")
+    return [
+        _format_skill_line(metrics_df[col], units, precision, max_str_len)
+        for col in metrics_df.columns
+    ]
+
+
+def _plot_summary_table(
+    df: pd.DataFrame, units: str, max_cbar: Optional[float] = None
+) -> None:
 
+    lines = format_skill_df(df, units)
     text_ = "\n".join(lines)
 
     if max_cbar is None:

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -174,3 +174,22 @@ def test_willmott():
     mod = np.array([1.0, 0.0, 0.5])  # mean 0.5
 
     assert mtr.willmott(obs, mod) == pytest.approx(1 - 0.5 / 1.5)
+
+
+def test_metric_has_dimension():
+    """metric_has_units accepts names or functions and rejects unknown names."""
+
+    # the following metrics are dimensionless
+    dimensionless = ["nse", mtr.nash_sutcliffe_efficiency, "kge", "r2"]
+    for metric in dimensionless:
+        assert not mtr.metric_has_units(metric)
+
+    # while these metrics are in units of the observations
+    with_units = ["mae", "bias", "rmse", mtr.rmse]
+    for metric in with_units:
+        assert mtr.metric_has_units(metric)
+
+    # a name that is not a defined metric is rejected
+    unknown = "unknown"
+    with pytest.raises(ValueError):
+        mtr.metric_has_units(unknown)
diff --git a/tests/test_plot.py b/tests/test_plot.py
@@ -1,9 +1,50 @@
 import numpy as np
 import pytest
+import pandas as pd
 
+from modelskill.plot import format_skill_df
 from modelskill.plot import sample_points
 
 
+def test_format_skill_df():
+    """Check the text lines format_skill_df builds from a one-row skill table."""
+    # skill values for observation smhi_2095_klagshamn
+    skill = {
+        "n": 167,
+        "bias": 1.033099e-09,
+        "rmse": 0.040645,
+        "urmse": 0.040645,
+        "mae": 0.033226,
+        "cc": 0.841135,
+        "si": 0.376413,
+        "r2": 0.706335,
+    }
+    df = pd.DataFrame(skill, index=["smhi_2095_klagshamn"])
+    # units -> expected fragment of each of the first six lines, in column order
+    expected = {
+        "degC": [
+            "N     =  167",
+            "BIAS  =  0.00 degC",
+            "RMSE  =  0.04 degC",
+            "URMSE =  0.04 degC",
+            "MAE   =  0.03 degC",
+            "CC    =  0.84 ",
+        ],
+        "meter": [
+            "N     =  167",
+            "BIAS  =  0.00 m",
+            "RMSE  =  0.04 m",
+            "URMSE =  0.04 m",
+            "MAE   =  0.03 m",
+            "CC    =  0.84 ",
+        ],
+    }
+    for units, fragments in expected.items():
+        lines = format_skill_df(df, units=units)
+        for i, fragment in enumerate(fragments):
+            assert fragment in lines[i]
+
+
 @pytest.fixture
 def x_y():
     np.random.seed(42)