Skip to content

Commit

Permalink
Merge pull request #202 from DHI/skill_df
Browse files Browse the repository at this point in the history
Testable skill table
  • Loading branch information
ecomodeller authored Jun 12, 2023
2 parents b30ae7f + 2d85649 commit 5b8bc1f
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 31 deletions.
38 changes: 37 additions & 1 deletion modelskill/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
0.6666666666666666
"""
import sys
from typing import Set, Tuple
from typing import Callable, Set, Tuple, Union
import warnings
import numpy as np

Expand Down Expand Up @@ -562,6 +562,42 @@ def _std_mod(obs: np.ndarray, model: np.ndarray) -> float:
return model.std()


# Names of metrics that are expressed in the units of the observations
# (as opposed to dimensionless metrics).
METRICS_WITH_DIMENSION = {"urmse", "rmse", "bias", "mae"}  # TODO is this complete?


def metric_has_units(metric: Union[str, Callable]) -> bool:
    """Check if a metric has units (dimension).

    Some metrics are dimensionless, others have the same dimension as the
    observations.

    Parameters
    ----------
    metric : str or callable
        Metric name or function

    Returns
    -------
    bool
        True if metric has a dimension, False otherwise

    Raises
    ------
    ValueError
        If the metric name (or the name of the callable) is not one of
        DEFINED_METRICS.

    Examples
    --------
    >>> metric_has_units("rmse")
    True
    >>> metric_has_units("kge")
    False
    """
    if callable(metric):
        # Callables without a __name__ (e.g. functools.partial objects) fall
        # back to their repr so they are reported through the ValueError
        # below instead of leaking an AttributeError.
        name = getattr(metric, "__name__", repr(metric))
    else:
        name = metric

    if name not in DEFINED_METRICS:
        raise ValueError(f"Metric {name} not defined. Choose from {DEFINED_METRICS}")

    return name in METRICS_WITH_DIMENSION


DEFINED_METRICS: Set[str] = set(
[
func
Expand Down
88 changes: 58 additions & 30 deletions modelskill/plot.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import List, Tuple, Union, Optional, Sequence
import math
from typing import List, Tuple, Union, Optional, Optional, Sequence
import warnings
from matplotlib.axes import Axes
import numpy as np
import pandas as pd
import pandas as pd
from collections import namedtuple
from scipy import interpolate

Expand All @@ -15,6 +17,8 @@
from .plot_taylor import TaylorDiagram
import modelskill.settings as settings
from .settings import options, register_option
from .observation import unit_display_name
from .metrics import metric_has_units


register_option("plot.scatter.points.size", 20, validator=settings.is_positive)
Expand Down Expand Up @@ -126,7 +130,7 @@ def quantiles_xy(
Parameters
----------
x: np.ndarray, 1d
x: np.ndarray, 1d
y: np.ndarray, 1d
q: int, Sequence[float]
quantiles to calculate
Expand All @@ -147,10 +151,11 @@ def quantiles_xy(

if not isinstance(quantiles, (int, Sequence)):
raise TypeError("quantiles must be an int or sequence of floats")

q = np.linspace(0, 1, num=quantiles) if isinstance(quantiles, int) else quantiles
return np.quantile(x, q=q), np.quantile(y, q=q)


def _scatter_matplotlib(
*,
x,
Expand Down Expand Up @@ -241,11 +246,13 @@ def _scatter_matplotlib(
max_cbar = ticks[-1]
cbar.set_label("# points")

plt.title(title)
# Add skill table
if skill_df is not None:
_plot_summary_table(skill_df, units, max_cbar=max_cbar)
return ax
plt.title(title)
# Add skill table
if skill_df is not None:
df = skill_df.df
assert isinstance(df, pd.DataFrame)
_plot_summary_table(df, units, max_cbar=max_cbar)
return ax


def _scatter_plotly(
Expand Down Expand Up @@ -475,7 +482,7 @@ def scatter(
ymin, ymax = y.min(), y.max()
xymin = min([xmin, ymin])
xymax = max([xmax, ymax])

nbins_hist, binsize = _get_bins(bins, xymin=xymin, xymax=xymax)

if xlim is None:
Expand All @@ -484,7 +491,6 @@ def scatter(
if ylim is None:
ylim = (xymin - binsize, xymax + binsize)


x_trend = np.array([xlim[0], xlim[1]])

if show_hist and show_density:
Expand Down Expand Up @@ -717,29 +723,51 @@ def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"):
return Z_grid


def _plot_summary_table(skill_df, units, max_cbar):
stats_with_units = ["bias", "rmse", "urmse", "mae"]
max_str_len = skill_df.columns.str.len().max()
lines = []
def _format_skill_line(
series: pd.Series, units: str, precision: int, max_str_len: int
) -> str:

for col in skill_df.columns:
if col == "model" or col == "variable":
continue
if col in stats_with_units:
name = series.name

item_unit = " "

if name == "n":
fvalue = series.values[0]
else:
if metric_has_units(metric=name):
# if statistic has dimensions, then add units
item_unit = units
else:
# else, add empty space (for formatting)
item_unit = " "
if col == "n":
# Number of samples, integer, else, 2 decimals
decimals = f".{0}f"
else:
decimals = f".{2}f"
lines.append(
f"{(col.ljust(max_str_len)).upper()} = {np.round(skill_df[col].values[0],2): {decimals}} {item_unit}"
)
item_unit = unit_display_name(units)

rounded_value = np.round(series.values[0], precision)
fmt = f".{precision}f"
fvalue = f"{rounded_value:{fmt}}"

name = series.name.ljust(max_str_len).upper()

return f"{name} = {fvalue} {item_unit}"


def format_skill_df(df: pd.DataFrame, units: str, precision: int = 2) -> List[str]:
    """Format a skill table as one text line per column.

    Parameters
    ----------
    df : pd.DataFrame
        Skill table; every column except "model" and "variable" is formatted.
        The dataframe is not modified.
    units : str
        Unit passed on to the per-column formatter.
    precision : int, optional
        Number of decimals passed on to the per-column formatter, by default 2

    Returns
    -------
    List[str]
        One formatted line per remaining column, in column order.
    """
    # Label width is taken from all incoming columns (before any are removed),
    # which keeps the padding identical to the previous behaviour.
    max_str_len = df.columns.str.len().max()

    # Remove model and variable columns if present, i.e. keep all other
    # columns. A non-inplace drop is used so the caller's dataframe is left
    # untouched.
    skill = df.drop(["model", "variable"], axis=1, errors="ignore")

    # One line per remaining column.
    return [
        _format_skill_line(skill[col], units, precision, max_str_len)
        for col in skill.columns
    ]


def _plot_summary_table(
df: pd.DataFrame, units: str, max_cbar: Optional[float] = None
) -> None:

lines = format_skill_df(df, units)
text_ = "\n".join(lines)

if max_cbar is None:
Expand Down
19 changes: 19 additions & 0 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,22 @@ def test_willmott():
mod = np.array([1.0, 0.0, 0.5]) # mean 0.5

assert mtr.willmott(obs, mod) == pytest.approx(1 - 0.5 / 1.5)


def test_metric_has_dimension():
    """metric_has_units accepts names and functions and rejects unknown names."""
    # the following metrics are dimensionless
    dimensionless = ["nse", mtr.nash_sutcliffe_efficiency, "kge", "r2"]
    for metric in dimensionless:
        assert not mtr.metric_has_units(metric)

    # while these metrics are in units of the observations
    with_units = ["mae", "bias", "rmse", mtr.rmse]
    for metric in with_units:
        assert mtr.metric_has_units(metric)

    # a name that is not a defined metric is an error
    with pytest.raises(ValueError):
        mtr.metric_has_units("unknown")
41 changes: 41 additions & 0 deletions tests/test_plot.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,50 @@
import numpy as np
import pytest
import pandas as pd

from modelskill.plot import format_skill_df
from modelskill.plot import sample_points


def test_format_skill_df():
    """format_skill_df renders one line per column, with units where relevant."""
    # Single-row skill table of the form
    #
    #                        n          bias      rmse     urmse       mae        cc        si        r2
    # observation
    # smhi_2095_klagshamn  167  1.033099e-09  0.040645  0.040645  0.033226  0.841135  0.376413  0.706335
    row = {
        "n": 167,
        "bias": 1.033099e-09,
        "rmse": 0.040645,
        "urmse": 0.040645,
        "mae": 0.033226,
        "cc": 0.841135,
        "si": 0.376413,
        "r2": 0.706335,
    }
    df = pd.DataFrame(
        {column: [value] for column, value in row.items()},
        index=["smhi_2095_klagshamn"],
    )

    # (units argument, expected fragment for each of the first six lines)
    cases = [
        (
            "degC",
            [
                "N = 167",
                "BIAS = 0.00 degC",
                "RMSE = 0.04 degC",
                "URMSE = 0.04 degC",
                "MAE = 0.03 degC",
                "CC = 0.84 ",
            ],
        ),
        (
            "meter",
            [
                "N = 167",
                "BIAS = 0.00 m",
                "RMSE = 0.04 m",
                "URMSE = 0.04 m",
                "MAE = 0.03 m",
                "CC = 0.84 ",
            ],
        ),
    ]

    for units, fragments in cases:
        lines = format_skill_df(df, units=units)
        for position, fragment in enumerate(fragments):
            assert fragment in lines[position]


@pytest.fixture
def x_y():
np.random.seed(42)
Expand Down

0 comments on commit 5b8bc1f

Please sign in to comment.