Skip to content

Commit

Permalink
Merge pull request #1172 from metno/mos_tests
Browse files Browse the repository at this point in the history
Add tests that perform real mos evaluations
  • Loading branch information
charlienegri authored May 31, 2024
2 parents 214a4fa + 1a67c46 commit b9c3b6c
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 51 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,4 @@ jobs:
- name: Install pyaerocom
run: python -m pip install . --no-deps
- name: Run pytest
run: python -m pytest -ra -q --cov --no-cov-on-fail --cov-report xml
run: python -m pytest -ra -q --cov --no-cov-on-fail --cov-report xml
7 changes: 2 additions & 5 deletions pyaerocom/scripts/cams2_83/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import date, timedelta
from enum import Enum
Expand Down Expand Up @@ -179,7 +178,7 @@ def runnermos(

logger.info("Running Statistics (MOS)")
ExperimentProcessor(stp).run()
print("Done Running Statistics (MOS)")
logger.info("Done Running Statistics (MOS)")


def runnermedianscores(
Expand All @@ -198,8 +197,6 @@ def runnermedianscores(

stp = EvalSetup(**cfg)

start = time.time()

logger.info(
"Running CAMS2_83 Specific Statistics, cache is not cleared, colocated data is assumed in place, regular statistics are assumed to have been run"
)
Expand All @@ -216,4 +213,4 @@ def runnermedianscores(
logger.info(f"Making median scores plot with pool {pool} and analysis {analysis}")
CAMS2_83_Processer(stp).run(analysis=analysis)

print(f"Long run: {time.time() - start} sec")
logger.info("Median scores run finished")
42 changes: 3 additions & 39 deletions tests/cams2_83/test_cams2_83_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,7 @@
runner = CliRunner()


@pytest.fixture()
def fake_config(monkeypatch, patched_config):
def fake_make_config(*args, **kwargs):
return patched_config

monkeypatch.setattr("pyaerocom.scripts.cams2_83.cli.make_config", fake_make_config)


@pytest.mark.usefixtures("fake_ExperimentProcessor", "reset_cachedir")
def test_clearcache(
monkeypatch,
fake_cache_path: Path,
Expand All @@ -26,23 +19,15 @@ def test_clearcache(
):
assert list(fake_cache_path.glob("*.pkl"))

def do_not_run(self, model_name=None, obs_name=None, var_list=None, update_interface=True):
assert model_name is None
assert obs_name is None
assert var_list is None
assert update_interface is True

monkeypatch.setattr(
"pyaerocom.scripts.cams2_83.evaluation.ExperimentProcessor.run", do_not_run
)
options = f"forecast week 2024-03-16 2024-03-23 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test'"
options = f"forecast week 2024-03-16 2024-03-23 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --cache {fake_cache_path} --name 'Test'"
result = runner.invoke(app, options.split())
assert "Running Statistics" in caplog.text
assert result.exit_code == 0
# Check that the cache is cleared
assert not list(fake_cache_path.glob("*.pkl"))


@pytest.mark.usefixtures("fake_CAMS2_83_Processer", "reset_cachedir")
def test_not_cleared_cache(
monkeypatch,
fake_cache_path: Path,
Expand All @@ -51,21 +36,6 @@ def test_not_cleared_cache(
):
assert list(fake_cache_path.glob("*.pkl"))

def do_not_run(
self,
model_name=None,
obs_name=None,
var_list=None,
update_interface=True,
analysis=False,
):
assert model_name is None
assert obs_name is None
assert var_list is None
assert analysis is False
assert update_interface is True

monkeypatch.setattr("pyaerocom.scripts.cams2_83.evaluation.CAMS2_83_Processer.run", do_not_run)
options = f"forecast long 2024-03-16 2024-03-23 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test' --medianscores"
result = runner.invoke(app, options.split())
assert "Running CAMS2_83 Specific Statistics, cache is not cleared" in caplog.text
Expand All @@ -75,25 +45,19 @@ def do_not_run(


def test_eval_dummy(
fake_cache_path: Path,
tmp_path: Path,
caplog,
):
assert list(fake_cache_path.glob("*.pkl"))

options = f"forecast day 2024-03-16 2024-03-16 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test'"
result = runner.invoke(app, options.split())
assert result.exit_code == 0
assert "Failed to read model variable" in caplog.text


def test_eval_medianscores_dummy(
fake_cache_path: Path,
tmp_path: Path,
caplog,
):
assert list(fake_cache_path.glob("*.pkl"))

options = f"analysis long 2023-03-01 2024-02-28 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test' --medianscores"
result = runner.invoke(app, options.split())
assert result.exit_code == 0
Expand Down
64 changes: 59 additions & 5 deletions tests/cams2_83/test_cams2_83_cli_mos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,75 @@


@pytest.fixture()
def fake_config(monkeypatch, patched_config):
def fake_config(monkeypatch, patched_config_mos):
def fake_make_config(*args, **kwargs):
return patched_config
return patched_config_mos

monkeypatch.setattr("pyaerocom.scripts.cams2_83.cli_mos.make_config_mos", fake_make_config)


def test_eval_mos_dummy(
fake_cache_path: Path,
tmp_path: Path,
caplog,
):
assert list(fake_cache_path.glob("*.pkl"))

options = f"season 2024-03-01 2024-05-12 --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test'"
result = runner.invoke(app, options.split())
assert result.exit_code == 0
assert "no output available" in caplog.text


@pytest.mark.usefixtures("fake_CAMS2_83_Processer", "reset_cachedir")
def test_eval_mos_standard(tmp_path: Path, coldata_mos: Path, caplog):
    """Run a one-day MOS evaluation and verify the output tree and the log.

    The CLI is pointed at the synthetic colocated data from ``coldata_mos``
    and writes its results (and cache) below ``tmp_path``.
    """
    options = f"day 2024-03-01 2024-03-01 --data-path {tmp_path} --coldata-path {coldata_mos} --cache {tmp_path} --id mos-colocated-data --name 'Test'"
    outcome = runner.invoke(app, options.split())
    assert outcome.exit_code == 0

    # Each expected output, relative to the data path, with the check it must pass
    expected_outputs = (
        ("cams2-83/mos-colocated-data/map", Path.is_dir),
        ("cams2-83/mos-colocated-data/ts/AT0ENK1_EEA-NRT-concno2_Surface.json", Path.is_file),
        ("cams2-83/mos-colocated-data/ts/AT0ILL1_EEA-NRT-concno2_Surface.json", Path.is_file),
        ("cams2-83/mos-colocated-data/ts/XK0012A_EEA-NRT-concno2_Surface.json", Path.is_file),
        ("cams2-83/mos-colocated-data/hm", Path.is_dir),
        ("cams2-83/mos-colocated-data/scat", Path.is_dir),
        ("cams2-83/mos-colocated-data/contour", Path.is_dir),
        ("cams2-83/mos-colocated-data/forecast", Path.is_dir),
        ("cams2-83/mos-colocated-data/cfg_cams2-83_mos-colocated-data.json", Path.is_file),
    )
    for relative, exists_as in expected_outputs:
        assert exists_as(tmp_path / relative)

    colfileE = f"{coldata_mos}/cams2-83/mos-colocated-data/ENS/concno2_concno2_MOD-ENS_REF-EEA-NRT_20240301_20240301_hourly_ALL-wMOUNTAINS.nc"
    colfileM = f"{coldata_mos}/cams2-83/mos-colocated-data/MOS/concno2_concno2_MOD-MOS_REF-EEA-NRT_20240301_20240301_hourly_ALL-wMOUNTAINS.nc"

    # Messages that must all show up in the captured log
    expected_messages = (
        "Running Statistics (MOS)",
        f"Processing: {colfileE}",
        f"Processing: {colfileM}",
        "Finished processing",
        "Done Running Statistics (MOS)",
    )
    for message in expected_messages:
        assert message in caplog.text


@pytest.mark.usefixtures("fake_ExperimentProcessor", "reset_cachedir")
def test_eval_mos_medianscores(tmp_path: Path, coldata_mos: Path, caplog):
    """Run a ``season`` MOS evaluation and verify the forecast output and the log.

    ``ExperimentProcessor.run`` is stubbed out by the ``fake_ExperimentProcessor``
    fixture; the CLI reads the synthetic colocated data from ``coldata_mos``
    and writes its results (and cache) below ``tmp_path``.
    """
    options = f"season 2024-03-01 2024-03-05 --data-path {tmp_path} --coldata-path {coldata_mos} --cache {tmp_path} --id mos-colocated-data --name 'Test'"
    result = runner.invoke(app, options.split())
    assert result.exit_code == 0
    fc_out = tmp_path / "cams2-83/mos-colocated-data/forecast/ALL_EEA-NRT-concno2_Surface.json"
    assert fc_out.is_file()
    assert "Running CAMS2_83 Specific Statistics" in caplog.text
    # Asserting the bare string literal would always pass (non-empty strings
    # are truthy); the message has to be looked up in the captured log.
    assert "Processing Component: concno2" in caplog.text
    assert "Making subset for ALL, 2024/03/01-2024/03/05 and all" in caplog.text
    assert "Finished processing" in caplog.text
    assert "Median scores run finished" in caplog.text
6 changes: 6 additions & 0 deletions tests/fixtures/cams2_83/cfg_test_mos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test configuration for the "mos-colocated-data" experiment of the
# "cams2-83" project, restricted to a single species (concno2).
CFG = dict(
    proj_id="cams2-83",
    exp_id="mos-colocated-data",
    use_cams2_83=True,
    species_list=["concno2"],
)
132 changes: 131 additions & 1 deletion tests/fixtures/cams2_83/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
from __future__ import annotations

import os
from datetime import date, timedelta
from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import xarray as xr

from . import cfg_test
from pyaerocom import const

from . import cfg_test, cfg_test_mos


@pytest.fixture()
Expand All @@ -18,8 +26,130 @@ def fake_cache_path(monkeypatch, tmp_path: Path):
return tmp_path


@pytest.fixture
def reset_cachedir():
    """Put ``const.CACHEDIR`` back to its pre-test value after the test.

    The value is captured before the test body runs and written back during
    fixture teardown, so a test may change the cache directory freely.
    """
    cachedir_before_test = const.CACHEDIR
    yield
    const.CACHEDIR = cachedir_before_test


@pytest.fixture
def patched_config():
    """Return a private copy of the CAMS2-83 test configuration.

    The configuration lives in a module-level dict (``cfg_test.CFG``).
    Returning that object directly would let any in-place modification made
    by the code under test leak into every later test, so each test gets
    its own deep copy instead.
    """
    from copy import deepcopy

    cfg = deepcopy(cfg_test.CFG)
    # Sanity check that the expected test configuration was picked up
    assert cfg["proj_id"] == "cams2-83"
    return cfg


@pytest.fixture
def patched_config_mos():
    """Return a private copy of the MOS test configuration.

    The configuration lives in a module-level dict (``cfg_test_mos.CFG``).
    Returning that object directly would let any in-place modification made
    by the code under test leak into every later test, so each test gets
    its own deep copy instead.
    """
    from copy import deepcopy

    cfg = deepcopy(cfg_test_mos.CFG)
    # Sanity check that the expected test configuration was picked up
    assert cfg["exp_id"] == "mos-colocated-data"
    return cfg


@pytest.fixture
def fake_CAMS2_83_Processer(monkeypatch):
    """Swap ``CAMS2_83_Processer.run`` for a stub that only checks its arguments.

    The stub does no processing; it fails the test if ``run`` is called with
    anything other than the default argument values.
    """

    def run_stub(
        self,
        model_name=None,
        obs_name=None,
        var_list=None,
        update_interface=True,
        analysis=False,
    ):
        # Selection arguments must have been left unset by the caller
        for selection in (model_name, obs_name, var_list):
            assert selection is None
        assert analysis is False
        assert update_interface is True

    target = "pyaerocom.scripts.cams2_83.evaluation.CAMS2_83_Processer.run"
    monkeypatch.setattr(target, run_stub)


@pytest.fixture
def fake_ExperimentProcessor(monkeypatch):
    """Swap ``ExperimentProcessor.run`` for a stub that only checks its arguments.

    The stub does no processing; it fails the test if ``run`` is called with
    anything other than the default argument values.
    """

    def run_stub(self, model_name=None, obs_name=None, var_list=None, update_interface=True):
        # Selection arguments must have been left unset by the caller
        for selection in (model_name, obs_name, var_list):
            assert selection is None
        assert update_interface is True

    target = "pyaerocom.scripts.cams2_83.evaluation.ExperimentProcessor.run"
    monkeypatch.setattr(target, run_stub)


@pytest.fixture(scope="module")
def coldata_mos(tmp_path_factory) -> Path:
    """Write synthetic hourly ``concno2`` colocated-data files; return their root.

    Two sets of NetCDF files are created below a module-scoped temporary
    directory, all inside ``cams2-83/mos-colocated-data``:

    * one file per model (ENS, MOS) and forecast day (0 to 3) spanning
      2024-03-01 to 2024-03-05, in a ``CAMS2-83-<model>-day<day>-FC`` folder;
    * one file per model in a ``<model>`` folder, holding data for
      2024-03-01 to 2024-03-02 but named with 2024-03-01 as both dates.

    Every data value is zero and every file describes the same three stations.
    """
    root: Path = tmp_path_factory.mktemp("data")

    def dataset(model: str, day: int, first: date, last: date) -> xr.Dataset:
        """Build an all-zero dataset with hourly steps from *first* to *last*."""
        # Both end points are part of the time axis, hence the extra step
        n_steps = (last - first) // timedelta(hours=1) + 1

        var_attrs = {
            "ts_type": "hourly",
            "filter_name": "ALL-wMOUNTAINS",
            "ts_type_src": ["hourly", "hourly"],
            "var_units": ["ug m-3", "ug m-3"],
            "data_level": 3,
            "revision_ref": "n/a",
            "from_files": "",
            "from_files_ref": "None",
            "colocate_time": 0,
            "obs_is_clim": 0,
            "pyaerocom": "0.18.dev0",
            "CONV!min_num_obs": str({"daily": {"hourly": 18}}),
            "resample_how": "None",
            "obs_name": "EEA-NRT",
            "vert_code": "Surface",
            "diurnal_only": 0,
            "zeros_to_nan": 1,
        }
        zeros = xr.Variable(
            ("data_source", "time", "station_name"),
            np.zeros((2, n_steps, 3)),
            var_attrs,
        )
        coords = {
            "data_source": xr.Variable(
                "data_source", ["CAMS2_83.NRT", f"CAMS2-83.{model}.day{day}.FC"]
            ),
            "station_name": xr.Variable("station_name", ["AT0ENK1", "AT0ILL1", "XK0012A"]),
            "latitude": xr.Variable("station_name", [48.39, 47.77, 42.66]),
            "longitude": xr.Variable("station_name", [13.67, 16.77, 21.08]),
            "altitude": xr.Variable("station_name", [525, 117, 529]),
            "time": xr.Variable("time", pd.date_range(first, last, freq="1h")),
        }
        result = xr.Dataset(data_vars={"concno2": zeros}, coords=coords)

        # These attributes are added once the dataset exists because
        # ``data_source`` is read back from its coordinate
        result["concno2"].attrs.update(
            data_source=result["data_source"].values.tolist(),
            var_name=["concno2", "concno2"],
            var_name_input=["concno2", "concno2"],
            model_name=f"CAMS2-83-{model}-day{day}-FC",
        )
        return result

    def save(ds: xr.Dataset, path: Path) -> None:
        """Write *ds* to *path*, creating missing parent folders first."""
        path.parent.mkdir(exist_ok=True, parents=True)
        ds.to_netcdf(path)

    # Per-forecast-day files for both models
    start, end = date(2024, 3, 1), date(2024, 3, 5)
    for model in ("ENS", "MOS"):
        for day in range(4):
            target = (
                root
                / f"cams2-83/mos-colocated-data/CAMS2-83-{model}-day{day}-FC/concno2_concno2_MOD-CAMS2-83-{model}-day{day}-FC_REF-EEA-NRT_{start:%Y%m%d}_{end:%Y%m%d}_hourly_ALL-wMOUNTAINS.nc"
            )
            save(dataset(model, day, start, end), target)

    # Per-model files; the file name carries the start date twice
    start, end = date(2024, 3, 1), date(2024, 3, 2)
    for model in ("ENS", "MOS"):
        target = (
            root
            / f"cams2-83/mos-colocated-data/{model}/concno2_concno2_MOD-{model}_REF-EEA-NRT_{start:%Y%m%d}_{start:%Y%m%d}_hourly_ALL-wMOUNTAINS.nc"
        )
        ds = dataset(model, 0, start, end)
        # These files use the bare model name instead of the forecast-day name
        ds["concno2"].attrs["model_name"] = model
        save(ds, target)

    return root

0 comments on commit b9c3b6c

Please sign in to comment.