Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
b8c44cb
Save in/out of stencils and decorated functions
FlorianDeconinck Apr 15, 2025
0a379d8
Move `instrument` to debugger
FlorianDeconinck Apr 17, 2025
95ab6aa
Log debugger on & config in debug
FlorianDeconinck Apr 17, 2025
530208c
Move @instrument once more
FlorianDeconinck Apr 17, 2025
05668c7
Add `track_by_parameter` and moved results in subdirectory
FlorianDeconinck Apr 18, 2025
ac000d8
Simplify configuration
FlorianDeconinck Apr 18, 2025
f8dfbd9
Minor fix to filename
FlorianDeconinck Apr 18, 2025
2c43481
Fix stencils save
FlorianDeconinck Apr 18, 2025
c5d02dc
Normalize dims naming
FlorianDeconinck Apr 28, 2025
454a898
Use full qualified name when possible
FlorianDeconinck Apr 28, 2025
1de97cf
Merge branch 'develop' into experimental/debugger
FlorianDeconinck Apr 28, 2025
440b5ca
Lint
FlorianDeconinck Apr 28, 2025
c2684e2
Coarse documentation for debugger config
FlorianDeconinck Apr 28, 2025
fa81043
Protect catch-all for in xarray failures that don't raise properly
FlorianDeconinck Apr 30, 2025
f1afbb1
Config: allow for compute domain save in Quantity instead of full dat…
FlorianDeconinck Apr 30, 2025
2e8680e
Added tools to plot pyFV3 data
CharlesKrop Apr 30, 2025
92d0557
removed print statements
CharlesKrop Apr 30, 2025
a16ade6
renamed folders
CharlesKrop May 1, 2025
ac9e105
Merge remote-tracking branch 'personal/debug_tools' into experimental…
CharlesKrop May 1, 2025
d5f77af
`plot_cube_sphere` public API
FlorianDeconinck May 1, 2025
ea518dd
FV3 README license ackgnoledgement
FlorianDeconinck May 1, 2025
f8fe74b
Debugger knows how to serialize dataclasses
FlorianDeconinck May 2, 2025
ee4775e
Restore `Robinson` projection
FlorianDeconinck May 2, 2025
a23ce03
Lint
FlorianDeconinck May 2, 2025
a7351c7
Moar lint
FlorianDeconinck May 2, 2025
368803c
Merge branch 'develop' into experimental/debugger
FlorianDeconinck May 19, 2025
379bdfd
Remove `DebugMode`
FlorianDeconinck May 19, 2025
ceb6d6c
Lint
FlorianDeconinck May 19, 2025
dda1013
`cartopy` import is not optional
FlorianDeconinck May 19, 2025
8ca571d
Clear `__init__`
FlorianDeconinck May 19, 2025
5a2f376
lint
FlorianDeconinck May 19, 2025
45ace08
Remove debug print
FlorianDeconinck May 20, 2025
ca9e105
Merge branch 'develop' into experimental/debugger
FlorianDeconinck May 20, 2025
d28198e
Use `field` instead of `view[:]`
FlorianDeconinck May 20, 2025
2595438
Spelling
FlorianDeconinck May 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ndsl/debug/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .config import ndsl_debugger


__all__ = ["ndsl_debugger"]
51 changes: 51 additions & 0 deletions ndsl/debug/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
This module provides configuration for the global debugger `ndsl_debugger`

When loading, the configuration will be searched in the global environment variable
`NDSL_DEBUG_CONFIG`

Configuration is a yaml file of the shape
```yaml
stencils_or_class:
- copy_corners_x_nord
- copy_corners_y_nord
- DGridShallowWaterLagrangianDynamics.__call__
track_parameter_by_name:
- fy
```

Global variable:
ndsl_debugger: Debugger accessible throughout the middleware, defaults to `None`
if there is no configuration
"""

import os

import yaml

from ndsl.comm.mpi import MPIComm
from ndsl.debug.debugger import Debugger
from ndsl.logging import ndsl_log


# Global debugger handle; stays `None` unless `_set_debugger` replaces it
ndsl_debugger = None


def _set_debugger():
    """Build the global `ndsl_debugger` from the file named by `NDSL_DEBUG_CONFIG`.

    The environment variable is read once. When it is unset, or when it points
    to a path that does not exist (a warning is logged in that case), the
    global debugger is left untouched and the function returns. Otherwise the
    YAML file is parsed with the safe loader and its top-level keys are
    forwarded as keyword arguments to `Debugger`, together with the rank
    reported by `MPIComm`.
    """
    global ndsl_debugger

    config = os.getenv("NDSL_DEBUG_CONFIG", "")
    if not os.path.exists(config):
        if config != "":
            ndsl_log.warning(
                f"NDSL_DEBUG_CONFIG set but path {config} does not exists."
            )
        # Nothing can be loaded: leave the debugger off instead of letting
        # `open` raise FileNotFoundError while the package is being imported
        return

    with open(config) as file:
        # An empty YAML document parses to `None`; fall back to an empty
        # mapping so the keyword expansion below stays valid
        config_dict = yaml.load(file.read(), Loader=yaml.SafeLoader) or {}

    ndsl_debugger = Debugger(rank=MPIComm().Get_rank(), **config_dict)
    ndsl_log.info("[NDSL Debugger] On")
    ndsl_log.debug(f"[NDSL Debugger] Config:\n{config_dict}")


# Runs at import time so `ndsl_debugger` is configured before any user reads it
_set_debugger()
Comment thread
oelbert marked this conversation as resolved.
109 changes: 109 additions & 0 deletions ndsl/debug/debugger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import dataclasses
import numbers
import os
import pathlib

import pandas as pd
import xarray as xr

from ndsl.logging import ndsl_log
from ndsl.quantity import Quantity


@dataclasses.dataclass
class Debugger:
    """Debugger relying on `ndsl.debug.config` for setup capable
    of doing automatic data save on external configuration.

    Data is written as NetCDF files below `<dir_name>/debug/`, in one
    sub-directory per rank (`R<rank>`).
    """

    # Configuration
    # Names (stencil or qualified callable names) whose arguments get saved
    stencils_or_class: list[str] = dataclasses.field(default_factory=list)
    # Argument names saved individually whenever they are passed to a call
    track_parameter_by_name: list[str] = dataclasses.field(default_factory=list)
    # For `Quantity` inputs: save `.field` instead of the full `.data` buffer
    save_compute_domain_only: bool = False
    # Root directory under which the `debug/` tree is created
    dir_name: str = "./"

    # Runtime data
    rank: int = -1
    # Completed calls per savename, used to number savepoint files
    calls_count: dict[str, int] = dataclasses.field(default_factory=dict)
    # Files written so far per tracked parameter, used to number track files
    track_parameter_count: dict[str, int] = dataclasses.field(default_factory=dict)

    def _to_xarray(self, data, name) -> xr.DataArray:
        """Wrap `data` in an `xr.DataArray`.

        Args:
            data: a `Quantity`, any object exposing `.shape`, or a
                numeric/string scalar.
            name: name of the value being converted (currently unused).

        Returns:
            A `DataArray` whose dimensions are named `dim_<index>_<size>` for
            array-like inputs. Unsupported types are reported through the
            logger and replaced by a placeholder `DataArray([0])`.
        """
        if isinstance(data, Quantity):
            if self.save_compute_domain_only:
                mem = data.field
                shp = data.field.shape
            else:
                mem = data.data
                shp = data.shape
        elif hasattr(data, "shape"):
            mem = data
            shp = data.shape
        elif (
            pd.api.types.is_numeric_dtype(data)
            or pd.api.types.is_string_dtype(data)
            or isinstance(data, numbers.Number)
        ):
            return xr.DataArray(data)
        else:
            ndsl_log.error(f"[Debugger] Cannot save data of type {type(data)}")
            return xr.DataArray([0])
        return xr.DataArray(mem, dims=[f"dim_{i}_{s}" for i, s in enumerate(shp)])

    def track_data(self, data_as_dict, source_as_name, is_in) -> None:
        """Save every entry of `data_as_dict` listed in `track_parameter_by_name`.

        Each tracked entry goes to its own file,
        `debug/tracks/<name>/R<rank>/<count>_<name>_<source>-<In|Out>.nc4`,
        where `<count>` increases by one for every call that reaches the
        save step for that parameter name, whether or not the write succeeds.

        Args:
            data_as_dict: mapping of argument name to value.
            source_as_name: name of the caller, embedded in the file name.
            is_in: True when called before execution, False when called after.
        """
        for name, data in data_as_dict.items():
            if name not in self.track_parameter_by_name:
                continue

            count = self.track_parameter_count.setdefault(name, 0)

            out_dir = pathlib.Path(f"{self.dir_name}/debug/tracks/{name}/R{self.rank}/")
            os.makedirs(out_dir, exist_ok=True)
            direction = "In" if is_in else "Out"
            path = out_dir / f"{count}_{name}_{source_as_name}-{direction}.nc4"
            try:
                self._to_xarray(data, name).to_netcdf(path)
            except ValueError as e:
                # Best effort: a failed save must not interrupt the model run
                ndsl_log.error(f"[Debugger] Failure to save {data}: {e}")

            self.track_parameter_count[name] += 1

    def save_as_dataset(self, data_as_dict, savename, is_in) -> None:
        """Save dictionary of data to NetCDF

        Nothing is written unless `savename` is listed in `stencils_or_class`.
        Dataclass instances are flattened one level: each field is stored as
        `<name>.<field name>`. The file is written to
        `debug/savepoints/R<rank>/<savename>-Call<n>-<In|Out>.nc4`, where `<n>`
        is the current call count of `savename`.

        Note: Unknown types in the dictionary won't be saved.

        Args:
            data_as_dict: mapping of argument name to value.
            savename: name of the stencil or callable being saved.
            is_in: True when called before execution, False when called after.
        """
        if savename not in self.stencils_or_class:
            return

        data_arrays = {}
        for name, data in data_as_dict.items():
            # `is_dataclass` is also true for dataclass *types*; only
            # instances carry field values that can be read with `getattr`
            if dataclasses.is_dataclass(data) and not isinstance(data, type):
                for field in dataclasses.fields(data):
                    data_arrays[f"{name}.{field.name}"] = self._to_xarray(
                        getattr(data, field.name), field.name
                    )
            else:
                data_arrays[name] = self._to_xarray(data, name)

        call_count = self.calls_count.get(savename, 0)
        out_dir = pathlib.Path(f"{self.dir_name}/debug/savepoints/R{self.rank}/")
        os.makedirs(out_dir, exist_ok=True)
        direction = "In" if is_in else "Out"
        path = out_dir / f"{savename}-Call{call_count}-{direction}.nc4"
        try:
            xr.Dataset(data_arrays).to_netcdf(path)
        except ValueError as e:
            # Best effort: a failed save must not interrupt the model run
            ndsl_log.error(f"[DebugInfo] Failure to save {savename}: {e}")

    def increment_call_count(self, savename: str):
        """Increment the call count for this savename"""
        self.calls_count[savename] = self.calls_count.get(savename, 0) + 1
44 changes: 44 additions & 0 deletions ndsl/debug/tooling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import inspect
from functools import wraps
from typing import Any, Callable

from ndsl.debug.config import ndsl_debugger


def instrument(func) -> Callable:
    """Decorate a method so the NDSL debugger can save its arguments.

    When the global `ndsl_debugger` is `None` the wrapped method is called
    directly. Otherwise the arguments that match named parameters of `func`
    (excluding the leading `self`) are handed to the debugger before and after
    the call, under the method's qualified name, and the call count for that
    name is incremented once the method has returned.

    Args:
        func: the method to wrap; its first positional parameter is taken to
            be `self` and is never saved.

    Returns:
        The wrapping callable, which returns whatever `func` returns.
    """
    savename = func.__qualname__
    # The signature does not change between calls: resolve it once here
    # instead of on every invocation
    params = inspect.signature(func).parameters
    # Parameters that can be filled positionally, minus the leading `self`
    positional_names = [
        name
        for name, param in params.items()
        if param.kind
        in (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
    ][1:]

    @wraps(func)
    def wrapper(self, *args: Any, **kwargs: Any):
        if ndsl_debugger is None:
            return func(self, *args, **kwargs)

        # Positional arguments are paired with parameter names in order;
        # extras (e.g. swallowed by *args) have no name and are ignored
        data_as_dict = dict(zip(positional_names, args))
        # Keyword arguments are kept only when they match a named parameter
        data_as_dict.update(
            {name: value for name, value in kwargs.items() if name in params}
        )

        ndsl_debugger.save_as_dataset(data_as_dict, savename, is_in=True)
        ndsl_debugger.track_data(data_as_dict, savename, is_in=True)
        r = func(self, *args, **kwargs)
        # The same objects are saved again: this captures in-place updates
        ndsl_debugger.save_as_dataset(data_as_dict, savename, is_in=False)
        ndsl_debugger.track_data(data_as_dict, savename, is_in=False)
        ndsl_debugger.increment_call_count(savename)
        return r

    return wrapper
32 changes: 28 additions & 4 deletions ndsl/dsl/stencil.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
import numpy as np
from gt4py.cartesian import gtscript
from gt4py.cartesian.gtc.passes.oir_pipeline import DefaultPipeline, OirPipeline
from gt4py.cartesian.stencil_object import StencilObject

from ndsl.comm.comm_abc import Comm
from ndsl.comm.communicator import Communicator
from ndsl.comm.decomposition import block_waiting_for_compilation, unblock_waiting_tiles
from ndsl.comm.mpi import MPI
from ndsl.constants import X_DIM, X_DIMS, Y_DIM, Y_DIMS, Z_DIM, Z_DIMS
from ndsl.debug import ndsl_debugger
from ndsl.dsl.dace.orchestration import SDFGConvertible
from ndsl.dsl.stencil_config import CompilationConfig, RunMode, StencilConfig
from ndsl.dsl.typing import Float, Index3D, cast_to_index3d
Expand Down Expand Up @@ -295,10 +297,11 @@ def __init__(
externals = {}
self.externals = externals
self._func_name = func.__name__
self._func_qualname = func.__qualname__
stencil_kwargs = self.stencil_config.stencil_kwargs(
skip_passes=skip_passes, func=func
)
self.stencil_object = None
self.stencil_object: StencilObject | None = None

self._argument_names = tuple(inspect.getfullargspec(func).args)

Expand Down Expand Up @@ -350,7 +353,7 @@ def __init__(
dtypes={float: Float},
**stencil_kwargs,
build_info=(build_info := {}),
)
) # type: ignore

if (
compilation_config.use_minimal_caching
Expand Down Expand Up @@ -384,20 +387,32 @@ def nothing_function(*args, **kwargs):
setattr(self, "__call__", nothing_function)

def __call__(self, *args, **kwargs) -> None:
# Verbose stencil execution
if self.stencil_config.verbose:
ndsl_log.debug(f"Running {self._func_name}")

# Marshal arguments
args_list = list(args)
_convert_quantities_to_storage(args_list, kwargs)
args = tuple(args_list)

args_as_kwargs = dict(zip(self._argument_names, args))

# Ranks comparison tool
if self.comm is not None:
differences = compare_ranks(self.comm, {**args_as_kwargs, **kwargs})
if len(differences) > 0:
raise ValueError(
f"rank {self.comm.Get_rank()} has differences {differences} "
f"before calling {self._func_name}"
)

# Debugger actions if turned on
if ndsl_debugger:
all_args = args_as_kwargs | kwargs
ndsl_debugger.save_as_dataset(all_args, self._func_qualname, is_in=True)
ndsl_debugger.track_data(all_args, self._func_qualname, is_in=True)

# Execute stencil
if self.stencil_config.compilation_config.validate_args:
if __debug__ and "origin" in kwargs:
raise TypeError("origin cannot be passed to FrozenStencil call")
Expand All @@ -410,14 +425,23 @@ def __call__(self, *args, **kwargs) -> None:
domain=self.domain,
validate_args=True,
exec_info=self._timing_collector.exec_info,
)
) # type: ignore
else:
self.stencil_object.run(
**args_as_kwargs,
**kwargs,
**self._stencil_run_kwargs,
exec_info=self._timing_collector.exec_info,
)

# Debugger actions if turned on
if ndsl_debugger:
all_args = args_as_kwargs | kwargs
ndsl_debugger.save_as_dataset(all_args, self._func_qualname, is_in=False)
ndsl_debugger.track_data(all_args, self._func_qualname, is_in=False)
ndsl_debugger.increment_call_count(self._func_qualname)

# Ranks comparison tool
if self.comm is not None:
differences = compare_ranks(self.comm, {**args_as_kwargs, **kwargs})
if len(differences) > 0:
Expand Down
4 changes: 4 additions & 0 deletions ndsl/viz/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .cube_sphere import plot_cube_sphere


__all__ = ["plot_cube_sphere"]
36 changes: 36 additions & 0 deletions ndsl/viz/cube_sphere.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpy as np
from cartopy import crs as ccrs
from matplotlib import pyplot as plt

from ndsl import Quantity, ndsl_log
from ndsl.comm.communicator import Communicator
from ndsl.grid import GridData
from ndsl.viz.fv3 import pcolormesh_cube


def plot_cube_sphere(
    quantity: Quantity,
    k_level: int,
    comm: Communicator,
    grid_data: GridData,
    save_to_path: str,
) -> None:
    """Gather a quantity and plot it on a Robinson projection.

    The quantity and the grid's `lat`/`lon` are gathered through `comm`; only
    the rank for which `comm.rank == 0` draws and saves the figure.

    Args:
        quantity: 2D or 3D quantity to plot. Any other rank is reported
            through the logger and nothing is plotted.
        k_level: index selected on the last axis when the gathered data is
            not 3D (i.e. for 3D input quantities).
        comm: communicator used for the gather.
        grid_data: provides `lat` and `lon`; they are scaled by `180 / pi`
            before plotting (presumably radians to degrees - confirm).
        save_to_path: destination handed to `Figure.savefig`.
    """
    if len(quantity.shape) < 2 or len(quantity.shape) > 3:
        ndsl_log.error(
            f"[Plot Cube] Can't plot quantity with shape == {quantity.shape}"
        )
        return

    data = comm.gather(quantity)
    lat = comm.gather(grid_data.lat)
    lon = comm.gather(grid_data.lon)

    if comm.rank == 0:
        fig, ax = plt.subplots(1, 1, subplot_kw={"projection": ccrs.Robinson()})
        try:
            pcolormesh_cube(
                lat.view[:] * 180.0 / np.pi,
                lon.view[:] * 180.0 / np.pi,
                data.view[:] if len(data.shape) == 3 else data.view[:, :, :, k_level],
                ax=ax,
            )
            fig.savefig(save_to_path)
        finally:
            # pyplot keeps a reference to every figure it creates; close it so
            # repeated calls do not accumulate open figures
            plt.close(fig)
14 changes: 14 additions & 0 deletions ndsl/viz/fv3/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Acknowledgment

This code was lifted from <https://github.com/ai2cm/fv3net> and developed by AI2 under the MIT license (see below).
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're sure there's not a license violation or conflict between this and our top-level license?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also assuming you talked to the AI2 folks about this?

Actually, why not directly import fv3net? I have it as a Pace dependency (at least in the dockerfile...)?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because fv3net is a huge repository - and the viz is a very small piece of code untouched for the last three years. It's a case of bringing in thousands of lines of code for the benefit of using a handful.

I'll check the license, I think it's ok.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a lawyer here, but the MIT license (as stated below) is pretty loose. In particular, it allows modifying and redistributing the code provided that the license header is preserved.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I checked and this is correct. If we were to make "substantial" changes we could rope the code in the Apache 2.0.

MIT is basically covering up AI2 for any side effect and by knock-on free us to use or reuse as is. If we modify we can argue that the license applying to the code is the one under we operate

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also checked lol, but yeah it seems like this is fine. Probably still good to drop Oli WM a line to let him know if you haven't already


## MIT License

The MIT License (MIT)
Copyright (c) 2019, The Allen Institute for Artificial Intelligence

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Loading