From 0ea750500e571035f8f8b69308fc2a00b2b04fa8 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 8 Aug 2025 09:37:43 +0200 Subject: [PATCH 1/6] Deprecate PACE_DACE_DEBUG in favor of NDSL_DACE_DEBUG --- ndsl/dsl/dace/dace_config.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/ndsl/dsl/dace/dace_config.py b/ndsl/dsl/dace/dace_config.py index f6e6bb26..484be830 100644 --- a/ndsl/dsl/dace/dace_config.py +++ b/ndsl/dsl/dace/dace_config.py @@ -15,6 +15,7 @@ from ndsl.dsl.caches.codepath import FV3CodePath from ndsl.dsl.gt4py_utils import is_gpu_backend from ndsl.dsl.typing import get_precision +from ndsl.logging import ndsl_log from ndsl.optional_imports import cupy as cp @@ -24,6 +25,21 @@ DEACTIVATE_DISTRIBUTED_DACE_COMPILE = False +def _debug_dace_orchestration() -> bool: + """ + Debugging Dace orchestration deeper can be done by turning on `syncdebug`. + We control this Dace configuration below with our own override. + """ + if os.getenv("PACE_DACE_DEBUG", ""): + ndsl_log.warning("PACE_DACE_DEBUG is deprecated. Use NDSL_DACE_DEBUG instead.") + if os.getenv("NDSL_DACE_DEBUG", ""): + ndsl_log.warning( + "PACE_DACE_DEBUG and NDSL_DACE_DEBUG were both specified. NDSL_DACE_DEBUG will take precedence." 
+ ) + + return os.getenv("NDSL_DACE_DEBUG", os.getenv("PACE_DACE_DEBUG", "False")) == "True" + + def _is_corner(rank: int, partitioner: Partitioner) -> bool: if partitioner.tile.on_tile_bottom(rank): if partitioner.tile.on_tile_left(rank): @@ -178,14 +194,10 @@ def __init__( else: self._orchestrate = orchestration - # Debugging Dace orchestration deeper can be done by turning on `syncdebug` - # We control this Dace configuration below with our own override - dace_debug_env_var = os.getenv("PACE_DACE_DEBUG", "False") == "True" - # We hijack the optimization level of GT4Py because we don't # have the configuration at NDSL level, but we do use the GT4Py # level - # TODO: if GT4PY opt level is funnled via NDSL - use it here + # TODO: if GT4PY opt level is funneled via NDSL - use it here optimization_level = GT4PY_COMPILE_OPT_LEVEL # Set the configuration of DaCe to a rigid & tested set of divergence @@ -283,7 +295,7 @@ def __init__( # Enable to debug GPU failures dace.config.Config.set( - "compiler", "cuda", "syncdebug", value=dace_debug_env_var + "compiler", "cuda", "syncdebug", value=_debug_dace_orchestration() ) if get_precision() == 32: From 5618cc0d110b4dc354b00cb0370db5e51cb6434a Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 8 Aug 2025 09:56:51 +0200 Subject: [PATCH 2/6] Deprecate PACE_LOGLEVEL in favor of NDSL_LOGLEVEL --- ndsl/logging.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/ndsl/logging.py b/ndsl/logging.py index 5e7c845e..1d7e7e80 100644 --- a/ndsl/logging.py +++ b/ndsl/logging.py @@ -8,8 +8,6 @@ from mpi4py import MPI -LOGLEVEL = os.environ.get("PACE_LOGLEVEL", "INFO").lower() - # Python log levels are hierarchical, therefore setting INFO # means DEBUG and everything lower will be logged. 
AVAILABLE_LOG_LEVELS = { @@ -21,6 +19,28 @@ } +def _get_log_level(default: str = "info"): + if os.getenv("PACE_LOGLEVEL", ""): + ndsl_log.warning("PACE_LOGLEVEL is deprecated. Use NDSL_LOGLEVEL instead.") + if os.getenv("NDSL_LOGLEVEL", ""): + ndsl_log.warning( + "PACE_LOGLEVEL and NDSL_LOGLEVEL were both specified. NDSL_LOGLEVEL will take precedence." + ) + + loglevel = os.getenv("NDSL_LOGLEVEL", os.getenv("PACE_LOGLEVEL", default)).lower() + + if loglevel in AVAILABLE_LOG_LEVELS.keys(): + return loglevel + + ndsl_log.warning( + f"Unknown log level '{loglevel}', falling back to '{default}'. Valid values are: {AVAILABLE_LOG_LEVELS.keys()}." + ) + return default + + +LOGLEVEL = _get_log_level() + + def _ndsl_logger() -> logging.Logger: name_log = logging.getLogger(__name__) name_log.setLevel(AVAILABLE_LOG_LEVELS[LOGLEVEL]) From 3e7be6ac801b25d9133110f0e42c873f6b6bc1a3 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 8 Aug 2025 10:13:00 +0200 Subject: [PATCH 3/6] Deprecate PACE_FLOAT_PRECISION in favor of NDSL_LITERAL_PRECISION --- docs/index.md | 2 +- ndsl/dsl/__init__.py | 30 +++++++++++++++++++++++++++++- ndsl/dsl/typing.py | 5 +++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/docs/index.md b/docs/index.md index 650a2846..02945fa0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -68,7 +68,7 @@ Configurations for Pace to use NDSL with different backend: - Run: load pre-compiled program and execute, fail if the .so is not present (_no hash check!_) (backend must be `dace:gpu` or `dace:cpu`) -- PACE_FLOAT_PRECISION=64 control the floating point precision throughout the program. +- NDSL_FLOAT_PRECISION=64 control the floating point precision throughout the program. 
Install Pace with different NDSL backend: diff --git a/ndsl/dsl/__init__.py b/ndsl/dsl/__init__.py index e3fe0cc8..b30d0a35 100644 --- a/ndsl/dsl/__init__.py +++ b/ndsl/dsl/__init__.py @@ -1,8 +1,10 @@ # Literal precision for both GT4Py & NDSL import os import sys +from typing import Literal from ndsl.comm.mpi import MPI +from ndsl.logging import ndsl_log gt4py_config_module = "gt4py.cartesian.config" @@ -12,7 +14,33 @@ " Please import `ndsl.dsl` or any `ndsl` module " " before any `gt4py` imports." ) -NDSL_GLOBAL_PRECISION = int(os.getenv("PACE_FLOAT_PRECISION", "64")) + + +def _get_literal_precision(default: Literal["32", "64"] = "64") -> Literal["32", "64"]: + if os.getenv("PACE_FLOAT_PRECISION", ""): + ndsl_log.warning( + "PACE_FLOAT_PRECISION is deprecated. Use NDSL_LITERAL_PRECISION instead." + ) + if os.getenv("NDSL_LITERAL_PRECISION", ""): + ndsl_log.warning( + "PACE_FLOAT_PRECISION and NDSL_LITERAL_PRECISION were both specified. NDSL_LITERAL_PRECISION will take precedence." + ) + + precision = os.getenv( + "NDSL_LITERAL_PRECISION", os.getenv("PACE_FLOAT_PRECISION", default) + ) + + expected: list[Literal["32", "64"]] = ["32", "64"] + if precision in expected: + return precision # type: ignore + + ndsl_log.warning( + f"Unexpected literal precision '{precision}', falling back to '{default}'. Valid values are {expected}." 
+ ) + return default + + +NDSL_GLOBAL_PRECISION = int(_get_literal_precision()) os.environ["GT4PY_LITERAL_PRECISION"] = str(NDSL_GLOBAL_PRECISION) diff --git a/ndsl/dsl/typing.py b/ndsl/dsl/typing.py index 33e01b10..fcbb42af 100644 --- a/ndsl/dsl/typing.py +++ b/ndsl/dsl/typing.py @@ -1,9 +1,10 @@ -import os from typing import Tuple, TypeAlias, Union, cast import numpy as np from gt4py.cartesian import gtscript +from ndsl.dsl import NDSL_GLOBAL_PRECISION + # A Field Field = gtscript.Field @@ -23,7 +24,7 @@ def get_precision() -> int: - return int(os.getenv("PACE_FLOAT_PRECISION", "64")) + return NDSL_GLOBAL_PRECISION # We redefine the type as a way to distinguish From a11bcd2c6c9124459942bea3b32f149ed8d3ca16 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 8 Aug 2025 10:47:12 +0200 Subject: [PATCH 4/6] Deprecate PACE_CONSTANTS in favor of NDSL_CONSTANTS --- ndsl/constants.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/ndsl/constants.py b/ndsl/constants.py index 7092f361..82d16d30 100644 --- a/ndsl/constants.py +++ b/ndsl/constants.py @@ -1,5 +1,6 @@ import os from enum import Enum +from typing import Literal import numpy as np @@ -16,13 +17,29 @@ class ConstantVersions(Enum): GEOS = "GEOS" # Constant as defined in GEOS v11.4.2 -CONST_VERSION_AS_STR = os.environ.get("PACE_CONSTANTS", "UFS") +def _get_constant_version( + default: Literal["GFDL", "UFS", "GEOS"] = "UFS", +) -> Literal["GFDL", "UFS", "GEOS"]: + if os.getenv("PACE_CONSTANTS", ""): + ndsl_log.warning("PACE_CONSTANTS is deprecated. Use NDSL_CONSTANTS instead.") + if os.getenv("NDSL_CONSTANTS", ""): + ndsl_log.warning( + "PACE_CONSTANTS and NDSL_CONSTANTS were both specified. NDSL_CONSTANTS will take precedence." 
+ ) -try: - CONST_VERSION = ConstantVersions[CONST_VERSION_AS_STR] - ndsl_log.info(f"Constant selected: {CONST_VERSION}") -except KeyError as e: - raise RuntimeError(f"Constants {CONST_VERSION_AS_STR} is not implemented, abort.") + constants_as_str = os.getenv("NDSL_CONSTANTS", os.getenv("PACE_CONSTANTS", default)) + expected: list[Literal["GFDL", "UFS", "GEOS"]] = ["GFDL", "UFS", "GEOS"] + + if constants_as_str not in expected: + raise RuntimeError( + f"Constants '{constants_as_str}' is not implemented, abort. Valid values are {expected}." + ) + + return constants_as_str # type: ignore + + +CONST_VERSION = ConstantVersions[_get_constant_version()] +ndsl_log.info(f"Constant selected: {CONST_VERSION}") ##################### # Common constants From 0c646beb3b681b4ffd42a681b6e593168b627430 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Fri, 8 Aug 2025 13:30:03 +0200 Subject: [PATCH 5/6] Don't use ndsl_log before it is defined --- ndsl/logging.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ndsl/logging.py b/ndsl/logging.py index 1d7e7e80..13c514d3 100644 --- a/ndsl/logging.py +++ b/ndsl/logging.py @@ -21,9 +21,9 @@ def _get_log_level(default: str = "info"): if os.getenv("PACE_LOGLEVEL", ""): - ndsl_log.warning("PACE_LOGLEVEL is deprecated. Use NDSL_LOGLEVEL instead.") + logging.warning("PACE_LOGLEVEL is deprecated. Use NDSL_LOGLEVEL instead.") if os.getenv("NDSL_LOGLEVEL", ""): - ndsl_log.warning( + logging.warning( "PACE_LOGLEVEL and NDSL_LOGLEVEL were both specified. NDSL_LOGLEVEL will take precedence." ) @@ -32,21 +32,20 @@ def _get_log_level(default: str = "info"): if loglevel in AVAILABLE_LOG_LEVELS.keys(): return loglevel - ndsl_log.warning( + logging.warning( f"Unknown log level '{loglevel}', falling back to '{default}'. Valid values are: {AVAILABLE_LOG_LEVELS.keys()}." 
) return default -LOGLEVEL = _get_log_level() - - def _ndsl_logger() -> logging.Logger: + log_level = _get_log_level() + name_log = logging.getLogger(__name__) - name_log.setLevel(AVAILABLE_LOG_LEVELS[LOGLEVEL]) + name_log.setLevel(AVAILABLE_LOG_LEVELS[log_level]) handler = logging.StreamHandler(sys.stdout) - handler.setLevel(AVAILABLE_LOG_LEVELS[LOGLEVEL]) + handler.setLevel(AVAILABLE_LOG_LEVELS[log_level]) formatter = logging.Formatter( fmt=( f"%(asctime)s|%(levelname)s|rank {MPI.COMM_WORLD.Get_rank()}|" @@ -60,14 +59,16 @@ def _ndsl_logger() -> logging.Logger: def _ndsl_logger_on_rank_0() -> logging.Logger: + log_level = _get_log_level() + name_log = logging.getLogger(f"{__name__}_on_rank_0") - name_log.setLevel(AVAILABLE_LOG_LEVELS[LOGLEVEL]) + name_log.setLevel(AVAILABLE_LOG_LEVELS[log_level]) rank = MPI.COMM_WORLD.Get_rank() if rank == 0: handler = logging.StreamHandler(sys.stdout) - handler.setLevel(AVAILABLE_LOG_LEVELS[LOGLEVEL]) + handler.setLevel(AVAILABLE_LOG_LEVELS[log_level]) formatter = logging.Formatter( fmt=( f"%(asctime)s|%(levelname)s|rank {MPI.COMM_WORLD.Get_rank()}|" From ae627f4d717087e5e0d40e11873f7e6f0792ab84 Mon Sep 17 00:00:00 2001 From: Roman Cattaneo <1116746+romanc@users.noreply.github.com> Date: Mon, 11 Aug 2025 09:16:04 +0200 Subject: [PATCH 6/6] Document correctly renamed env variable --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 02945fa0..a87616b0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -68,7 +68,7 @@ Configurations for Pace to use NDSL with different backend: - Run: load pre-compiled program and execute, fail if the .so is not present (_no hash check!_) (backend must be `dace:gpu` or `dace:cpu`) -- NDSL_FLOAT_PRECISION=64 control the floating point precision throughout the program. +- NDSL_LITERAL_PRECISION=64 controls the floating point precision throughout the program. Install Pace with different NDSL backend: