From 4debbd93371cace867afdde2b92e6c6602839c2b Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 10 Sep 2025 17:26:48 +0200 Subject: [PATCH 01/12] remove `FrozenCompiledSDFG` for safety reason --- ndsl/dsl/dace/build.py | 9 +++++++++ ndsl/dsl/dace/orchestration.py | 9 +++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ndsl/dsl/dace/build.py b/ndsl/dsl/dace/build.py index 25c30d00..bd4e8f72 100644 --- a/ndsl/dsl/dace/build.py +++ b/ndsl/dsl/dace/build.py @@ -128,6 +128,7 @@ def set_distributed_caches(config: DaceConfig): # Set read/write caches to the target rank from gt4py.cartesian import config as gt_config + import dace if config.do_compile: verb = "reading/writing" @@ -135,6 +136,14 @@ def set_distributed_caches(config: DaceConfig): verb = "reading" gt_config.cache_settings["dir_name"] = get_cache_directory(config.code_path) + + dace.Config.set( + "default_build_folder", + value="{gt_root}/{gt_cache}/dacecache".format( + gt_root=gt_config.cache_settings["root_path"], + gt_cache=gt_config.cache_settings["dir_name"], + ), + ) ndsl_log.info( f"[{orchestration_mode}] Rank {config.my_rank} " f"{verb} cache {gt_config.cache_settings['dir_name']}" diff --git a/ndsl/dsl/dace/orchestration.py b/ndsl/dsl/dace/orchestration.py index acbdd38b..6e2d46fc 100644 --- a/ndsl/dsl/dace/orchestration.py +++ b/ndsl/dsl/dace/orchestration.py @@ -220,9 +220,7 @@ def _build_sdfg( compiledSDFG, _ = dace_program.load_precompiled_sdfg( sdfg_path, *args, **kwargs ) - config.loaded_precompiled_SDFG[dace_program] = FrozenCompiledSDFG( - dace_program, compiledSDFG, args, kwargs - ) + config.loaded_precompiled_SDFG[dace_program] = compiledSDFG return _call_sdfg(dace_program, sdfg, config, args, kwargs) @@ -237,7 +235,10 @@ def _call_sdfg(dace_program: DaceProgram, sdfg: SDFG, config: DaceConfig, args, with DaCeProgress(config, "Run"): if config.is_gpu_backend(): _upload_to_device(list(args) + list(kwargs.values())) - res = config.loaded_precompiled_SDFG[dace_program]() + current_sdfg_args = dace_program._create_sdfg_args( + config.loaded_precompiled_SDFG[dace_program].sdfg, args, kwargs + ) + res = config.loaded_precompiled_SDFG[dace_program](**current_sdfg_args) res = _download_results_from_dace( config, res, list(args) + list(kwargs.values()) ) From c8888dfb15ae1cfbd875cc8be7e72a67ed5cc937 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Thu, 11 Sep 2025 14:11:21 +0200 Subject: [PATCH 02/12] fast corner impl --- ndsl/buffer.py | 9 +- ndsl/stencils/__init__.py | 5 +- ndsl/stencils/corners.py | 187 ++++++++++++++++++++++++++++++-------- 3 files changed, 157 insertions(+), 44 deletions(-) diff --git a/ndsl/buffer.py b/ndsl/buffer.py index 8f0c90fd..4bae4565 100644 --- a/ndsl/buffer.py +++ b/ndsl/buffer.py @@ -4,7 +4,6 @@ from typing import Callable, Dict, Generator, Iterable, List, Optional, Tuple import numpy as np -from numpy.lib.index_tricks import IndexExpression from ndsl.performance.timer import NullTimer, Timer from ndsl.types import Allocator @@ -78,8 +77,8 @@ def finalize_memory_transfer(self): def assign_to( self, destination_array: np.ndarray, - buffer_slice: IndexExpression = np.index_exp[:], - buffer_reshape: IndexExpression = None, + buffer_slice=np.index_exp[:], + buffer_reshape=None, ): """Assign internal array to destination_array. @@ -94,9 +93,7 @@ def assign_to( np.reshape(self.array[buffer_slice], buffer_reshape, order="C"), ) - def assign_from( - self, source_array: np.ndarray, buffer_slice: IndexExpression = np.index_exp[:] - ): + def assign_from(self, source_array: np.ndarray, buffer_slice=np.index_exp[:]): """Assign source_array to internal array. Args: diff --git a/ndsl/stencils/__init__.py b/ndsl/stencils/__init__.py index 8a635187..43647b75 100644 --- a/ndsl/stencils/__init__.py +++ b/ndsl/stencils/__init__.py @@ -1,8 +1,9 @@ -from .corners import CopyCorners, CopyCornersXY, FillCornersBGrid +from .corners import CopyCornersX, CopyCornersY, CopyCornersXY, FillCornersBGrid __all__ = [ - "CopyCorners", + "CopyCornersX", + "CopyCornersY", "CopyCornersXY", "FillCornersBGrid", ] diff --git a/ndsl/stencils/corners.py b/ndsl/stencils/corners.py index 4c13b021..358b7219 100644 --- a/ndsl/stencils/corners.py +++ b/ndsl/stencils/corners.py @@ -4,59 +4,171 @@ from gt4py.cartesian.gtscript import PARALLEL, computation, horizontal, interval, region from ndsl.constants import ( - X_DIM, X_INTERFACE_DIM, - Y_DIM, Y_INTERFACE_DIM, Z_INTERFACE_DIM, ) from ndsl.dsl.stencil import GridIndexing, StencilFactory from ndsl.dsl.typing import FloatField +from ndsl import StencilFactory, orchestrate -class CopyCorners: +def corner_copy_x(field_to_copy): + """Equivalent to the copy_corners_x functions in fortran. + + This is written to operate on plain ndarrarys and not use the GT4Py framework. + This choice was made because we've seen a lot of performance left on the table using + orchestration without explicitly describing the operations but rather have full 3d- + sweeps with conditionals. + Since DaCe can handle (simple) operations on ndarrays directly this gives us a more + explicit entrypoint to the language and more optimization-potential. + + Args: + field_to_copy (ndarray): field to apply the corner copy on. + This is explicitly not type-hinted for orchestration + """ + field_to_copy[0, 0] = field_to_copy[0, 5] + field_to_copy[0, 1] = field_to_copy[1, 5] + field_to_copy[0, 2] = field_to_copy[2, 5] + + field_to_copy[1, 0] = field_to_copy[0, 4] + field_to_copy[1, 1] = field_to_copy[1, 4] + field_to_copy[1, 2] = field_to_copy[2, 4] + + field_to_copy[2, 0] = field_to_copy[0, 3] + field_to_copy[2, 1] = field_to_copy[1, 3] + field_to_copy[2, 2] = field_to_copy[2, 3] + + field_to_copy[0, -4] = field_to_copy[2, -7] + field_to_copy[0, -3] = field_to_copy[1, -7] + field_to_copy[0, -2] = field_to_copy[0, -7] + + field_to_copy[1, -4] = field_to_copy[2, -6] + field_to_copy[1, -3] = field_to_copy[1, -6] + field_to_copy[1, -2] = field_to_copy[0, -6] + + field_to_copy[2, -4] = field_to_copy[2, -5] + field_to_copy[2, -3] = field_to_copy[1, -5] + field_to_copy[2, -2] = field_to_copy[0, -5] + + field_to_copy[-4, 0] = field_to_copy[-2, 3] + field_to_copy[-4, 1] = field_to_copy[-3, 3] + field_to_copy[-4, 2] = field_to_copy[-4, 3] + + field_to_copy[-3, 0] = field_to_copy[-2, 4] + field_to_copy[-3, 1] = field_to_copy[-3, 4] + field_to_copy[-3, 2] = field_to_copy[-4, 4] + + field_to_copy[-2, 0] = field_to_copy[-2, 5] + field_to_copy[-2, 1] = field_to_copy[-3, 5] + field_to_copy[-2, 2] = field_to_copy[-4, 5] + + field_to_copy[-4, -2] = field_to_copy[-2, -5] + field_to_copy[-4, -3] = field_to_copy[-3, -5] + field_to_copy[-4, -4] = field_to_copy[-4, -5] + + field_to_copy[-3, -2] = field_to_copy[-2, -6] + field_to_copy[-3, -3] = field_to_copy[-3, -6] + field_to_copy[-3, -4] = field_to_copy[-4, -6] + + field_to_copy[-2, -2] = field_to_copy[-2, -7] + field_to_copy[-2, -3] = field_to_copy[-3, -7] + field_to_copy[-2, -4] = field_to_copy[-4, -7] + + +def corner_copy_y(field_to_copy): + """Equivalent to the copy_corners_y functions in fortran. + + This is written to operate on plain ndarrarys and not use the GT4Py framework. + This choice was made because we've seen a lot of performance left on the table using + orchestration without explicitly describing the operations but rather have full 3d- + sweeps with conditionals. + Since DaCe can handle (simple) operations on ndarrays directly this gives us a more + explicit entrypoint to the language and more optimization-potential. + + Args: + field_to_copy (ndarray): field to apply the corner copy on. + This is explicitly not type-hinted for orchestration + """ + field_to_copy[0, 0] = field_to_copy[5, 0] + field_to_copy[1, 0] = field_to_copy[5, 1] + field_to_copy[2, 0] = field_to_copy[5, 2] + + field_to_copy[0, 1] = field_to_copy[4, 0] + field_to_copy[1, 1] = field_to_copy[4, 1] + field_to_copy[2, 1] = field_to_copy[4, 2] + + field_to_copy[0, 2] = field_to_copy[3, 0] + field_to_copy[1, 2] = field_to_copy[3, 1] + field_to_copy[2, 2] = field_to_copy[3, 2] + + field_to_copy[-4, 0] = field_to_copy[-7, 2] + field_to_copy[-3, 0] = field_to_copy[-7, 1] + field_to_copy[-2, 0] = field_to_copy[-7, 0] + + field_to_copy[-4, 1] = field_to_copy[-6, 2] + field_to_copy[-3, 1] = field_to_copy[-6, 1] + field_to_copy[-2, 1] = field_to_copy[-6, 0] + + field_to_copy[-4, 2] = field_to_copy[-5, 2] + field_to_copy[-3, 2] = field_to_copy[-5, 1] + field_to_copy[-2, 2] = field_to_copy[-5, 0] + + field_to_copy[0, -2] = field_to_copy[5, -2] + field_to_copy[0, -3] = field_to_copy[4, -2] + field_to_copy[0, -4] = field_to_copy[3, -2] + + field_to_copy[1, -2] = field_to_copy[5, -3] + field_to_copy[1, -3] = field_to_copy[4, -3] + field_to_copy[1, -4] = field_to_copy[3, -3] + + field_to_copy[2, -2] = field_to_copy[5, -4] + field_to_copy[2, -3] = field_to_copy[4, -4] + field_to_copy[2, -4] = field_to_copy[3, -4] + + field_to_copy[-2, -4] = field_to_copy[-5, -2] + field_to_copy[-2, -3] = field_to_copy[-6, -2] + field_to_copy[-2, -2] = field_to_copy[-7, -2] + + field_to_copy[-3, -4] = field_to_copy[-5, -3] + field_to_copy[-3, -3] = field_to_copy[-6, -3] + field_to_copy[-3, -2] = field_to_copy[-7, -3] + + field_to_copy[-4, -4] = field_to_copy[-5, -4] + field_to_copy[-4, -3] = field_to_copy[-6, -4] + field_to_copy[-4, -2] = field_to_copy[-7, -4] + + +class CopyCornersX: """ Helper-class to copy corners corresponding to the fortran functions copy_corners_x or copy_corners_y respectively """ - def __init__(self, direction: str, stencil_factory: StencilFactory) -> None: - """The grid for this stencil""" - grid_indexing = stencil_factory.grid_indexing - - n_halo = grid_indexing.n_halo - origin, domain = grid_indexing.get_origin_domain( - dims=[X_DIM, Y_DIM, Z_INTERFACE_DIM], halos=(n_halo, n_halo) + def __init__(self, stencil_factory: StencilFactory) -> None: + orchestrate( + obj=self, + config=stencil_factory.config.dace_config, ) - ax_offsets = grid_indexing.axis_offsets(origin, domain) - if direction == "x": - self._copy_corners = stencil_factory.from_origin_domain( - func=copy_corners_x_stencil_defn, - origin=origin, - domain=domain, - externals={ - **ax_offsets, - }, - ) - elif direction == "y": - self._copy_corners = stencil_factory.from_origin_domain( - func=copy_corners_y_stencil_defn, - origin=origin, - domain=domain, - externals={ - **ax_offsets, - }, - ) - else: - raise ValueError("Direction must be either 'x' or 'y'") + def __call__(self, field: FloatField): + corner_copy_x(field) + + +class CopyCornersY: + """ + Helper-class to copy corners corresponding to the fortran functions + copy_corners_x or copy_corners_y respectively + """ + + def __init__(self, stencil_factory: StencilFactory) -> None: + orchestrate( + obj=self, + config=stencil_factory.config.dace_config, + ) def __call__(self, field: FloatField): - """ - Fills cell quantity field using corners from itself and multipliers - in the direction specified initialization of the instance of this class. - """ - self._copy_corners(field, field) + corner_copy_y(field) class CopyCornersXY: @@ -1001,6 +1113,9 @@ def fill_corners_dgrid_defn( from __externals__ import i_end, i_start, j_end, j_start with computation(PARALLEL), interval(...): + # this line of code is used to fix the missing symbol crash due to the node visitor depth limitation + acoef = mysign + x_out = x_out # sw corner with horizontal(region[i_start - 1, j_start - 1]): x_out = mysign * y_in[0, 1, 0] From 82c152e9f66d9774fa0be629a115e5cbef4317a9 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Tue, 23 Sep 2025 17:53:35 +0200 Subject: [PATCH 03/12] revert buffer changes --- ndsl/buffer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ndsl/buffer.py b/ndsl/buffer.py index 4bae4565..8f0c90fd 100644 --- a/ndsl/buffer.py +++ b/ndsl/buffer.py @@ -4,6 +4,7 @@ from typing import Callable, Dict, Generator, Iterable, List, Optional, Tuple import numpy as np +from numpy.lib.index_tricks import IndexExpression from ndsl.performance.timer import NullTimer, Timer from ndsl.types import Allocator @@ -77,8 +78,8 @@ def finalize_memory_transfer(self): def assign_to( self, destination_array: np.ndarray, - buffer_slice=np.index_exp[:], - buffer_reshape=None, + buffer_slice: IndexExpression = np.index_exp[:], + buffer_reshape: IndexExpression = None, ): """Assign internal array to destination_array. @@ -93,7 +94,9 @@ def assign_to( np.reshape(self.array[buffer_slice], buffer_reshape, order="C"), ) - def assign_from(self, source_array: np.ndarray, buffer_slice=np.index_exp[:]): + def assign_from( + self, source_array: np.ndarray, buffer_slice: IndexExpression = np.index_exp[:] + ): """Assign source_array to internal array. Args: From 86641a776d0b11864f0b92430db1b19846936fca Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 24 Sep 2025 09:52:33 +0200 Subject: [PATCH 04/12] odd merge --- ndsl/dsl/dace/build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ndsl/dsl/dace/build.py b/ndsl/dsl/dace/build.py index 4e116b5c..ba256e38 100644 --- a/ndsl/dsl/dace/build.py +++ b/ndsl/dsl/dace/build.py @@ -147,6 +147,7 @@ def set_distributed_caches(config: DaceConfig): gt_cache=gt_config.cache_settings["dir_name"], ), ) + ndsl_log.info( f"[{orchestration_mode}] Rank {config.my_rank} " f"{verb} cache {gt_config.cache_settings['dir_name']}" From e98a6893be46498dbf429ab8b0b535d6fb968fab Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 24 Sep 2025 14:40:36 +0200 Subject: [PATCH 05/12] deprecate instead of remove --- ndsl/stencils/corners.py | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/ndsl/stencils/corners.py b/ndsl/stencils/corners.py index 358b7219..1b55fd96 100644 --- a/ndsl/stencils/corners.py +++ b/ndsl/stencils/corners.py @@ -1,10 +1,13 @@ from typing import Optional, Sequence, Tuple +import warnings from gt4py.cartesian import gtscript from gt4py.cartesian.gtscript import PARALLEL, computation, horizontal, interval, region from ndsl.constants import ( + X_DIM, X_INTERFACE_DIM, + Y_DIM, Y_INTERFACE_DIM, Z_INTERFACE_DIM, ) @@ -13,6 +16,58 @@ from ndsl import StencilFactory, orchestrate +class CopyCorners: + """ + Helper-class to copy corners corresponding to the fortran functions + copy_corners_x or copy_corners_y respectively + """ + + def __init__(self, direction: str, stencil_factory: StencilFactory) -> None: + """The grid for this stencil""" + warnings.warn( + "Usage of the GT4Py implementation of CopyCorners is discouraged and will" + "be removed in the next release. Use `CopyCornersX` or `CopyCornersY` directly" + "for a more future-proof implementation of the same code.", + DeprecationWarning, + 2, + ) + grid_indexing = stencil_factory.grid_indexing + + n_halo = grid_indexing.n_halo + origin, domain = grid_indexing.get_origin_domain( + dims=[X_DIM, Y_DIM, Z_INTERFACE_DIM], halos=(n_halo, n_halo) + ) + + ax_offsets = grid_indexing.axis_offsets(origin, domain) + if direction == "x": + self._copy_corners = stencil_factory.from_origin_domain( + func=copy_corners_x_stencil_defn, + origin=origin, + domain=domain, + externals={ + **ax_offsets, + }, + ) + elif direction == "y": + self._copy_corners = stencil_factory.from_origin_domain( + func=copy_corners_y_stencil_defn, + origin=origin, + domain=domain, + externals={ + **ax_offsets, + }, + ) + else: + raise ValueError("Direction must be either 'x' or 'y'") + + def __call__(self, field: FloatField): + """ + Fills cell quantity field using corners from itself and multipliers + in the direction specified initialization of the instance of this class. + """ + self._copy_corners(field, field) + + def corner_copy_x(field_to_copy): """Equivalent to the copy_corners_x functions in fortran. @@ -141,8 +196,7 @@ def corner_copy_y(field_to_copy): class CopyCornersX: """ - Helper-class to copy corners corresponding to the fortran functions - copy_corners_x or copy_corners_y respectively + Helper-class to copy corners corresponding to the fortran function copy_corners_x """ def __init__(self, stencil_factory: StencilFactory) -> None: @@ -157,8 +211,8 @@ def __call__(self, field: FloatField): class CopyCornersY: """ - Helper-class to copy corners corresponding to the fortran functions - copy_corners_x or copy_corners_y respectively + Helper-class to copy corners corresponding to the fortran function + copy_corners_y """ def __init__(self, stencil_factory: StencilFactory) -> None: @@ -1113,9 +1167,6 @@ def fill_corners_dgrid_defn( from __externals__ import i_end, i_start, j_end, j_start with computation(PARALLEL), interval(...): - # this line of code is used to fix the missing symbol crash due to the node visitor depth limitation - acoef = mysign - x_out = x_out # sw corner with horizontal(region[i_start - 1, j_start - 1]): x_out = mysign * y_in[0, 1, 0] From 34ba08511183e059ce6619b70e1ef87d6bad258c Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 24 Sep 2025 14:41:33 +0200 Subject: [PATCH 06/12] pc --- ndsl/stencils/corners.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ndsl/stencils/corners.py b/ndsl/stencils/corners.py index 1b55fd96..e7097741 100644 --- a/ndsl/stencils/corners.py +++ b/ndsl/stencils/corners.py @@ -1,9 +1,10 @@ -from typing import Optional, Sequence, Tuple import warnings +from typing import Optional, Sequence, Tuple from gt4py.cartesian import gtscript from gt4py.cartesian.gtscript import PARALLEL, computation, horizontal, interval, region +from ndsl import StencilFactory, orchestrate from ndsl.constants import ( X_DIM, X_INTERFACE_DIM, @@ -11,9 +12,8 @@ Y_INTERFACE_DIM, Z_INTERFACE_DIM, ) -from ndsl.dsl.stencil import GridIndexing, StencilFactory +from ndsl.dsl.stencil import GridIndexing from ndsl.dsl.typing import FloatField -from ndsl import StencilFactory, orchestrate class CopyCorners: From 5eecb1a3ab87a5eef0af6be3e2c48817c5e68347 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 24 Sep 2025 14:42:07 +0200 Subject: [PATCH 07/12] add init --- ndsl/stencils/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ndsl/stencils/__init__.py b/ndsl/stencils/__init__.py index 43647b75..49c1958d 100644 --- a/ndsl/stencils/__init__.py +++ b/ndsl/stencils/__init__.py @@ -1,7 +1,14 @@ -from .corners import CopyCornersX, CopyCornersY, CopyCornersXY, FillCornersBGrid +from .corners import ( + CopyCorners, + CopyCornersX, + CopyCornersXY, + CopyCornersY, + FillCornersBGrid, +) __all__ = [ + "CopyCorners", "CopyCornersX", "CopyCornersY", "CopyCornersXY", From bbbb7dd2de0e619ed92a589c80f0fb7af3a1ba88 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Tue, 7 Oct 2025 18:06:13 +0200 Subject: [PATCH 08/12] remove em again --- ndsl/dsl/stencil_config.py | 2 +- ndsl/stencils/corners.py | 159 +------------------------------------ 2 files changed, 2 insertions(+), 159 deletions(-) diff --git a/ndsl/dsl/stencil_config.py b/ndsl/dsl/stencil_config.py index 4d3eafab..8d4ffcf5 100644 --- a/ndsl/dsl/stencil_config.py +++ b/ndsl/dsl/stencil_config.py @@ -168,7 +168,7 @@ def from_dict(cls, data: dict): class StencilConfig(Hashable): compare_to_numpy: bool = False compilation_config: CompilationConfig = CompilationConfig() - dace_config: Optional[DaceConfig] = None + dace_config: DaceConfig = None verbose: bool = False def __post_init__(self): diff --git a/ndsl/stencils/corners.py b/ndsl/stencils/corners.py index e7097741..93f93297 100644 --- a/ndsl/stencils/corners.py +++ b/ndsl/stencils/corners.py @@ -26,7 +26,7 @@ def __init__(self, direction: str, stencil_factory: StencilFactory) -> None: """The grid for this stencil""" warnings.warn( "Usage of the GT4Py implementation of CopyCorners is discouraged and will" - "be removed in the next release. Use `CopyCornersX` or `CopyCornersY` directly" + "be removed in the next release. Use `CopyCornersX` or `CopyCornersY` in PyFV3" "for a more future-proof implementation of the same code.", DeprecationWarning, 2, @@ -68,163 +68,6 @@ def __call__(self, field: FloatField): self._copy_corners(field, field) -def corner_copy_x(field_to_copy): - """Equivalent to the copy_corners_x functions in fortran. - - This is written to operate on plain ndarrarys and not use the GT4Py framework. - This choice was made because we've seen a lot of performance left on the table using - orchestration without explicitly describing the operations but rather have full 3d- - sweeps with conditionals. - Since DaCe can handle (simple) operations on ndarrays directly this gives us a more - explicit entrypoint to the language and more optimization-potential. - - Args: - field_to_copy (ndarray): field to apply the corner copy on. - This is explicitly not type-hinted for orchestration - """ - field_to_copy[0, 0] = field_to_copy[0, 5] - field_to_copy[0, 1] = field_to_copy[1, 5] - field_to_copy[0, 2] = field_to_copy[2, 5] - - field_to_copy[1, 0] = field_to_copy[0, 4] - field_to_copy[1, 1] = field_to_copy[1, 4] - field_to_copy[1, 2] = field_to_copy[2, 4] - - field_to_copy[2, 0] = field_to_copy[0, 3] - field_to_copy[2, 1] = field_to_copy[1, 3] - field_to_copy[2, 2] = field_to_copy[2, 3] - - field_to_copy[0, -4] = field_to_copy[2, -7] - field_to_copy[0, -3] = field_to_copy[1, -7] - field_to_copy[0, -2] = field_to_copy[0, -7] - - field_to_copy[1, -4] = field_to_copy[2, -6] - field_to_copy[1, -3] = field_to_copy[1, -6] - field_to_copy[1, -2] = field_to_copy[0, -6] - - field_to_copy[2, -4] = field_to_copy[2, -5] - field_to_copy[2, -3] = field_to_copy[1, -5] - field_to_copy[2, -2] = field_to_copy[0, -5] - - field_to_copy[-4, 0] = field_to_copy[-2, 3] - field_to_copy[-4, 1] = field_to_copy[-3, 3] - field_to_copy[-4, 2] = field_to_copy[-4, 3] - - field_to_copy[-3, 0] = field_to_copy[-2, 4] - field_to_copy[-3, 1] = field_to_copy[-3, 4] - field_to_copy[-3, 2] = field_to_copy[-4, 4] - - field_to_copy[-2, 0] = field_to_copy[-2, 5] - field_to_copy[-2, 1] = field_to_copy[-3, 5] - field_to_copy[-2, 2] = field_to_copy[-4, 5] - - field_to_copy[-4, -2] = field_to_copy[-2, -5] - field_to_copy[-4, -3] = field_to_copy[-3, -5] - field_to_copy[-4, -4] = field_to_copy[-4, -5] - - field_to_copy[-3, -2] = field_to_copy[-2, -6] - field_to_copy[-3, -3] = field_to_copy[-3, -6] - field_to_copy[-3, -4] = field_to_copy[-4, -6] - - field_to_copy[-2, -2] = field_to_copy[-2, -7] - field_to_copy[-2, -3] = field_to_copy[-3, -7] - field_to_copy[-2, -4] = field_to_copy[-4, -7] - - -def corner_copy_y(field_to_copy): - """Equivalent to the copy_corners_y functions in fortran. - - This is written to operate on plain ndarrarys and not use the GT4Py framework. - This choice was made because we've seen a lot of performance left on the table using - orchestration without explicitly describing the operations but rather have full 3d- - sweeps with conditionals. - Since DaCe can handle (simple) operations on ndarrays directly this gives us a more - explicit entrypoint to the language and more optimization-potential. - - Args: - field_to_copy (ndarray): field to apply the corner copy on. - This is explicitly not type-hinted for orchestration - """ - field_to_copy[0, 0] = field_to_copy[5, 0] - field_to_copy[1, 0] = field_to_copy[5, 1] - field_to_copy[2, 0] = field_to_copy[5, 2] - - field_to_copy[0, 1] = field_to_copy[4, 0] - field_to_copy[1, 1] = field_to_copy[4, 1] - field_to_copy[2, 1] = field_to_copy[4, 2] - - field_to_copy[0, 2] = field_to_copy[3, 0] - field_to_copy[1, 2] = field_to_copy[3, 1] - field_to_copy[2, 2] = field_to_copy[3, 2] - - field_to_copy[-4, 0] = field_to_copy[-7, 2] - field_to_copy[-3, 0] = field_to_copy[-7, 1] - field_to_copy[-2, 0] = field_to_copy[-7, 0] - - field_to_copy[-4, 1] = field_to_copy[-6, 2] - field_to_copy[-3, 1] = field_to_copy[-6, 1] - field_to_copy[-2, 1] = field_to_copy[-6, 0] - - field_to_copy[-4, 2] = field_to_copy[-5, 2] - field_to_copy[-3, 2] = field_to_copy[-5, 1] - field_to_copy[-2, 2] = field_to_copy[-5, 0] - - field_to_copy[0, -2] = field_to_copy[5, -2] - field_to_copy[0, -3] = field_to_copy[4, -2] - field_to_copy[0, -4] = field_to_copy[3, -2] - - field_to_copy[1, -2] = field_to_copy[5, -3] - field_to_copy[1, -3] = field_to_copy[4, -3] - field_to_copy[1, -4] = field_to_copy[3, -3] - - field_to_copy[2, -2] = field_to_copy[5, -4] - field_to_copy[2, -3] = field_to_copy[4, -4] - field_to_copy[2, -4] = field_to_copy[3, -4] - - field_to_copy[-2, -4] = field_to_copy[-5, -2] - field_to_copy[-2, -3] = field_to_copy[-6, -2] - field_to_copy[-2, -2] = field_to_copy[-7, -2] - - field_to_copy[-3, -4] = field_to_copy[-5, -3] - field_to_copy[-3, -3] = field_to_copy[-6, -3] - field_to_copy[-3, -2] = field_to_copy[-7, -3] - - field_to_copy[-4, -4] = field_to_copy[-5, -4] - field_to_copy[-4, -3] = field_to_copy[-6, -4] - field_to_copy[-4, -2] = field_to_copy[-7, -4] - - -class CopyCornersX: - """ - Helper-class to copy corners corresponding to the fortran function copy_corners_x - """ - - def __init__(self, stencil_factory: StencilFactory) -> None: - orchestrate( - obj=self, - config=stencil_factory.config.dace_config, - ) - - def __call__(self, field: FloatField): - corner_copy_x(field) - - -class CopyCornersY: - """ - Helper-class to copy corners corresponding to the fortran function - copy_corners_y - """ - - def __init__(self, stencil_factory: StencilFactory) -> None: - orchestrate( - obj=self, - config=stencil_factory.config.dace_config, - ) - - def __call__(self, field: FloatField): - corner_copy_y(field) - - class CopyCornersXY: """ Helper-class to copy corners corresponding to the Fortran functions From 415c593ebe2d11b7fa8c436ba81f152e0edf8732 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Tue, 7 Oct 2025 18:08:41 +0200 Subject: [PATCH 09/12] clean up --- ndsl/stencils/__init__.py | 10 +--------- ndsl/stencils/corners.py | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/ndsl/stencils/__init__.py b/ndsl/stencils/__init__.py index 49c1958d..8a635187 100644 --- a/ndsl/stencils/__init__.py +++ b/ndsl/stencils/__init__.py @@ -1,16 +1,8 @@ -from .corners import ( - CopyCorners, - CopyCornersX, - CopyCornersXY, - CopyCornersY, - FillCornersBGrid, -) +from .corners import CopyCorners, CopyCornersXY, FillCornersBGrid __all__ = [ "CopyCorners", - "CopyCornersX", - "CopyCornersY", "CopyCornersXY", "FillCornersBGrid", ] diff --git a/ndsl/stencils/corners.py b/ndsl/stencils/corners.py index 93f93297..d4d48a0d 100644 --- a/ndsl/stencils/corners.py +++ b/ndsl/stencils/corners.py @@ -4,7 +4,7 @@ from gt4py.cartesian import gtscript from gt4py.cartesian.gtscript import PARALLEL, computation, horizontal, interval, region -from ndsl import StencilFactory, orchestrate +from ndsl import StencilFactory from ndsl.constants import ( X_DIM, X_INTERFACE_DIM, From f570651afc5df6f863e721e5db163561c82eaa19 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 8 Oct 2025 17:09:08 +0200 Subject: [PATCH 10/12] proper typing on dace-config --- ndsl/dsl/stencil_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ndsl/dsl/stencil_config.py b/ndsl/dsl/stencil_config.py index 8d4ffcf5..619d86c6 100644 --- a/ndsl/dsl/stencil_config.py +++ b/ndsl/dsl/stencil_config.py @@ -168,7 +168,7 @@ def from_dict(cls, data: dict): class StencilConfig(Hashable): compare_to_numpy: bool = False compilation_config: CompilationConfig = CompilationConfig() - dace_config: DaceConfig = None + dace_config: DaceConfig = dataclasses.field(init=False) verbose: bool = False def __post_init__(self): @@ -181,7 +181,7 @@ def __post_init__(self): # We need a DaceConfig to know if orchestration is part of the build system # but we can't hash it very well (for now). The workaround is to make # sure we have a default Python orchestrated config. - if self.dace_config is None: + if not hasattr(self, "dace_config"): self.dace_config = DaceConfig( communicator=None, backend=self.compilation_config.backend, From 494840fab90add5848df98295b7249896fd52dab Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 8 Oct 2025 17:27:47 +0200 Subject: [PATCH 11/12] MISSING solution, because the rest does not work --- ndsl/dsl/stencil_config.py | 5 +++-- ndsl/types.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ndsl/dsl/stencil_config.py b/ndsl/dsl/stencil_config.py index 619d86c6..97f95540 100644 --- a/ndsl/dsl/stencil_config.py +++ b/ndsl/dsl/stencil_config.py @@ -10,6 +10,7 @@ from ndsl.comm.partitioner import Partitioner from ndsl.dsl.dace.dace_config import DaceConfig, DaCeOrchestration from ndsl.dsl.gt4py_utils import is_gpu_backend +from ndsl.types import MISSING class RunMode(enum.Enum): @@ -168,7 +169,7 @@ def from_dict(cls, data: dict): class StencilConfig(Hashable): compare_to_numpy: bool = False compilation_config: CompilationConfig = CompilationConfig() - dace_config: DaceConfig = dataclasses.field(init=False) + dace_config: DaceConfig = dataclasses.field(default=MISSING) verbose: bool = False def __post_init__(self): @@ -181,7 +182,7 @@ def __post_init__(self): # We need a DaceConfig to know if orchestration is part of the build system # but we can't hash it very well (for now). The workaround is to make # sure we have a default Python orchestrated config. - if not hasattr(self, "dace_config"): + if self.dace_config is MISSING: self.dace_config = DaceConfig( communicator=None, backend=self.compilation_config.backend, diff --git a/ndsl/types.py b/ndsl/types.py index e3461c39..f4d5bed8 100644 --- a/ndsl/types.py +++ b/ndsl/types.py @@ -14,7 +14,6 @@ def __call__(self, shape: Iterable[int], dtype: type): class NumpyModule(Protocol): - empty: Allocator zeros: Allocator ones: Allocator @@ -46,3 +45,7 @@ class AsyncRequest(Protocol): def wait(self): """Block the current thread waiting for the request to be completed""" ... + + +class MISSING: + """Class for optional arguments to dataclasses that can't trivially construct.""" From bc60f288d3c6e7dfe4cb3136cbf1b66149f49014 Mon Sep 17 00:00:00 2001 From: Tobias Wicky-Pfund Date: Wed, 8 Oct 2025 17:33:44 +0200 Subject: [PATCH 12/12] missing is also somewhat ugly --- ndsl/dsl/stencil_config.py | 34 +++++++++++++++++++++------------- ndsl/types.py | 4 ---- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/ndsl/dsl/stencil_config.py b/ndsl/dsl/stencil_config.py index 97f95540..0ca0da18 100644 --- a/ndsl/dsl/stencil_config.py +++ b/ndsl/dsl/stencil_config.py @@ -10,7 +10,6 @@ from ndsl.comm.partitioner import Partitioner from ndsl.dsl.dace.dace_config import DaceConfig, DaCeOrchestration from ndsl.dsl.gt4py_utils import is_gpu_backend -from ndsl.types import MISSING class RunMode(enum.Enum): @@ -169,26 +168,35 @@ def from_dict(cls, data: dict): class StencilConfig(Hashable): compare_to_numpy: bool = False compilation_config: CompilationConfig = CompilationConfig() - dace_config: DaceConfig = dataclasses.field(default=MISSING) verbose: bool = False + dace_config: DaceConfig = dataclasses.field(init=False) - def __post_init__(self): + def __init__( + self, + *, + compare_to_numpy: bool = False, + compilation_config: CompilationConfig = CompilationConfig(), + verbose: bool = False, + dace_config: DaceConfig | None = None, + ): + self.compare_to_numpy = compare_to_numpy + self.compilation_config = compilation_config + self.verbose = verbose + self.dace_config = ( + dace_config + if dace_config is not None + else DaceConfig( + communicator=None, + backend=self.compilation_config.backend, + orchestration=DaCeOrchestration.Python, + ) + ) self.backend_opts = { "device_sync": self.compilation_config.device_sync, "format_source": self.compilation_config.format_source, } self._hash = self._compute_hash() - # We need a DaceConfig to know if orchestration is part of the build system - # but we can't hash it very well (for now). The workaround is to make - # sure we have a default Python orchestrated config. - if self.dace_config is MISSING: - self.dace_config = DaceConfig( - communicator=None, - backend=self.compilation_config.backend, - orchestration=DaCeOrchestration.Python, - ) - @property def backend(self): return self.compilation_config.backend diff --git a/ndsl/types.py b/ndsl/types.py index f4d5bed8..2251789b 100644 --- a/ndsl/types.py +++ b/ndsl/types.py @@ -45,7 +45,3 @@ class AsyncRequest(Protocol): def wait(self): """Block the current thread waiting for the request to be completed""" ... - - -class MISSING: - """Class for optional arguments to dataclasses that can't trivially construct."""