From c1e011cf1693d02491db910f33bab7ae027bfa11 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 6 Jul 2023 13:05:53 -0400 Subject: [PATCH 1/7] Add floating point precision to GEOS bridge init --- dsl/pace/dsl/typing.py | 6 +++++- fv3core/pace/fv3core/initialization/geos_wrapper.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dsl/pace/dsl/typing.py b/dsl/pace/dsl/typing.py index 05b255ce..d67dd7b6 100644 --- a/dsl/pace/dsl/typing.py +++ b/dsl/pace/dsl/typing.py @@ -22,11 +22,15 @@ DTypes = Union[bool, np.bool_, int, np.int32, np.int64, float, np.float32, np.float64] +def floating_point_precision() -> int: + return int(os.getenv("PACE_FLOAT_PRECISION", "64")) + + def global_set_floating_point_precision(): """Set the global floating point precision for all reference to Float in the codebase. Defaults to 64 bit.""" global Float - precision_in_bit = int(os.getenv("PACE_FLOAT_PRECISION", "64")) + precision_in_bit = floating_point_precision() if precision_in_bit == 64: return np.float64 elif precision_in_bit == 32: diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 2835e77e..9fbb98ab 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -10,6 +10,7 @@ from pace import fv3core from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate +from pace.dsl.typing import floating_point_precision from pace.dsl.gt4py_utils import is_gpu_backend from pace.util.logging import pace_log @@ -136,6 +137,7 @@ def __init__( f" dt : {self.dycore_state.bdt}\n" f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" f" backend: {backend}\n" + f" float : {floating_point_precision()}bit" f" orchestration: {self._is_orchestrated}\n" f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" ) From c58a2a126c83cc0a1014bfa824a7b78f7fd1cd19 Mon Sep 
17 00:00:00 2001 From: Florian Deconinck Date: Thu, 6 Jul 2023 14:28:24 -0400 Subject: [PATCH 2/7] lint --- .../fv3core/initialization/geos_wrapper.py | 2 +- util/pace/util/grid/eta.py | 64 +++++++++---------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 9fbb98ab..8ca5c890 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -10,8 +10,8 @@ from pace import fv3core from pace.driver.performance.collector import PerformanceCollector from pace.dsl.dace import DaceConfig, orchestrate -from pace.dsl.typing import floating_point_precision from pace.dsl.gt4py_utils import is_gpu_backend +from pace.dsl.typing import floating_point_precision from pace.util.logging import pace_log diff --git a/util/pace/util/grid/eta.py b/util/pace/util/grid/eta.py index 075bc920..dc37aaa2 100644 --- a/util/pace/util/grid/eta.py +++ b/util/pace/util/grid/eta.py @@ -206,7 +206,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 91: - ak = np.array( [ 1.00000000, @@ -402,7 +401,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 72: - ak = np.array( [ 1.00000000, @@ -560,7 +558,6 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: ) elif km == 137: - ak = np.array( [ 1.00000000, @@ -761,35 +758,35 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: 0.00000000, 0.00000000, 0.00000000, - 7.00000010E-06, - 2.40000008E-05, - 5.90000018E-05, - 1.12000002E-04, - 1.99000002E-04, - 3.39999999E-04, - 5.61999972E-04, - 8.90000025E-04, - 1.35300006E-03, - 1.99200003E-03, - 2.85700010E-03, - 3.97100020E-03, - 5.37799997E-03, - 7.13300006E-03, - 9.26099997E-03, - 1.18060000E-02, - 1.48160001E-02, - 1.83179993E-02, - 2.23549996E-02, - 2.69639995E-02, - 3.21759991E-02, 
- 3.80260013E-02, - 4.45480011E-02, - 5.17730005E-02, - 5.97280003E-02, - 6.84479997E-02, - 7.79580027E-02, - 8.82859975E-02, - 9.94620025E-02, + 7.00000010e-06, + 2.40000008e-05, + 5.90000018e-05, + 1.12000002e-04, + 1.99000002e-04, + 3.39999999e-04, + 5.61999972e-04, + 8.90000025e-04, + 1.35300006e-03, + 1.99200003e-03, + 2.85700010e-03, + 3.97100020e-03, + 5.37799997e-03, + 7.13300006e-03, + 9.26099997e-03, + 1.18060000e-02, + 1.48160001e-02, + 1.83179993e-02, + 2.23549996e-02, + 2.69639995e-02, + 3.21759991e-02, + 3.80260013e-02, + 4.45480011e-02, + 5.17730005e-02, + 5.97280003e-02, + 6.84479997e-02, + 7.79580027e-02, + 8.82859975e-02, + 9.94620025e-02, 0.111505002, 0.124448001, 0.138312995, @@ -849,7 +846,8 @@ def set_hybrid_pressure_coefficients(km: int) -> HybridPressureCoefficients: else: raise NotImplementedError( - "Only grids with 72, 79, 91 or 137 vertical levels have been implemented so far" + "Only grids with 72, 79, 91 or 137 vertical levels " + "have been implemented so far" ) if 0.0 in bk: From 8e362a2913360c6998a402c11c7132b4add3f3bb Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 7 Jul 2023 09:50:04 -0400 Subject: [PATCH 3/7] Add device PCI bus id (for MPS debug) --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 8ca5c890..87ce0193 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -5,6 +5,7 @@ import f90nml import numpy as np +from pace.util._optional_imports import cupy as cp import pace.util from pace import fv3core @@ -132,6 +133,11 @@ def __init__( self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() + device_ordinal_info = "" + if is_gpu_backend(): + device_ordinal_info = ( + f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" + ) pace_log.info( 
"Pace GEOS wrapper initialized: \n" f" dt : {self.dycore_state.bdt}\n" @@ -139,7 +145,8 @@ def __init__( f" backend: {backend}\n" f" float : {floating_point_precision()}bit" f" orchestration: {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" + f" {device_ordinal_info}" ) def _critical_path(self): From adc5ee501a0b36b8478cf751012fc619ee832b44 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Fri, 7 Jul 2023 09:59:24 -0400 Subject: [PATCH 4/7] Typo + lint --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 87ce0193..7f8f05d3 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -5,7 +5,6 @@ import f90nml import numpy as np -from pace.util._optional_imports import cupy as cp import pace.util from pace import fv3core @@ -13,6 +12,7 @@ from pace.dsl.dace import DaceConfig, orchestrate from pace.dsl.gt4py_utils import is_gpu_backend from pace.dsl.typing import floating_point_precision +from pace.util._optional_imports import cupy as cp from pace.util.logging import pace_log @@ -134,7 +134,7 @@ def __init__( self._allocate_output_dir() device_ordinal_info = "" - if is_gpu_backend(): + if is_gpu_backend(backend): device_ordinal_info = ( f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" ) From 39ff8ead23a35754b455c89781f1413a087bfc6a Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Thu, 3 Aug 2023 15:31:14 -0400 Subject: [PATCH 5/7] Try to detect MPS reading the "log" pipe --- .../fv3core/initialization/geos_wrapper.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py 
b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 7f8f05d3..8143da2b 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -133,20 +133,30 @@ def __init__( self.output_dict: Dict[str, np.ndarray] = {} self._allocate_output_dir() - device_ordinal_info = "" - if is_gpu_backend(backend): - device_ordinal_info = ( - f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" - ) + # Feedback information + device_ordinal_info = ( + f" Device PCI bus id: {cp.cuda.Device(0).pci_bus_id}\n" + if is_gpu_backend(backend) + else "N/A" + ) + MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None) + MPS_is_on = ( + True + if MPS_pipe_directory + and is_gpu_backend(backend) + and os.path.exists(f"{MPS_pipe_directory}/log") + else False + ) pace_log.info( "Pace GEOS wrapper initialized: \n" - f" dt : {self.dycore_state.bdt}\n" - f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" - f" backend: {backend}\n" - f" float : {floating_point_precision()}bit" - f" orchestration: {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" + f" dt : {self.dycore_state.bdt}\n" + f" bridge : {self._fortran_mem_space} > {self._pace_mem_space}\n" + f" backend : {backend}\n" + f" float : {floating_point_precision()}bit\n" + f" orchestration : {self._is_orchestrated}\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" f" {device_ordinal_info}" + f" Nvidia MPS : {MPS_is_on}" ) def _critical_path(self): From f2d171dc5903560c991932c5a14ccf15536012c8 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 8 Aug 2023 14:15:14 -0400 Subject: [PATCH 6/7] Lint --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index 8143da2b..de0b944c 100644 --- 
a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -154,7 +154,8 @@ def __init__( f" backend : {backend}\n" f" float : {floating_point_precision()}bit\n" f" orchestration : {self._is_orchestrated}\n" - f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz} (halo: {sizer.n_halo})\n" + f" sizer : {sizer.nx}x{sizer.ny}x{sizer.nz}" + f" (halo: {sizer.n_halo})\n" f" {device_ordinal_info}" f" Nvidia MPS : {MPS_is_on}" ) From ac70398415a3c7dcff9fa56a9385618a1113f8d9 Mon Sep 17 00:00:00 2001 From: Florian Deconinck Date: Tue, 8 Aug 2023 14:16:06 -0400 Subject: [PATCH 7/7] Clean up --- fv3core/pace/fv3core/initialization/geos_wrapper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fv3core/pace/fv3core/initialization/geos_wrapper.py b/fv3core/pace/fv3core/initialization/geos_wrapper.py index de0b944c..f7133543 100644 --- a/fv3core/pace/fv3core/initialization/geos_wrapper.py +++ b/fv3core/pace/fv3core/initialization/geos_wrapper.py @@ -141,11 +141,9 @@ def __init__( ) MPS_pipe_directory = os.getenv("CUDA_MPS_PIPE_DIRECTORY", None) MPS_is_on = ( - True - if MPS_pipe_directory + MPS_pipe_directory and is_gpu_backend(backend) and os.path.exists(f"{MPS_pipe_directory}/log") - else False ) pace_log.info( "Pace GEOS wrapper initialized: \n"