Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dsl/pace/dsl/dace/dace_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ def __init__(
if cp:
cuda_sm = cp.cuda.Device(0).compute_capability
dace.config.Config.set("compiler", "cuda", "cuda_arch", value=f"{cuda_sm}")
# Block size/thread count is defaulted to an average value for recent
# hardware (Pascal and upward). The problem of setting an optimized
# block/thread is both hardware and problem dependent. Fine tuners
# available in DaCe should be relied on for further tuning of this value.
dace.config.Config.set(
"compiler", "cuda", "default_block_size", value="64,8,1"
)
Expand Down
8 changes: 6 additions & 2 deletions dsl/pace/dsl/dace/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional
Expand Down Expand Up @@ -242,6 +243,8 @@ def kernel_theoretical_timing(
n = 1000
m = 4
dt = []
# Warm-up run (build, allocation) so that this one-time
# overhead is excluded from the timings below
bench(A, B, n)
# Time
for _ in range(m):
Expand Down Expand Up @@ -296,6 +299,9 @@ def kernel_theoretical_timing(
except TypeError:
newresult_in_us = (alldata_in_bytes / bandwidth_in_bytes_s) * in_us

# We keep the sympy import here because sympy is known to be a problematic
# import and a heavy module, which should be avoided if possible.
# TODO: refactor it out by shadow-coding the sympy.Max/Eval functions
import sympy

if node.label in result:
Expand Down Expand Up @@ -333,8 +339,6 @@ def report_kernel_theoretical_timing(
with open("kernel_theoretical_timing.csv", "w") as f:
f.write(csv_string)
elif out_format == "json":
import json

with open("kernel_theoretical_timing.json", "w") as f:
json.dump(timings, f, indent=2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module load boost/1.72.0
module load python/3.9

# clone Pace and update submodules
git clone --recursive https://github.com/ai2cm/pace
git clone --recursive https://github.com/NOAA-GFDL/pace
cd pace

# create a conda environment for pace
Expand Down
27 changes: 27 additions & 0 deletions examples/build_scripts/build_gaea_c5.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Example bash script to install Pace to run bare-metal on Gaea's c5 cluster.
#
# Steps: switch the programming environment from Intel to GNU, load the
# gcc/boost/python modules, clone Pace with its submodules, create and
# activate a conda environment, then pip-install the development requirements.

# -e: abort on the first failing command; -x: echo each command as it runs
set -e -x

# module load necessary system software
# (swap the Intel programming environment for the GNU one, then pin gcc)
module rm PrgEnv-intel
module load PrgEnv-gnu
module rm gcc
module load gcc/12.2.0
module load boost/1.79.0
module load python/3.9

# clone Pace and update submodules
git clone --recursive https://github.com/NOAA-GFDL/pace
cd pace

# create a conda environment for pace
# NOTE(review): the environment pins python=3.8 while the python/3.9 module
# is loaded above -- confirm which interpreter version is intended.
conda create -y --name my_name python=3.8

# enter the environment and update it
conda activate my_name
pip3 install --upgrade pip setuptools wheel

# install the Pace dependencies, GT4Py, and Pace
pip3 install -r requirements_dev.txt -c constraints.txt
24 changes: 16 additions & 8 deletions fv3core/pace/fv3core/stencils/d_sw.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ def __init__(
orchestrate(obj=self, config=stencil_factory.config.dace_config)
self.grid_data = grid_data
self._f0 = self.grid_data.fC_agrid
self._d_con = config.d_con
self._do_stochastic_ke_backscatter = config.do_skeb

self.grid_indexing = stencil_factory.grid_indexing
assert config.grid_type < 3, "ubke and vbke only implemented for grid_type < 3"
Expand Down Expand Up @@ -927,12 +929,15 @@ def make_quantity():
},
)
)
self._accumulate_heat_source_and_dissipation_estimate_stencil = (
stencil_factory.from_dims_halo(
func=accumulate_heat_source_and_dissipation_estimate,
compute_dims=[X_DIM, Y_DIM, Z_DIM],

if (self._d_con > 1.0e-5) or (self._do_stochastic_ke_backscatter):
self._accumulate_heat_source_and_dissipation_estimate_stencil = (
stencil_factory.from_dims_halo(
func=accumulate_heat_source_and_dissipation_estimate,
compute_dims=[X_DIM, Y_DIM, Z_DIM],
)
)
)

self._compute_vorticity_stencil = stencil_factory.from_dims_halo(
compute_vorticity,
compute_dims=[X_DIM, Y_DIM, Z_DIM],
Expand Down Expand Up @@ -1246,9 +1251,12 @@ def __call__(
self._tmp_diss_e,
self._column_namelist["d_con"],
)
self._accumulate_heat_source_and_dissipation_estimate_stencil(
self._tmp_heat_s, heat_source, self._tmp_diss_e, diss_est
)

if (self._d_con > 1.0e-5) or (self._do_stochastic_ke_backscatter):
self._accumulate_heat_source_and_dissipation_estimate_stencil(
self._tmp_heat_s, heat_source, self._tmp_diss_e, diss_est
)

self._update_u_and_v_stencil(
self._tmp_ut,
self._tmp_vt,
Expand Down
9 changes: 6 additions & 3 deletions fv3core/pace/fv3core/stencils/fv_dynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ def __init__(
dace_compiletime_args=["state"],
)
if timestep == timedelta(seconds=0):
raise RuntimeError("Bad dynamical core configuration: bdt is 0")
raise RuntimeError(
"Bad dynamical core configuration:"
" the atmospheric timestep is 0 seconds!"
)
# nested and stretched_grid are options in the Fortran code which we
# have not implemented, so they are hard-coded here.
self.call_checkpointer = checkpointer is not None
Expand Down Expand Up @@ -543,8 +546,8 @@ def _compute(self, state: DycoreState, timer: pace.util.Timer):

# TODO: When NQ=9, we shouldn't need to pass qcld explicitly
# since it's in self.tracers. It should not be an issue since
# we don't have self.tracers & qcld computation at
# the same time.
# we don't have self.tracers & qcld computation at the same
# time
# When NQ=8, we do need qcld passed explicitly
self._lagrangian_to_eulerian_obj(
self.tracers,
Expand Down