Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions ndsl/dsl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Literal precision for both GT4Py & NDSL
import os
import platform
import sys
from typing import Literal

Expand Down Expand Up @@ -36,15 +35,6 @@ def _get_literal_precision(default: Literal["32", "64"] = "64") -> Literal["32",
os.environ["GT4PY_LITERAL_INT_PRECISION"] = str(NDSL_GLOBAL_PRECISION)
os.environ["GT4PY_LITERAL_FLOAT_PRECISION"] = str(NDSL_GLOBAL_PRECISION)

# OpenMP handling

detected_macos = platform.system() == "Darwin"
if detected_macos:
ndsl_log.warning(
"Multithreading is deactivated under MacOS due to apple-clang not handling OpenMP by default."
)
os.environ["GT4PY_CARTESIAN_ENABLE_OPENMP"] = "False" if detected_macos else "True"


# Set cache names for default gt backends workflow
import gt4py.cartesian.config # noqa: E402
Expand Down
21 changes: 13 additions & 8 deletions ndsl/dsl/dace/dace_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import dace.config
from gt4py.cartesian.config import GT4PY_COMPILE_OPT_LEVEL
from gt4py.cartesian.utils.compiler import cxx_compiler_defaults, gpu_configuration

from ndsl import LocalComm
from ndsl.comm.communicator import Communicator
Expand Down Expand Up @@ -226,23 +227,18 @@ def __init__(
else:
dace.config.Config.set("compiler", "build_type", value="Release")

# Required to True for gt4py storage/memory
dace.config.Config.set(
"compiler",
"allow_view_arguments",
value=True,
)
# Resolve "march/mtune" option for CPU
# - turn on numeric-centric SSE by default
# - Neoverse-V2 Grace CPU is too new for GCC 14 and -march=native will fail
# - use alternative march=armv8-a instead
march_cpu = "armv8-a" if is_arm_neoverse else "native"
# Removed --fmath
cxx_defaults = cxx_compiler_defaults(GT4PY_COMPILE_OPT_LEVEL)
dace.config.Config.set(
"compiler",
"cpu",
"args",
value=f"-march={march_cpu} -std=c++17 -fPIC -Wall -Wextra -O{optimization_level}",
value=f"-march={march_cpu} -std=c++17 -fPIC -Wall -Wextra -O{optimization_level} {cxx_defaults.cxx_compile_flags}",
)
# Potentially buggy - deactivate
dace.config.Config.set(
Expand All @@ -257,11 +253,12 @@ def __init__(
# - use alternative mcpu=native instead
march_option = "-mcpu=native" if is_arm_neoverse else "-march=native"
# Removed --fast-math
gpu_config = gpu_configuration(GT4PY_COMPILE_OPT_LEVEL)
dace.config.Config.set(
"compiler",
"cuda",
"args",
value=f"-std=c++14 -Xcompiler -fPIC -O3 -Xcompiler {march_option}",
value=f"-std=c++14 -Xcompiler -fPIC -O{optimization_level} -Xcompiler {march_option} {gpu_config.gpu_compile_flags}",
)

cuda_sm = cp.cuda.Device(0).compute_capability if cp else 60
Expand All @@ -280,6 +277,14 @@ def __init__(
"max_concurrent_streams",
value=-1, # no concurrent streams, every kernel on defaultStream
)

# Must be set to True for gt4py storage/memory
dace.config.Config.set(
"compiler",
"allow_view_arguments",
value=True,
)

# Speed up build time
dace.config.Config.set(
"compiler",
Expand Down