-
Notifications
You must be signed in to change notification settings - Fork 229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
arch: Add ICX support #2051
arch: Add ICX support #2051
Changes from all commits
248c0c9
e61fb86
7645fc7
4138905
9f0652c
22861f7
bd9cb84
b95bbd5
ff8387c
8e966a8
0f9960f
2be15ba
6f0052a
8cd3677
e355ed8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,8 +12,9 @@ | |
from codepy.jit import compile_from_string | ||
from codepy.toolchain import GCCToolchain | ||
|
||
from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, SKX, POWER8, POWER9, GRAVITON, | ||
get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path) | ||
from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON, | ||
INTELGPUX, IntelSkylake, get_nvidia_cc, check_cuda_runtime, | ||
get_m1_llvm_path) | ||
from devito.exceptions import CompilationError | ||
from devito.logger import debug, warning, error | ||
from devito.parameters import configuration | ||
|
@@ -375,13 +376,22 @@ class GNUCompiler(Compiler): | |
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
|
||
self.cflags += ['-march=native', '-Wno-unused-result', '-Wno-unused-variable', | ||
'-Wno-unused-but-set-variable'] | ||
platform = kwargs.pop('platform', configuration['platform']) | ||
|
||
self.cflags += ['-march=native', '-Wno-unused-result', | ||
'-Wno-unused-variable', '-Wno-unused-but-set-variable'] | ||
|
||
if configuration['safe-math']: | ||
self.cflags.append('-fno-unsafe-math-optimizations') | ||
else: | ||
self.cflags.append('-ffast-math') | ||
|
||
if isinstance(platform, IntelSkylake): | ||
# The default is `=256` because avx512 slows down the CPU frequency; | ||
# however, we empirically found that stencils generally benefit | ||
# from `=512` | ||
self.cflags.append('-mprefer-vector-width=512') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jkwashbourne we're adding this to gcc's default compilation flags, so once this PR lands, should you see regressions, please let us know. I've only seen improvements so far (though only up to 5%). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ping (merging) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
||
language = kwargs.pop('language', configuration['language']) | ||
try: | ||
if self.version >= Version("4.9.0"): | ||
|
@@ -414,7 +424,7 @@ def __init__(self, *args, **kwargs): | |
class ClangCompiler(Compiler): | ||
|
||
def __init__(self, *args, **kwargs): | ||
super(ClangCompiler, self).__init__(*args, **kwargs) | ||
super().__init__(*args, **kwargs) | ||
|
||
self.cflags += ['-Wno-unused-result', '-Wno-unused-variable'] | ||
if not configuration['safe-math']: | ||
|
@@ -481,7 +491,7 @@ class AOMPCompiler(Compiler): | |
"""AMD's fork of Clang for OpenMP offloading on both AMD and NVidia cards.""" | ||
|
||
def __init__(self, *args, **kwargs): | ||
super(AOMPCompiler, self).__init__(*args, **kwargs) | ||
super().__init__(*args, **kwargs) | ||
|
||
self.cflags += ['-Wno-unused-result', '-Wno-unused-variable'] | ||
if not configuration['safe-math']: | ||
|
@@ -531,7 +541,7 @@ def __lookup_cmds__(self): | |
class PGICompiler(Compiler): | ||
|
||
def __init__(self, *args, **kwargs): | ||
super(PGICompiler, self).__init__(*args, cpp=True, **kwargs) | ||
super().__init__(*args, cpp=True, **kwargs) | ||
|
||
self.cflags.remove('-std=c99') | ||
self.cflags.remove('-O3') | ||
|
@@ -671,39 +681,30 @@ def __lookup_cmds__(self): | |
class IntelCompiler(Compiler): | ||
|
||
def __init__(self, *args, **kwargs): | ||
super(IntelCompiler, self).__init__(*args, **kwargs) | ||
|
||
self.cflags.append("-xhost") | ||
super().__init__(*args, **kwargs) | ||
|
||
language = kwargs.pop('language', configuration['language']) | ||
platform = kwargs.pop('platform', configuration['platform']) | ||
language = kwargs.pop('language', configuration['language']) | ||
self.cflags.append("-xHost") | ||
|
||
if configuration['safe-math']: | ||
self.cflags.append("-fp-model=strict") | ||
else: | ||
self.cflags.append('-fast') | ||
self.cflags.append('-fp-model=fast') | ||
|
||
if platform is SKX: | ||
if isinstance(platform, IntelSkylake): | ||
# Systematically use 512-bit vectors on skylake | ||
self.cflags.append("-qopt-zmm-usage=high") | ||
|
||
try: | ||
if self.version >= Version("15.0.0"): | ||
# Append the OpenMP flag regardless of configuration['language'], | ||
# since icc15 and later versions implement OpenMP 4.0, hence | ||
# they support `#pragma omp simd` | ||
self.ldflags.append('-qopenmp') | ||
except (TypeError, ValueError): | ||
if language == 'openmp': | ||
# Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0 | ||
self.ldflags.append('-fopenmp') | ||
if language == 'openmp': | ||
self.ldflags.append('-qopenmp') | ||
|
||
# Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is | ||
if kwargs.get('mpi'): | ||
ver = check_output([self.MPICC, "--version"]).decode("utf-8") | ||
if not ver.startswith("icc"): | ||
warning("The MPI compiler `%s` doesn't use the Intel " | ||
"C/C++ compiler underneath" % self.MPICC) | ||
mpi_distro = sniff_mpi_distro('mpiexec') | ||
if mpi_distro != 'IntelMPI': | ||
warning("Expected Intel MPI distribution with `%s`, but found `%s`" | ||
% (self.__class__.__name__, mpi_distro)) | ||
|
||
def __lookup_cmds__(self): | ||
self.CC = 'icc' | ||
|
@@ -727,16 +728,55 @@ def __lookup_cmds__(self): | |
class IntelKNLCompiler(IntelCompiler): | ||
|
||
def __init__(self, *args, **kwargs): | ||
super(IntelKNLCompiler, self).__init__(*args, **kwargs) | ||
super().__init__(*args, **kwargs) | ||
|
||
self.cflags += ["-xMIC-AVX512"] | ||
self.cflags.append('-xMIC-AVX512') | ||
georgebisbas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
language = kwargs.pop('language', configuration['language']) | ||
|
||
if language != 'openmp': | ||
warning("Running on Intel KNL without OpenMP is highly discouraged") | ||
|
||
|
||
class OneapiCompiler(IntelCompiler): | ||
|
||
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
|
||
platform = kwargs.pop('platform', configuration['platform']) | ||
language = kwargs.pop('language', configuration['language']) | ||
|
||
if language == 'openmp': | ||
self.ldflags.remove('-qopenmp') | ||
self.ldflags.append('-fopenmp') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Uff, really? Does ICC support -fopenmp instead of -qopenmp (or perhaps both)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typically it supports both, but until the fix we discussed by email is in, they will not work. |
||
|
||
if language == 'sycl': | ||
self.cflags.append('-fsycl') | ||
if platform is NVIDIAX: | ||
self.cflags.append('-fsycl-targets=nvptx64-cuda') | ||
else: | ||
self.cflags.append('-fsycl-targets=spir64') | ||
|
||
if platform is NVIDIAX: | ||
self.cflags.append('-fopenmp-targets=nvptx64-cuda') | ||
if platform is INTELGPUX: | ||
self.cflags.append('-fopenmp-targets=spir64') | ||
self.cflags.append('-fopenmp-target-simd') | ||
|
||
if platform is INTELGPUX: | ||
self.cflags.remove('-g') # -g disables some optimizations in IGC | ||
self.cflags.append('-gline-tables-only') | ||
self.cflags.append('-fdebug-info-for-profiling') | ||
|
||
def __lookup_cmds__(self): | ||
# OneAPI HPC ToolKit comes with icpx, which is clang++, | ||
# and icx, which is clang | ||
self.CC = 'icx' | ||
self.CXX = 'icpx' | ||
self.MPICC = 'mpicc' | ||
self.MPICX = 'mpicx' | ||
|
||
|
||
class CustomCompiler(Compiler): | ||
|
||
""" | ||
|
@@ -800,9 +840,11 @@ def __lookup_cmds__(self): | |
'nvidia': NvidiaCompiler, | ||
'cuda': CudaCompiler, | ||
'osx': ClangCompiler, | ||
'intel': IntelCompiler, | ||
'icpc': IntelCompiler, | ||
'intel': OneapiCompiler, | ||
'icx': OneapiCompiler, | ||
'icpx': OneapiCompiler, | ||
'icc': IntelCompiler, | ||
'icpc': IntelCompiler, | ||
'intel-knl': IntelKNLCompiler, | ||
'knl': IntelKNLCompiler, | ||
'dpcpp': DPCPPCompiler, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -701,7 +701,6 @@ def test_everything(): | |
assert np.all(u.data == u1.data) | ||
|
||
|
||
@skipif('cpu64-icc') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It somehow works now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes! |
||
@pytest.mark.parametrize('subdomain', ['domain', 'interior']) | ||
def test_stencil_issue_1915(subdomain): | ||
nt = 5 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should I rename to intel-icc, intel-icx?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just icc or icx?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK