Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arch: Add ICX support #2051

Merged
merged 15 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docker-bases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
dockerfile: './docker/Dockerfile.cpu'
runner: ubuntu-latest

- tag: 'devitocodes/bases:cpu-icc'
- tag: 'devitocodes/bases:cpu-icc, devitocodes/bases:cpu-icx'
arch: 'arch=icc'
version: ''
dockerfile: './docker/Dockerfile.cpu'
Expand Down
14 changes: 11 additions & 3 deletions .github/workflows/pytest-core-nompi.yml
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I rename to intel-icc, intel-icx?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just `icc` or `icx`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ jobs:
pytest-ubuntu-py39-gcc9-omp,
pytest-osx-py37-clang-omp,
pytest-docker-py37-gcc-omp,
pytest-docker-py37-icc-omp
pytest-docker-py37-icc-omp,
pytest-docker-py38-icx-omp
]
set: [base, adjoint]
include:
Expand Down Expand Up @@ -105,6 +106,13 @@ jobs:
language: "openmp"
sympy: "1.11"

- name: pytest-docker-py38-icx-omp
python-version: '3.8'
os: ubuntu-22.04
arch: "icx"
language: "openmp"
sympy: "1.11"

- set: base
test-set: 'not adjoint'

Expand Down Expand Up @@ -133,13 +141,13 @@ jobs:
- name: Set run prefix
run: |
if [[ "${{ matrix.name }}" =~ "docker" ]]; then
echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} --name testrun devito_img" >> $GITHUB_ENV
echo "RUN_CMD=docker run --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} -e DEVITO_ARCH=${{ matrix.arch }} --name testrun devito_img" >> $GITHUB_ENV
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved
else
echo "RUN_CMD=" >> $GITHUB_ENV
fi
id: set-run

- name: Install GCC ${{ matrix.arch }}
- name: Install ${{ matrix.arch }} compiler
if: "runner.os == 'linux' && !contains(matrix.name, 'docker')"
run : |
sudo apt-get install -y ${{ matrix.arch }}
Expand Down
35 changes: 25 additions & 10 deletions devito/arch/archinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@

__all__ = ['platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_nvidia_cc',
'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', 'get_m1_llvm_path',
'Platform', 'Cpu64', 'Intel64', 'Amd', 'Arm', 'Power', 'Device',
'NvidiaDevice', 'AmdDevice', 'IntelDevice',
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'KNL', 'KNL7210', # Intel
'AMD', 'ARM', 'M1', 'GRAVITON', # ARM
'POWER8', 'POWER9', # Other loosely supported CPU architectures
'AMDGPUX', 'NVIDIAX', 'INTELGPUX'] # GPUs
'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power',
'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice',
# Intel
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
'SKX', 'KLX', 'CLX', 'CLK',
# ARM
'AMD', 'ARM', 'M1', 'GRAVITON',
# Other loosely supported CPU architectures
'POWER8', 'POWER9',
# GPUs
'AMDGPUX', 'NVIDIAX', 'INTELGPUX']


@memoized_func
Expand Down Expand Up @@ -494,7 +499,7 @@ def get_platform():
if 'phi' in brand:
# Intel Xeon Phi?
return platform_registry['knl']
# Unknown Xeon ? May happen on some virtualizes systems...
# Unknown Xeon ? May happen on some virtualized systems...
return platform_registry['intel64']
elif 'intel' in brand:
# Most likely a desktop i3/i5/i7
Expand Down Expand Up @@ -607,6 +612,14 @@ class Intel64(Cpu64):
known_isas = ('cpp', 'sse', 'avx', 'avx2', 'avx512')


class IntelSkylake(Intel64):
pass


class IntelGoldenCode(Intel64):
pass


class Arm(Cpu64):

known_isas = ('fp', 'asimd', 'asimdrdm')
Expand Down Expand Up @@ -725,11 +738,12 @@ def march(cls):
IVB = Intel64('ivb')
HSW = Intel64('hsw')
BDW = Intel64('bdw', isa='avx2')
SKX = Intel64('skx')
KLX = Intel64('klx')
CLX = Intel64('clx')
KNL = Intel64('knl')
KNL7210 = Intel64('knl', cores_logical=256, cores_physical=64, isa='avx512')
SKX = IntelSkylake('skx')
KLX = IntelSkylake('klx')
CLX = IntelSkylake('clx')
CLK = IntelSkylake('clk')

ARM = Arm('arm')
GRAVITON = Arm('graviton')
Expand All @@ -756,6 +770,7 @@ def march(cls):
'skx': SKX, # Skylake
'klx': KLX, # Kaby Lake
'clx': CLX, # Coffee Lake
'clk': CLK, # Cascade Lake
'knl': KNL,
'knl7210': KNL7210,
'arm': ARM, # Generic ARM CPU
Expand Down
104 changes: 73 additions & 31 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
from codepy.jit import compile_from_string
from codepy.toolchain import GCCToolchain

from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, SKX, POWER8, POWER9, GRAVITON,
get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path)
from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
INTELGPUX, IntelSkylake, get_nvidia_cc, check_cuda_runtime,
get_m1_llvm_path)
from devito.exceptions import CompilationError
from devito.logger import debug, warning, error
from devito.parameters import configuration
Expand Down Expand Up @@ -375,13 +376,22 @@ class GNUCompiler(Compiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.cflags += ['-march=native', '-Wno-unused-result', '-Wno-unused-variable',
'-Wno-unused-but-set-variable']
platform = kwargs.pop('platform', configuration['platform'])

self.cflags += ['-march=native', '-Wno-unused-result',
'-Wno-unused-variable', '-Wno-unused-but-set-variable']

if configuration['safe-math']:
self.cflags.append('-fno-unsafe-math-optimizations')
else:
self.cflags.append('-ffast-math')

if isinstance(platform, IntelSkylake):
# The default is `=256` because avx512 slows down the CPU frequency;
# however, we empirically found that stencils generally benefit
# from `=512`
self.cflags.append('-mprefer-vector-width=512')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jkwashbourne we're adding this to GCC's default compilation flags, so once this PR lands, please let us know if you see any regressions. I've only seen improvements so far (though only up to 5%).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping (merging)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


language = kwargs.pop('language', configuration['language'])
try:
if self.version >= Version("4.9.0"):
Expand Down Expand Up @@ -414,7 +424,7 @@ def __init__(self, *args, **kwargs):
class ClangCompiler(Compiler):

def __init__(self, *args, **kwargs):
super(ClangCompiler, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)

self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
if not configuration['safe-math']:
Expand Down Expand Up @@ -481,7 +491,7 @@ class AOMPCompiler(Compiler):
"""AMD's fork of Clang for OpenMP offloading on both AMD and NVidia cards."""

def __init__(self, *args, **kwargs):
super(AOMPCompiler, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)

self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
if not configuration['safe-math']:
Expand Down Expand Up @@ -531,7 +541,7 @@ def __lookup_cmds__(self):
class PGICompiler(Compiler):

def __init__(self, *args, **kwargs):
super(PGICompiler, self).__init__(*args, cpp=True, **kwargs)
super().__init__(*args, cpp=True, **kwargs)

self.cflags.remove('-std=c99')
self.cflags.remove('-O3')
Expand Down Expand Up @@ -671,39 +681,30 @@ def __lookup_cmds__(self):
class IntelCompiler(Compiler):

def __init__(self, *args, **kwargs):
super(IntelCompiler, self).__init__(*args, **kwargs)

self.cflags.append("-xhost")
super().__init__(*args, **kwargs)

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])
self.cflags.append("-xHost")

if configuration['safe-math']:
self.cflags.append("-fp-model=strict")
else:
self.cflags.append('-fast')
self.cflags.append('-fp-model=fast')

if platform is SKX:
if isinstance(platform, IntelSkylake):
# Systematically use 512-bit vectors on skylake
self.cflags.append("-qopt-zmm-usage=high")

try:
if self.version >= Version("15.0.0"):
# Append the OpenMP flag regardless of configuration['language'],
# since icc15 and later versions implement OpenMP 4.0, hence
# they support `#pragma omp simd`
self.ldflags.append('-qopenmp')
except (TypeError, ValueError):
if language == 'openmp':
# Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0
self.ldflags.append('-fopenmp')
if language == 'openmp':
self.ldflags.append('-qopenmp')

# Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
if kwargs.get('mpi'):
ver = check_output([self.MPICC, "--version"]).decode("utf-8")
if not ver.startswith("icc"):
warning("The MPI compiler `%s` doesn't use the Intel "
"C/C++ compiler underneath" % self.MPICC)
mpi_distro = sniff_mpi_distro('mpiexec')
if mpi_distro != 'IntelMPI':
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
% (self.__class__.__name__, mpi_distro))

def __lookup_cmds__(self):
self.CC = 'icc'
Expand All @@ -727,16 +728,55 @@ def __lookup_cmds__(self):
class IntelKNLCompiler(IntelCompiler):

def __init__(self, *args, **kwargs):
super(IntelKNLCompiler, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)

self.cflags += ["-xMIC-AVX512"]
self.cflags.append('-xMIC-AVX512')
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved

language = kwargs.pop('language', configuration['language'])

if language != 'openmp':
warning("Running on Intel KNL without OpenMP is highly discouraged")


class OneapiCompiler(IntelCompiler):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

if language == 'openmp':
self.ldflags.remove('-qopenmp')
self.ldflags.append('-fopenmp')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uff, really? Does ICC support `-fopenmp` instead of `-qopenmp` (or perhaps both)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typically it supports both, but until the fix we discussed over email is in, they will not work.
This can be temporary and will be dropped once we also drop the Intel compiler.


if language == 'sycl':
self.cflags.append('-fsycl')
if platform is NVIDIAX:
self.cflags.append('-fsycl-targets=nvptx64-cuda')
else:
self.cflags.append('-fsycl-targets=spir64')

if platform is NVIDIAX:
self.cflags.append('-fopenmp-targets=nvptx64-cuda')
if platform is INTELGPUX:
self.cflags.append('-fopenmp-targets=spir64')
self.cflags.append('-fopenmp-target-simd')

if platform is INTELGPUX:
self.cflags.remove('-g') # -g disables some optimizations in IGC
self.cflags.append('-gline-tables-only')
self.cflags.append('-fdebug-info-for-profiling')

def __lookup_cmds__(self):
# OneAPI HPC ToolKit comes with icpx, which is clang++,
# and icx, which is clang
self.CC = 'icx'
self.CXX = 'icpx'
self.MPICC = 'mpicc'
self.MPICX = 'mpicx'


class CustomCompiler(Compiler):

"""
Expand Down Expand Up @@ -800,9 +840,11 @@ def __lookup_cmds__(self):
'nvidia': NvidiaCompiler,
'cuda': CudaCompiler,
'osx': ClangCompiler,
'intel': IntelCompiler,
'icpc': IntelCompiler,
'intel': OneapiCompiler,
'icx': OneapiCompiler,
'icpx': OneapiCompiler,
'icc': IntelCompiler,
'icpc': IntelCompiler,
'intel-knl': IntelKNLCompiler,
'knl': IntelKNLCompiler,
'dpcpp': DPCPPCompiler,
Expand Down
7 changes: 5 additions & 2 deletions devito/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,11 @@ class switchconfig(object):
Decorator to temporarily change `configuration` parameters.
"""

def __init__(self, **params):
self.params = {k.replace('_', '-'): v for k, v in params.items()}
def __init__(self, condition=True, **params):
if condition:
self.params = {k.replace('_', '-'): v for k, v in params.items()}
else:
self.params = {}

def __call__(self, func, *args, **kwargs):
@wraps(func)
Expand Down
12 changes: 10 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from devito.checkpointing import NoopRevolver
from devito.finite_differences.differentiable import EvalDerivative
from devito.arch import Cpu64, Device, sniff_mpi_distro, Arm
from devito.arch.compiler import compiler_registry, IntelCompiler, NvidiaCompiler
from devito.arch.compiler import (compiler_registry, IntelCompiler, OneapiCompiler,
NvidiaCompiler)
from devito.ir.iet import (FindNodes, FindSymbols, Iteration, ParallelBlock,
retrieve_iteration_tree)
from devito.tools import as_tuple
Expand All @@ -26,7 +27,8 @@ def skipif(items, whole_module=False):
# Sanity check
accepted = set()
accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc',
'device-aomp', 'cpu64-icc', 'cpu64-nvc', 'cpu64-arm', 'chkpnt'})
'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc', 'cpu64-arm',
'cpu64-icpx', 'chkpnt'})
accepted.update({'nompi', 'nodevice'})
unknown = sorted(set(items) - accepted)
if unknown:
Expand Down Expand Up @@ -70,6 +72,12 @@ def skipif(items, whole_module=False):
isinstance(configuration['platform'], Cpu64):
skipit = "`icc+cpu64` won't work with this test"
break
# Skip if it won't run with OneAPICompiler
if i == 'cpu64-icx' and \
georgebisbas marked this conversation as resolved.
Show resolved Hide resolved
isinstance(configuration['compiler'], OneapiCompiler) and \
isinstance(configuration['platform'], Cpu64):
skipit = "`icx+cpu64` won't work with this test"
break
# Skip if it won't run on Arm
if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm):
skipit = "Arm doesn't support x86-specific instructions"
Expand Down
2 changes: 2 additions & 0 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

from benchmarks.user.benchmark import run
from devito import configuration, switchconfig
from conftest import skipif
from subprocess import check_call


@skipif('cpu64-icx')
@pytest.mark.parametrize('mode, problem, op', [
('run', 'acoustic', 'forward'), ('run', 'acoustic', 'adjoint'),
('run', 'acoustic', 'jacobian'), ('run', 'acoustic', 'jacobian_adjoint'),
Expand Down
1 change: 0 additions & 1 deletion tests/test_buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,6 @@ def test_everything():
assert np.all(u.data == u1.data)


@skipif('cpu64-icc')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it somehow works now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes!

@pytest.mark.parametrize('subdomain', ['domain', 'interior'])
def test_stencil_issue_1915(subdomain):
nt = 5
Expand Down
5 changes: 4 additions & 1 deletion tests/test_dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
SparseFunction, SparseTimeFunction, Eq, Operator, Constant,
Dimension, DefaultDimension, SubDimension, switchconfig,
SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension,
CustomDimension, dimensions)
CustomDimension, dimensions, configuration)
from devito.arch.compiler import IntelCompiler, OneapiCompiler
from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes,
FindSymbols, retrieve_iteration_tree)
from devito.symbolics import indexify, retrieve_functions, IntDiv
Expand Down Expand Up @@ -1382,6 +1383,8 @@ def test_affiness(self):
iterations = [i for i in FindNodes(Iteration).visit(op) if i.dim is not time]
assert all(i.is_Affine for i in iterations)

@switchconfig(condition=isinstance(configuration['compiler'],
(IntelCompiler, OneapiCompiler)), safe_math=True)
def test_sparse_time_function(self):
nt = 20

Expand Down