Skip to content

Commit

Permalink
Merge pull request #2261 from devitocodes/tweak-custom-cuda
Browse files Browse the repository at this point in the history
arch: support rocm for gpu info
  • Loading branch information
mloubout authored Nov 10, 2023
2 parents e707027 + d0002b2 commit 730ed09
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 12 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/pytest-core-nompi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jobs:
- name: pytest-ubuntu-py39-gcc9-omp
python-version: '3.9'
os: ubuntu-20.04
arch: "gcc-9"
arch: "custom"
language: "openmp"
sympy: "1.9"

Expand Down Expand Up @@ -140,7 +140,7 @@ jobs:
id: set-run

- name: Install ${{ matrix.arch }} compiler
if: "runner.os == 'linux' && !contains(matrix.name, 'docker')"
if: "runner.os == 'linux' && !contains(matrix.name, 'docker') && matrix.arch !='custom' "
run : |
sudo apt-get install -y ${{ matrix.arch }}
Expand All @@ -166,8 +166,6 @@ jobs:
- name: Test with pytest
run: |
${{ env.RUN_CMD }} ${{ matrix.arch }} --version
${{ env.RUN_CMD }} python3 --version
${{ env.RUN_CMD }} pytest -k "${{ matrix.test-set }}" -m "not parallel" --cov --cov-config=.coveragerc --cov-report=xml ${{ env.TESTS }}
- name: Upload coverage to Codecov
Expand Down
72 changes: 72 additions & 0 deletions devito/arch/archinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re
import os
import sys
import json

from devito.logger import warning
from devito.tools import as_tuple, all_equal, memoized_func
Expand Down Expand Up @@ -249,6 +250,77 @@ def cbk(deviceid=0):
except OSError:
pass

# *** Second try: `rocm-smi`, clearly only works with AMD cards
try:
gpu_infos = {}

# Base gpu info
info_cmd = ['rocm-smi', '--showproductname']
proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
raw_info = str(proc.stdout.read())

lines = raw_info.replace('\\n', '\n').replace('b\'', '').replace('\\t', '')
lines = lines.splitlines()

for line in lines:
if 'GPU' in line:
# Product
pattern = r'GPU\[(\d+)\].*?Card series:\s*(.*?)\s*$'
match1 = re.match(pattern, line)

if match1:
gid = match1.group(1)
gpu_infos.setdefault(gid, dict())
gpu_infos[gid]['physicalid'] = gid
gpu_infos[gid]['product'] = match1.group(2)

# Model
pattern = r'GPU\[(\d+)\].*?Card model:\s*(.*?)\s*$'
match2 = re.match(pattern, line)

if match2:
gid = match2.group(1)
gpu_infos.setdefault(gid, dict())
gpu_infos[gid]['physicalid'] = match2.group(1)
gpu_infos[gid]['model'] = match2.group(2)

gpu_info = homogenise_gpus(list(gpu_infos.values()))

# Also attach callbacks to retrieve instantaneous memory info
info_cmd = ['rocm-smi', '--showmeminfo', 'vram', '--json']
proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
raw_info = str(proc.stdout.read())
lines = raw_info.replace('\\n', '').replace('b\'', '').replace('\'', '')
info = json.loads(lines)

for i in ['total', 'free', 'used']:
def make_cbk(i):
def cbk(deviceid=0):
try:
# Should only contain Used and total
assert len(info['card%s' % deviceid]) == 2
used = [int(v) for k, v in info['card%s' % deviceid].items()
if 'Used' in k][0]
total = [int(v) for k, v in info['card%s' % deviceid].items()
if 'Used' not in k][0]
free = total - used
return {'total': total, 'free': free, 'used': used}[i]
except:
# We shouldn't really end up here, unless nvidia-smi changes
# the output format (though we still have tests in place that
# will catch this)
return None

return cbk

gpu_info['mem.%s' % i] = make_cbk(i)

gpu_infos['architecture'] = 'AMD'
return gpu_info

except OSError:
pass

# *** Third try: `lshw`
try:
info_cmd = ['lshw', '-C', 'video']
Expand Down
23 changes: 17 additions & 6 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,27 +862,38 @@ def __new__(cls, *args, **kwargs):

obj = super().__new__(cls)
# Keep base to initialize accordingly
obj._base = _base
obj._cpp = _base._cpp
obj._base = kwargs.pop('base', _base)
obj._cpp = obj._base._cpp

return obj

def __init_finalize__(self, **kwargs):
    """
    Run the base compiler's finalization, then append user-supplied flags.

    The whitespace-separated entries of the `CFLAGS` and `LDFLAGS` environment
    variables are appended, in order and without de-duplication, to
    `self.cflags` and `self.ldflags`. An unset or empty variable contributes
    nothing.
    """
    self._base.__init_finalize__(self, **kwargs)
    # `str.split()` with no separator drops empty strings, so neither an unset
    # variable nor repeated spaces can inject a blank flag
    # Update cflags
    self.cflags = self.cflags + environ.get('CFLAGS', '').split()
    # Update ldflags
    self.ldflags = self.ldflags + environ.get('LDFLAGS', '').split()

def __lookup_cmds__(self):
    """
    Let the base compiler pick its commands, then apply environment overrides.

    Each of `CC`, `CXX`, `MPICC` and `MPICXX` is replaced by the value of the
    environment variable of the same name when that variable is set; otherwise
    the value chosen by the base compiler is kept.
    """
    self._base.__lookup_cmds__(self)
    # TODO: check for conflicts, for example using the nvhpc module file
    # will set CXX to nvc++ breaking the cuda backend
    for command in ('CC', 'CXX', 'MPICC', 'MPICXX'):
        current = getattr(self, command)
        setattr(self, command, environ.get(command, current))

def __new_with__(self, **kwargs):
    """
    Delegate to the parent class's `__new_with__`, adding `base=self._base`.

    All caller-supplied keyword arguments are forwarded unchanged. Because
    `base` is passed explicitly, supplying `base` in `kwargs` as well raises
    a `TypeError` (duplicate keyword argument).
    """
    # NOTE(review): presumably `base` is what `__new__` reads back through
    # `kwargs.pop('base', ...)` - confirm against the parent implementation
    return super().__new_with__(base=self._base, **kwargs)


compiler_registry = {
'custom': CustomCompiler,
Expand Down
2 changes: 2 additions & 0 deletions devito/mpi/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from devito.types.utils import DimensionTuple


__all__ = ['CustomTopology']

# Do not prematurely initialize MPI
# This allows launching a Devito program from within another Python program
# that has *already* initialized MPI
Expand Down
17 changes: 15 additions & 2 deletions tests/test_gpu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,26 @@ class TestGPUInfo(object):

def test_get_gpu_info(self):
    """
    Check that the reported GPU architecture is one of the known names.

    The test is marked as an expected failure when the information returned
    by `get_gpu_info` has no 'architecture' entry.
    """
    info = get_gpu_info()
    known = ['nvidia', 'tesla', 'geforce', 'quadro', 'amd', 'unspecified']
    try:
        assert info['architecture'].lower() in known
    except KeyError:
        # There might be more than one GPU, but for now we don't care
        # as we're not really exploiting this info yet...
        pytest.xfail("Unsupported platform for get_gpu_info")

def test_custom_compiler(self):
    """
    Check that an Operator builds and runs with the 'custom' compiler.

    The method carries the `test_` prefix so that pytest collects it.
    """
    grid = Grid(shape=(4, 4))

    u = TimeFunction(name='u', grid=grid)

    eqn = Eq(u.forward, u + 1)

    with switchconfig(compiler='custom'):
        # Only build here: `Operator(eqn)()` would call the new object right
        # away and bind `op` to the result of that call instead
        op = Operator(eqn)
        # Check jit-compilation and correct execution
        op.apply(time_M=10)
        assert np.all(u.data[1] == 11)


class TestCodeGeneration(object):
Expand Down

0 comments on commit 730ed09

Please sign in to comment.