From d0002b2eeadb00d044f4601472d38cc6ee45a912 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 7 Nov 2023 11:17:35 -0500
Subject: [PATCH] arch: suport rocm-smi for get_gpu_info

---
 .github/workflows/pytest-core-nompi.yml |  6 +--
 devito/arch/archinfo.py                 | 72 +++++++++++++++++++++++++
 devito/arch/compiler.py                 |  7 ++-
 devito/mpi/distributed.py               |  2 +
 tests/test_gpu_common.py                | 17 +++++-
 5 files changed, 96 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pytest-core-nompi.yml b/.github/workflows/pytest-core-nompi.yml
index 1cbf3d510d..af0127b053 100644
--- a/.github/workflows/pytest-core-nompi.yml
+++ b/.github/workflows/pytest-core-nompi.yml
@@ -80,7 +80,7 @@ jobs:
         - name: pytest-ubuntu-py39-gcc9-omp
           python-version: '3.9'
           os: ubuntu-20.04
-          arch: "gcc-9"
+          arch: "custom"
           language: "openmp"
           sympy: "1.9"
 
@@ -140,7 +140,7 @@ jobs:
       id: set-run
 
     - name: Install ${{ matrix.arch }} compiler
-      if: "runner.os == 'linux' && !contains(matrix.name, 'docker')"
+      if: "runner.os == 'linux' && !contains(matrix.name, 'docker') && matrix.arch !='custom' "
       run : |
         sudo apt-get install -y ${{ matrix.arch }}
 
@@ -166,8 +166,6 @@ jobs:
 
     - name: Test with pytest
       run: |
-        ${{ env.RUN_CMD }} ${{ matrix.arch }} --version
-        ${{ env.RUN_CMD }} python3 --version
         ${{ env.RUN_CMD }} pytest -k "${{ matrix.test-set }}" -m "not parallel" --cov --cov-config=.coveragerc --cov-report=xml ${{ env.TESTS }}
 
     - name: Upload coverage to Codecov
diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py
index 0a5ac0588d..2b7fdb4b67 100644
--- a/devito/arch/archinfo.py
+++ b/devito/arch/archinfo.py
@@ -10,6 +10,7 @@
 import re
 import os
 import sys
+import json
 
 from devito.logger import warning
 from devito.tools import as_tuple, all_equal, memoized_func
@@ -249,6 +250,77 @@ def cbk(deviceid=0):
     except OSError:
         pass
 
+    # *** Second try: `rocm-smi`, clearly only works with AMD cards
+    try:
+        gpu_infos = {}
+
+        # Base gpu info
+        info_cmd = ['rocm-smi', '--showproductname']
+        proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
+        raw_info = str(proc.stdout.read())
+
+        lines = raw_info.replace('\\n', '\n').replace('b\'', '').replace('\\t', '')
+        lines = lines.splitlines()
+
+        for line in lines:
+            if 'GPU' in line:
+                # Product
+                pattern = r'GPU\[(\d+)\].*?Card series:\s*(.*?)\s*$'
+                match1 = re.match(pattern, line)
+
+                if match1:
+                    gid = match1.group(1)
+                    gpu_infos.setdefault(gid, dict())
+                    gpu_infos[gid]['physicalid'] = gid
+                    gpu_infos[gid]['product'] = match1.group(2)
+
+                # Model
+                pattern = r'GPU\[(\d+)\].*?Card model:\s*(.*?)\s*$'
+                match2 = re.match(pattern, line)
+
+                if match2:
+                    gid = match2.group(1)
+                    gpu_infos.setdefault(gid, dict())
+                    gpu_infos[gid]['physicalid'] = match2.group(1)
+                    gpu_infos[gid]['model'] = match2.group(2)
+
+        gpu_info = homogenise_gpus(list(gpu_infos.values()))
+
+        # Also attach callbacks to retrieve instantaneous memory info
+        info_cmd = ['rocm-smi', '--showmeminfo', 'vram', '--json']
+        proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
+        raw_info = str(proc.stdout.read())
+        lines = raw_info.replace('\\n', '').replace('b\'', '').replace('\'', '')
+        info = json.loads(lines)
+
+        for i in ['total', 'free', 'used']:
+            def make_cbk(i):
+                def cbk(deviceid=0):
+                    try:
+                        # Should only contain Used and total
+                        assert len(info['card%s' % deviceid]) == 2
+                        used = [int(v) for k, v in info['card%s' % deviceid].items()
+                                if 'Used' in k][0]
+                        total = [int(v) for k, v in info['card%s' % deviceid].items()
+                                 if 'Used' not in k][0]
+                        free = total - used
+                        return {'total': total, 'free': free, 'used': used}[i]
+                    except:
+                        # We shouldn't really end up here, unless nvidia-smi changes
+                        # the output format (though we still have tests in place that
+                        # will catch this)
+                        return None
+
+                return cbk
+
+            gpu_info['mem.%s' % i] = make_cbk(i)
+
+        gpu_infos['architecture'] = 'AMD'
+        return gpu_info
+
+    except OSError:
+        pass
+
     # *** Second try: `lshw`
     try:
         info_cmd = ['lshw', '-C', 'video']
diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py
index 7ec1d3da5b..08f509c5fe 100644
--- a/devito/arch/compiler.py
+++ b/devito/arch/compiler.py
@@ -862,8 +862,8 @@ def __new__(cls, *args, **kwargs):
 
         obj = super().__new__(cls)
         # Keep base to initialize accordingly
-        obj._base = _base
-        obj._cpp = _base._cpp
+        obj._base = kwargs.pop('base', _base)
+        obj._cpp = obj._base._cpp
 
         return obj
 
@@ -891,6 +891,9 @@ def __lookup_cmds__(self):
         self.MPICC = environ.get('MPICC', self.MPICC)
         self.MPICXX = environ.get('MPICXX', self.MPICXX)
 
+    def __new_with__(self, **kwargs):
+        return super().__new_with__(base=self._base, **kwargs)
+
 
 compiler_registry = {
     'custom': CustomCompiler,
diff --git a/devito/mpi/distributed.py b/devito/mpi/distributed.py
index 464a62a15a..9c8bcd0c8e 100644
--- a/devito/mpi/distributed.py
+++ b/devito/mpi/distributed.py
@@ -17,6 +17,8 @@
 from devito.types.utils import DimensionTuple
 
 
+__all__ = ['CustomTopology']
+
 # Do not prematurely initialize MPI
 # This allows launching a Devito program from within another Python program
 # that has *already* initialized MPI
diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py
index d3164722f2..c6594305fc 100644
--- a/tests/test_gpu_common.py
+++ b/tests/test_gpu_common.py
@@ -25,13 +25,26 @@ class TestGPUInfo(object):
 
     def test_get_gpu_info(self):
         info = get_gpu_info()
-        known = ['nvidia', 'tesla', 'geforce', 'quadro', 'unspecified']
+        known = ['nvidia', 'tesla', 'geforce', 'quadro', 'amd', 'unspecified']
         try:
             assert info['architecture'].lower() in known
         except KeyError:
             # There might be than one GPUs, but for now we don't care
             # as we're not really exploiting this info yet...
-            pass
+            pytest.xfail("Unsupported platform for get_gpu_info")
+
+    def custom_compiler(self):
+        grid = Grid(shape=(4, 4))
+
+        u = TimeFunction(name='u', grid=grid)
+
+        eqn = Eq(u.forward, u + 1)
+
+        with switchconfig(compiler='custom'):
+            op = Operator(eqn)()
+            # Check jit-compilation and correct execution
+            op.apply(time_M=10)
+            assert np.all(u.data[1] == 11)
 
 
 class TestCodeGeneration(object):