Skip to content

Commit

Permalink
compiler: Implement integer block shapes
Browse files Browse the repository at this point in the history
  • Loading branch information
FabioLuporini committed Feb 10, 2022
1 parent 69b146d commit 1f28853
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 94 deletions.
15 changes: 5 additions & 10 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from functools import partial

from devito.core.operator import CoreOperator, CustomOperator
from devito.core.operator import CoreOperator, CustomOperator, ParTile
from devito.exceptions import InvalidOperator
from devito.passes.equations import collect_derivatives
from devito.passes.clusters import (Lift, blocking, buffering, cire, cse,
Expand Down Expand Up @@ -34,11 +34,6 @@ class Cpu64OperatorMixin(object):
situations where the performance impact might be detrimental.
"""

BLOCK_STEP = None
"""
The loop blocking step size. None => symbolic.
"""

CIRE_MINGAIN = 10
"""
Minimum operation count reduction for a redundant expression to be optimized
Expand Down Expand Up @@ -103,8 +98,8 @@ def _normalize_kwargs(cls, **kwargs):
o['blockeager'] = oo.pop('blockeager', cls.BLOCK_EAGER)
o['blocklazy'] = oo.pop('blocklazy', not o['blockeager'])
o['blockrelax'] = oo.pop('blockrelax', cls.BLOCK_RELAX)
o['blockstep'] = oo.pop('blockstep', cls.BLOCK_STEP)
o['skewing'] = oo.pop('skewing', False)
o['par-tile'] = ParTile(oo.pop('par-tile', False), default=16)

# CIRE
o['min-storage'] = oo.pop('min-storage', False)
Expand Down Expand Up @@ -193,7 +188,7 @@ def _specialize_clusters(cls, clusters, **kwargs):

# Blocking to improve data locality
if options['blockeager']:
clusters = blocking(clusters, options)
clusters = blocking(clusters, sregistry, options)

# Reduce flops
clusters = extract_increments(clusters, sregistry)
Expand All @@ -209,7 +204,7 @@ def _specialize_clusters(cls, clusters, **kwargs):

# Blocking to improve data locality
if options['blocklazy']:
clusters = blocking(clusters, options)
clusters = blocking(clusters, sregistry, options)

return clusters

Expand Down Expand Up @@ -292,7 +287,7 @@ def callback(f):

return {
'buffering': lambda i: buffering(i, callback, sregistry, options),
'blocking': lambda i: blocking(i, options),
'blocking': lambda i: blocking(i, sregistry, options),
'factorize': factorize,
'fission': fission,
'fuse': lambda i: fuse(i, options=options),
Expand Down
16 changes: 5 additions & 11 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from devito.core.operator import CoreOperator, CustomOperator
from devito.core.operator import CoreOperator, CustomOperator, ParTile
from devito.exceptions import InvalidOperator
from devito.passes.equations import collect_derivatives
from devito.passes.clusters import (Lift, Streaming, Tasker, blocking, buffering,
Expand Down Expand Up @@ -37,11 +37,6 @@ class DeviceOperatorMixin(object):
situations where the performance impact might be detrimental.
"""

BLOCK_STEP = None
"""
The loop blocking step size. None => symbolic.
"""

CIRE_MINGAIN = 10
"""
Minimum operation count reduction for a redundant expression to be optimized
Expand Down Expand Up @@ -86,7 +81,6 @@ def _normalize_kwargs(cls, **kwargs):
o['blockeager'] = oo.pop('blockeager', cls.BLOCK_EAGER)
o['blocklazy'] = oo.pop('blocklazy', not o['blockeager'])
o['blockrelax'] = oo.pop('blockrelax', cls.BLOCK_RELAX)
o['blockstep'] = oo.pop('blockstep', cls.BLOCK_STEP)
o['skewing'] = oo.pop('skewing', False)

# CIRE
Expand All @@ -98,7 +92,7 @@ def _normalize_kwargs(cls, **kwargs):
o['cire-schedule'] = oo.pop('cire-schedule', cls.CIRE_SCHEDULE)

# GPU parallelism
o['par-tile'] = oo.pop('par-tile', False) # Control tile parallelism
o['par-tile'] = ParTile(oo.pop('par-tile', False), default=(32, 4))
o['par-collapse-ncores'] = 1 # Always collapse (meaningful if `par-tile=False`)
o['par-collapse-work'] = 1 # Always collapse (meaningful if `par-tile=False`)
o['par-chunk-nonaffine'] = oo.pop('par-chunk-nonaffine', cls.PAR_CHUNK_NONAFFINE)
Expand Down Expand Up @@ -183,7 +177,7 @@ def _specialize_clusters(cls, clusters, **kwargs):

# Blocking to define thread blocks
if options['blockeager']:
clusters = blocking(clusters, options)
clusters = blocking(clusters, sregistry, options)

# Reduce flops
clusters = extract_increments(clusters, sregistry)
Expand All @@ -199,7 +193,7 @@ def _specialize_clusters(cls, clusters, **kwargs):

# Blocking to define thread blocks
if options['blocklazy']:
clusters = blocking(clusters, options)
clusters = blocking(clusters, sregistry, options)

return clusters

Expand Down Expand Up @@ -270,7 +264,7 @@ def callback(f):

return {
'buffering': lambda i: buffering(i, callback, sregistry, options),
'blocking': lambda i: blocking(i, options),
'blocking': lambda i: blocking(i, sregistry, options),
'tasking': Tasker(runs_on_host).process,
'streaming': Streaming(reads_if_on_host).process,
'factorize': factorize,
Expand Down
34 changes: 32 additions & 2 deletions devito/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from devito.logger import warning
from devito.parameters import configuration
from devito.operator import Operator
from devito.tools import as_tuple, timed_pass
from devito.tools import as_tuple, is_integer, timed_pass
from devito.types import NThreads

__all__ = ['CoreOperator', 'CustomOperator']
__all__ = ['CoreOperator', 'CustomOperator',
# Optimization options
'ParTile']


class BasicOperator(Operator):
Expand Down Expand Up @@ -208,3 +210,31 @@ def _specialize_iet(cls, graph, **kwargs):
passes_mapper['linearize'](graph)

return graph


# Wrappers for optimization options


class OptOption(object):
    """
    Marker base class for wrappers around optimization options.

    It carries no state or behaviour of its own; subclasses mix it in so
    that an option value can be recognised as a wrapped optimization option.
    """
    pass


class ParTile(tuple, OptOption):
    """
    Normalized representation of the `par-tile` optimization option.

    The constructor accepts the raw user-supplied value and turns it into
    a tuple of tuples, one inner tuple per tile shape.

    Parameters
    ----------
    items : bool or tuple
        The raw option value. Any falsy value (e.g. ``False``, ``None``,
        ``()``) disables the option. ``True`` selects ``default``. A tuple
        whose first entry is an integer is treated as a single tile shape;
        any other tuple is treated as a sequence of tile shapes.
    default : optional
        The tile shape used when ``items`` is ``True``.

    Returns
    -------
    ParTile or None
        ``None`` if ``items`` is falsy, otherwise a ParTile (tuple of tuples).

    Raises
    ------
    ValueError
        If ``items`` is ``True`` but ``default`` is falsy, or if ``items``
        is truthy but neither a bool nor a tuple.
    """

    def __new__(cls, items, default=None):
        # NOTE: deliberately returns None, rather than an empty ParTile,
        # when the option is disabled, so callers can test the normalized
        # option for truthiness/None-ness
        if not items:
            return None

        if isinstance(items, bool):
            # `items` is True at this point: fall back to the default shape
            if not default:
                raise ValueError("Expected `default` value, got None")
            items = (as_tuple(default),)
        elif isinstance(items, tuple):
            # Normalize to tuple of tuples
            if is_integer(items[0]):
                # A single tile shape, e.g. (32, 4) -> ((32, 4),)
                items = (items,)
            else:
                # Several tile shapes, e.g. ((32, 4), (16, 8))
                items = tuple(tuple(i) for i in items)
        else:
            raise ValueError("Expected bool or tuple, got %s instead" % type(items))

        return super().__new__(cls, items)
4 changes: 2 additions & 2 deletions devito/ir/clusters/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _process_fdta(self, clusters, level, prefix=None, **kwargs):

return processed

def _process_fatd(self, clusters, level, **kwargs):
def _process_fatd(self, clusters, level, prefix=None, **kwargs):
"""
fatd -> First Apply Then Divide
"""
Expand All @@ -72,7 +72,7 @@ def _process_fatd(self, clusters, level, **kwargs):
# Apply callback
_clusters = self.callback(list(g), pfx, **kwargs)
# Recursion
processed.extend(self._process_fatd(_clusters, level + 1, **kwargs))
processed.extend(self._process_fatd(_clusters, level + 1, pfx, **kwargs))

return processed

Expand Down
Loading

0 comments on commit 1f28853

Please sign in to comment.