Skip to content

Commit

Permalink
compiler: Refresh autotuning
Browse files Browse the repository at this point in the history
misc: Add blockrelax for linear algebra
  • Loading branch information
georgebisbas committed Jun 2, 2022
1 parent 1fa9db5 commit f08a52a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 14 deletions.
10 changes: 5 additions & 5 deletions benchmarks/user/advisor/run_advisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def run_with_advisor(path, output, name, exec_args):
os.environ['DEVITO_LANGUAGE'] = 'openmp'

# Thread pinning is strongly recommended for reliable results.
# We support thread pinning via numactl
# This script is using numactl for this purpose. Users may want to set their
# own pinning: https://hpc-wiki.info/hpc/Binding/Pinning
try:
ret = check_output(['numactl', '--show']).decode("utf-8")
ret = dict(i.split(':') for i in ret.split('\n') if i)
Expand All @@ -99,7 +100,6 @@ def run_with_advisor(path, output, name, exec_args):
# `stackoverflow.com/questions/17053671/python-how-do-you-stop-numpy-from-multithreading` # noqa
os.environ['NUMEXPR_NUM_THREADS'] = '1'


# To build a roofline with Advisor, we need to run two analyses back to
# back, `survey` and `tripcounts`.

Expand All @@ -118,7 +118,7 @@ def run_with_advisor(path, output, name, exec_args):
'-run-pass-thru=--no-altstack', # Avoids `https://software.intel.com/en-us/vtune-amplifier-help-error-message-stack-size-is-too-small` # noqa
'-run-pass-thru=-timestamp=sys', # Avoids 'VTune Amplifier may detect which timer source to use incorrectly on Intel® Xeon® processor E5-XXXX processors (200287361)' # noqa
'-strategy ldconfig:notrace:notrace', # Avoids `https://software.intel.com/en-us/forums/intel-vtune-amplifier-xe/topic/779309` # noqa
'-start-paused', # The generated code will enable/disable Advisor on a loop basis according to the decorated pragmas
'-start-paused', # The generated code will enable/disable Advisor on a loop basis according to the decorated pragmas # noqa
]
advisor_flops = [
'--collect=tripcounts',
Expand All @@ -130,8 +130,8 @@ def run_with_advisor(path, output, name, exec_args):
]
py_cmd = [sys.executable, str(path)] + exec_args.split()

# Before collecting the `survey` and `tripcounts` a "pure" python run to warmup the jit cache
# is preceded
# Before collecting the `survey` and `tripcounts`, a "pure" Python run is performed
# first to warm up the JIT cache

log('Starting Intel Advisor\'s `roofline` analysis for `%s`' % name)
dt = datetime.datetime.now()
Expand Down
14 changes: 6 additions & 8 deletions devito/core/autotuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,13 @@ def autotune(operator, args, level, mode):
# Detect the time-stepping Iteration; shrink its iteration range so that
# each autotuning run only takes a few iterations
steppers = {i for i in flatten(trees) if i.dim.is_Time}
if len(steppers) == 0:
stepper = None
timesteps = 1
elif len(steppers) == 1:
if len(steppers) == 1:
stepper = steppers.pop()
timesteps = init_time_bounds(stepper, at_args, args)
if timesteps is None:
return args, {}
else:
warning("cannot perform autotuning unless there is one time loop; skipping")
warning("cannot perform autotuning with %d time loops; skipping" % len(steppers))
return args, {}

# Use a fresh Timer for auto-tuning
Expand Down Expand Up @@ -220,7 +217,7 @@ def init_time_bounds(stepper, at_args, args):


def check_time_bounds(stepper, at_args, args, mode):
if mode != 'runtime' or stepper is None:
if mode != 'runtime':
return True
dim = stepper.dim.root
if stepper.direction is Backward:
Expand Down Expand Up @@ -319,8 +316,9 @@ def generate_block_shapes(blockable, args, level):
for bs in list(ret):
handle = []
for v in options['blocksize-l1']:
# To be a valid blocksize, it must be strictly smaller than
# and divide evenly the parent's block size
# To be a valid block size, it must be strictly smaller than, and
# evenly divide, the parent's block size. Block sizes equal to the
# parent's block size are therefore excluded
if all(v < i and i % v == 0 for _, i in bs):
ret.append(bs + tuple((d.step, v) for d in level_1))
ret.remove(bs)
Expand Down
2 changes: 1 addition & 1 deletion examples/misc/linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def callback_shape(ctx, param, value):

def callback_opts(ctx, param, value):
if value is True:
return ('blocking', 'simd', 'openmp', {'blockinner': True})
return ('advanced', {'blockinner': True, 'blockrelax': True})
else:
return 'noop'

Expand Down

0 comments on commit f08a52a

Please sign in to comment.