compiler: Fix parlang reductions over >= 4 loops #2417
@@ -504,13 +504,10 @@ def reduction_comms(clusters):
     return processed


-def normalize(clusters, **kwargs):
-    options = kwargs['options']
-    sregistry = kwargs['sregistry']
-
+def normalize(clusters, sregistry=None, options=None, platform=None, **kwargs):
     clusters = normalize_nested_indexeds(clusters, sregistry)
     if options['mapify-reduce']:
-        clusters = normalize_reductions_dense(clusters, sregistry)
+        clusters = normalize_reductions_dense(clusters, sregistry, platform)
     else:
         clusters = normalize_reductions_minmax(clusters)
     clusters = normalize_reductions_sparse(clusters, sregistry)
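The first hunk replaces manual `kwargs` lookups with explicit keyword parameters and threads `platform` through to the dense-reduction pass. A toy stand-in (not the real pass, and with made-up state values) showing the calling convention this enables, where any extra forwarded state is still absorbed by `**kwargs`:

```python
# Toy stand-in for the pass; only the keyword names mirror the new signature.
def normalize(clusters, sregistry=None, options=None, platform=None, **kwargs):
    assert options is not None and platform is not None
    return clusters  # the real pass rewrites the clusters here

state = {'sregistry': object(),                # symbol-name registry
         'options': {'mapify-reduce': True},   # compilation options
         'platform': object(),                 # target platform
         'extra_state': 42}                    # ignored, swallowed by **kwargs
print(normalize([], **state))
```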
@@ -594,31 +591,49 @@ def normalize_reductions_minmax(cluster):
     return init + [cluster.rebuild(processed)]


-def normalize_reductions_dense(cluster, sregistry):
+def normalize_reductions_dense(cluster, sregistry, platform):
     """
     Extract the right-hand sides of reduction Eq's into temporaries.
     """
-    return _normalize_reductions_dense(cluster, sregistry, {})
+    return _normalize_reductions_dense(cluster, {}, sregistry, platform)


 @cluster_pass(mode='dense')
-def _normalize_reductions_dense(cluster, sregistry, mapper):
-    dims = [d for d in cluster.ispace.itdims
-            if cluster.properties.is_parallel_atomic(d)]
-    if not dims:
+def _normalize_reductions_dense(cluster, mapper, sregistry, platform):
+    """
+    Transform augmented expressions whose left-hand side is a scalar into
+    map-reduces.
+
+    Examples
+    --------
+    Given an increment expression such as
+
+        s += f(u[x], v[x], ...)
+
+    Turn it into
+
+        r[x] = f(u[x], v[x], ...)
+        s += r[x]
+    """
+    # The candidate Dimensions along which to perform the map part
+    candidates = [d for d in cluster.ispace.itdims
+                  if cluster.properties.is_parallel_atomic(d)]
+    if not candidates:
         return cluster

+    # If there are more parallel dimensions than the maximum allowed by the
+    # target platform, we must restrain the number of candidates
+    max_par_dims = platform.limits()['max-par-dims']
+    dims = candidates[-max_par_dims:]
+
+    # All other dimensions must be sequentialized because the output Array
+    # is constrained to `dims`
+    sequentialize = candidates[:-max_par_dims]
+
     processed = []
+    properties = cluster.properties
     for e in cluster.exprs:
         if e.is_Reduction:
-            # Transform `e` into what is in essence an explicit map-reduce
-            # For example, turn:
-            # `s += f(u[x], v[x], ...)`
-            # into
-            # `r[x] = f(u[x], v[x], ...)`
-            # `s += r[x]`
-            # This makes it much easier to parallelize the map part regardless
-            # of the target backend
            lhs, rhs = e.args

            try:

An inline review thread is attached to the `max_par_dims = platform.limits()['max-par-dims']` line:

Comment: why not just
Reply: because (see comment above) some of these limits may be, e.g., programming-model dependent, besides being architecture-dependent
Reply: It's the same as having the compiler store its complex/f16 dtype; it doesn't belong there and should be part of a pass with if/dispatch.
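To make the `max-par-dims` restraining step in the hunk above concrete, here is a small standalone sketch of the slicing logic; the dimension names are made up for illustration, and only the list slicing mirrors the pass:

```python
# Standalone illustration of the candidate-splitting logic.
max_par_dims = 3

candidates = ['i', 'x', 'y', 'z']           # 4 parallel-atomic dimensions
dims = candidates[-max_par_dims:]           # ['x', 'y', 'z'] -> parallel map part
sequentialize = candidates[:-max_par_dims]  # ['i'] -> must run sequentially

candidates = ['x', 'y']                     # fewer candidates than the limit
dims = candidates[-max_par_dims:]           # ['x', 'y'] -> all stay parallel
sequentialize = candidates[:-max_par_dims]  # [] -> nothing to sequentialize
```

The negative-index slices make the "fewer candidates than the limit" case fall out naturally, with nothing left over to sequentialize.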
@@ -650,10 +665,13 @@ def _normalize_reductions_dense(cluster, sregistry, mapper):

             processed.extend([Eq(a.indexify(), rhs),
                               e.func(lhs, a.indexify())])
+
+            for d in sequentialize:
+                properties = properties.sequentialize(d)
         else:
             processed.append(e)

-    return cluster.rebuild(processed)
+    return cluster.rebuild(exprs=processed, properties=properties)


 @cluster_pass(mode='sparse')
A second review thread on the new `platform.limits()` usage:

Comment: Do `compiler` and `language` get used anywhere? Why couldn't this be a property?
Reply: They will in the future, yeah; some limits may be compiler and/or language dependent.
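Following up on that thread, a purely hypothetical sketch of why a `limits()` query, rather than a hard-coded constant or a plain property, leaves room for compiler- and language-dependent ceilings; the class names and numbers below are invented, and only the `limits()['max-par-dims']` lookup comes from the diff:

```python
# Illustrative only; this is not Devito's actual Platform hierarchy.
class MyPlatform:
    def limits(self):
        return {'max-par-dims': 4}   # invented CPU-like default

class MyGpuPlatform(MyPlatform):
    def limits(self):
        # A GPU backend might cap the collapsible loop nest at 3; a future
        # version could take compiler/language into account in this method.
        return {'max-par-dims': 3}

platform = MyGpuPlatform()
max_par_dims = platform.limits()['max-par-dims']   # same lookup as the pass
```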