Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Avoid generating collapse(1) #2129

Merged
merged 1 commit
May 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions devito/passes/iet/languages/openacc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ def _make_construct(cls, **kwargs):
return 'acc parallel loop'

@classmethod
def _make_clauses(cls, ncollapsed=None, reduction=None, tile=None, **kwargs):
def _make_clauses(cls, ncollapsed=0, reduction=None, tile=None, **kwargs):
clauses = []

if tile:
clauses.append('tile(%s)' % ','.join(str(i) for i in tile))
elif ncollapsed:
clauses.append('collapse(%d)' % (ncollapsed or 1))
elif ncollapsed > 1:
clauses.append('collapse(%d)' % ncollapsed)

if reduction:
clauses.append(cls._make_clause_reduction_from_imask(reduction))
Expand Down
5 changes: 3 additions & 2 deletions devito/passes/iet/languages/openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ def _make_construct(cls, parallel=False, **kwargs):
return 'omp for'

@classmethod
def _make_clauses(cls, ncollapsed=None, chunk_size=None, nthreads=None,
def _make_clauses(cls, ncollapsed=0, chunk_size=None, nthreads=None,
reduction=None, schedule=None, **kwargs):
clauses = []

clauses.append('collapse(%d)' % (ncollapsed or 1))
if ncollapsed > 1:
clauses.append('collapse(%d)' % ncollapsed)

if chunk_size is not False:
clauses.append('schedule(%s,%s)' % (schedule or 'dynamic',
Expand Down
8 changes: 4 additions & 4 deletions examples/performance/00_overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@
" const int tid = omp_get_thread_num();\n",
" float (*restrict r0)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr0[tid];\n",
"\n",
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
" #pragma omp for schedule(dynamic,1)\n",
" for (int x = x_m; x <= x_M; x += 1)\n",
" {\n",
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
Expand Down Expand Up @@ -855,7 +855,7 @@
" const int tid = omp_get_thread_num();\n",
" float (*restrict r1)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr1[tid];\n",
"\n",
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
" #pragma omp for schedule(dynamic,1)\n",
" for (int x = x_m; x <= x_M; x += 1)\n",
" {\n",
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
Expand Down Expand Up @@ -991,7 +991,7 @@
" const int tid = omp_get_thread_num();\n",
" float (*restrict r0)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr0[tid];\n",
"\n",
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
" #pragma omp for schedule(dynamic,1)\n",
" for (int x = x_m; x <= x_M; x += 1)\n",
" {\n",
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
Expand Down Expand Up @@ -1557,7 +1557,7 @@
" const int tid = omp_get_thread_num();\n",
" float (*restrict r2)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr2[tid];\n",
"\n",
" #pragma omp for collapse(1) schedule(dynamic,1)\n",
" #pragma omp for schedule(dynamic,1)\n",
" for (int x = x_m; x <= x_M; x += 1)\n",
" {\n",
" for (int y = y_m - 2; y <= y_M + 2; y += 1)\n",
Expand Down
23 changes: 14 additions & 9 deletions tests/test_dle.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,8 @@ def test_incs_no_atomic(self):
# Now only `x` is parallelized
op1 = Operator([Eq(v[t, x, 0, 0], v[t, x, 0, 0] + 1), Inc(uf, 1)],
opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1}))
assert 'collapse(1)' in str(op1)
assert 'omp for' in str(op1)
assert 'collapse' not in str(op1)
assert 'atomic' not in str(op1)

@pytest.mark.parametrize('exprs,simd_level,expected', [
Expand Down Expand Up @@ -879,18 +880,22 @@ def test_edge_cases(self, exprs, simd_level, expected):
for i, e in enumerate(list(exprs)):
exprs[i] = eval(e)

op = Operator(exprs, opt=('advanced', {'openmp': True}))
op = Operator(exprs, opt=('advanced', {'openmp': True,
'par-collapse-ncores': 1}))

iterations = FindNodes(Iteration).visit(op)
parallel = [i for i in iterations if i.is_Parallel]
try:
assert 'omp for collapse' in iterations[0].pragmas[0].value
assert 'omp for' in iterations[0].pragmas[0].value
if len(parallel) > 1 and simd_level is not None and simd_level > 1:
assert 'collapse' in iterations[0].pragmas[0].value
if simd_level:
assert 'omp simd' in iterations[simd_level].pragmas[0].value
except:
# E.g. gcc-5 doesn't support array reductions, so the compiler will
# generate different legal code
assert not Ompizer._support_array_reduction(configuration['compiler'])
assert any('omp for collapse' in i.pragmas[0].value
assert any('omp for' in i.pragmas[0].value
for i in iterations if i.pragmas)

op.apply()
Expand All @@ -910,7 +915,7 @@ def test_simd_space_invariant(self):
op = Operator(eq, opt=('advanced', {'openmp': True}))
iterations = FindNodes(Iteration).visit(op)

assert 'omp for collapse(1) schedule(static,1)' in iterations[0].pragmas[0].value
assert 'omp for schedule(static,1)' in iterations[0].pragmas[0].value
assert 'omp simd' in iterations[1].pragmas[0].value
assert 'omp simd' in iterations[3].pragmas[0].value

Expand Down Expand Up @@ -979,8 +984,8 @@ def test_basic(self):
bns, _ = assert_blocking(op, {'x0_blk0'})

iterations = FindNodes(Iteration).visit(bns['x0_blk0'])
assert iterations[0].pragmas[0].value == 'omp for collapse(1) schedule(dynamic,1)'
assert iterations[2].pragmas[0].value == ('omp parallel for collapse(1) '
assert iterations[0].pragmas[0].value == 'omp for schedule(dynamic,1)'
assert iterations[2].pragmas[0].value == ('omp parallel for '
'schedule(dynamic,1) '
'num_threads(nthreads_nested)')

Expand Down Expand Up @@ -1073,11 +1078,11 @@ def test_multiple_subnests_v1(self):
'omp for collapse(2) schedule(dynamic,1)'
assert not trees[0][2].pragmas
assert not trees[0][3].pragmas
assert trees[0][4].pragmas[0].value == ('omp parallel for collapse(1) '
assert trees[0][4].pragmas[0].value == ('omp parallel for '
'schedule(dynamic,1) '
'num_threads(nthreads_nested)')
assert not trees[1][2].pragmas
assert trees[1][3].pragmas[0].value == ('omp parallel for collapse(1) '
assert trees[1][3].pragmas[0].value == ('omp parallel for '
'schedule(dynamic,1) '
'num_threads(nthreads_nested)')

Expand Down
2 changes: 1 addition & 1 deletion tests/test_gpu_openacc.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_tile_insteadof_collapse(self, par_tile):
'acc parallel loop tile(32,4) present(u)'
# Only the AFFINE Iterations are tiled
assert trees[3][1].pragmas[0].value ==\
'acc parallel loop collapse(1) present(src,src_coords,u)'
'acc parallel loop present(src,src_coords,u)'

@pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
((32, 4, 4), (8, 8, 8))])
Expand Down