Skip to content

Commit

Permalink
Attempt 2 to fix the to_gpu bug
Browse files Browse the repository at this point in the history
  • Loading branch information
ThrudPrimrose committed Oct 31, 2024
1 parent 5763e65 commit 3c7cd76
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions dace/transformation/interstate/gpu_transform_sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,29 @@ def apply(self, _, sdfg: sd.SDFG):
#######################################################
# Step 7: Wrap free tasklets and nested SDFGs with a GPU map

# Extend global_code_nodes with tasklets that read from or write to an array.
# The previous steps map all arrays to GPU storage, but they only check tasklets that
# read from or write to scalars when deciding what to wrap in a GPU map.
for state in sdfg.states():
for node in state.nodes():
if isinstance(node, nodes.Tasklet):
if node in global_code_nodes[state]:
continue
if state.entry_node(node) is None and not scope.is_devicelevel_gpu_kernel(
state.parent, state, node):
memlet_path_roots = set()
memlet_path_roots = memlet_path_roots.union(
[state.memlet_tree(e).root().edge.src for e in state.in_edges(node)]
)
memlet_path_roots = memlet_path_roots.union(
[state.memlet_tree(e).root().edge.dst for e in state.out_edges(node)]
)
gpu_accesses = [n.data for n in memlet_path_roots
if isinstance(n, nodes.AccessNode) and
sdfg.arrays[n.data].storage in gpu_storage]
if len(gpu_accesses) > 0:
global_code_nodes[state].append(node)

for state, gcodes in global_code_nodes.items():
for gcode in gcodes:
if gcode.label in self.exclude_tasklets.split(','):
Expand Down

0 comments on commit 3c7cd76

Please sign in to comment.