Skip to content

Commit

Permalink
pythongh-89727: Fix os.walk RecursionError on deep trees
Browse files Browse the repository at this point in the history
Use a stack to implement os.walk iteratively instead of recursively to
avoid hitting recursion limits on deeply nested trees.
  • Loading branch information
jonburdo committed Dec 15, 2022
1 parent b7e4f1d commit 99f4691
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 74 deletions.
158 changes: 84 additions & 74 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,86 +343,96 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
return _walk(fspath(top), topdown, onerror, followlinks)

def _walk(top, topdown, onerror, followlinks):
dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
# Note that scandir is global in this module due
# to earlier import-*.
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
return
stack = [(False, top)]
while stack:
is_result, top = stack.pop()
if is_result:
yield top
continue

with scandir_it:
while True:
try:
dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
# Note that scandir is global in this module due
# to earlier import-*.
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
continue

cont = False
with scandir_it:
while True:
try:
entry = next(scandir_it)
except StopIteration:
try:
entry = next(scandir_it)
except StopIteration:
break
except OSError as error:
if onerror is not None:
onerror(error)
cont = True
break
except OSError as error:
if onerror is not None:
onerror(error)
return

try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
else:
nondirs.append(entry.name)
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False

if not topdown and is_dir:
# Bottom-up: recurse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
if is_dir:
dirs.append(entry.name)
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolic link, same behaviour than
# os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)

# Yield before recursion if going top down
if topdown:
yield top, dirs, nondirs

# Recurse into sub-directories
islink, join = path.islink, path.join
for dirname in dirs:
new_path = join(top, dirname)
# Issue #23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
yield from _walk(new_path, topdown, onerror, followlinks)
else:
# Recurse into sub-directories
for new_path in walk_dirs:
yield from _walk(new_path, topdown, onerror, followlinks)
# Yield after recursion if going bottom up
yield top, dirs, nondirs
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: traverse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolic link, same behaviour than
# os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)
if cont:
continue

# Yield before sub-directory traversal if going top down
if topdown:
yield top, dirs, nondirs
# Traverse into sub-directories
islink, join = path.islink, path.join
for dirname in reversed(dirs):
new_path = join(top, dirname)
# Issue #23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
stack.append((False, new_path))
else:
# Yield after sub-directory traversal if going bottom up
stack.append((True, (top, dirs, nondirs)))
# Traverse into sub-directories
for new_path in reversed(walk_dirs):
stack.append((False, new_path))

__all__.append("walk")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on
deep directory structures by adjusting the implementation of
:func:`os._walk` to be iterative instead of recursive.

0 comments on commit 99f4691

Please sign in to comment.