Skip to content

Commit

Permalink
pythonGH-89727: Fix os.fwalk() recursion error on deep trees (pytho…
Browse files Browse the repository at this point in the history
…n#119638)

Implement `os.fwalk()` using a list as a stack to avoid emitting recursion
errors on deeply nested trees.
  • Loading branch information
barneygale authored May 30, 2024
1 parent 2cc3502 commit 3c890b5
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 40 deletions.
91 changes: 53 additions & 38 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,24 +478,52 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
"""
sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd)
top = fspath(top)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if not follow_symlinks:
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd)
try:
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
path.samestat(orig_st, stat(topfd)))):
yield from _fwalk(topfd, top, isinstance(top, bytes),
topdown, onerror, follow_symlinks)
finally:
close(topfd)

def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
stack = [(_fwalk_walk, (True, dir_fd, top, top, None))]
isbytes = isinstance(top, bytes)
while stack:
yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks)

# Each item in the _fwalk() stack is a pair (action, args).
_fwalk_walk = 0 # args: (isroot, dirfd, toppath, topname, entry)
_fwalk_yield = 1 # args: (toppath, dirnames, filenames, topfd)
_fwalk_close = 2 # args: dirfd

def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks):
# Note: This uses O(depth of the directory tree) file descriptors: if
# necessary, it can be adapted to only require O(1) FDs, see issue
# #13734.

action, value = stack.pop()
if action == _fwalk_close:
close(value)
return
elif action == _fwalk_yield:
yield value
return
assert action == _fwalk_walk
isroot, dirfd, toppath, topname, entry = value
try:
if not follow_symlinks:
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if entry is None:
orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd)
else:
orig_st = entry.stat(follow_symlinks=False)
topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd)
except OSError as err:
if isroot:
raise
if onerror is not None:
onerror(err)
return
stack.append((_fwalk_close, topfd))
if not follow_symlinks:
if isroot and not st.S_ISDIR(orig_st.st_mode):
return
if not path.samestat(orig_st, stat(topfd)):
return

scandir_it = scandir(topfd)
dirs = []
nondirs = []
Expand All @@ -521,31 +549,18 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):

if topdown:
yield toppath, dirs, nondirs, topfd
else:
stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd)))

for name in dirs if entries is None else zip(dirs, entries):
try:
if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd)
except OSError as err:
if onerror is not None:
onerror(err)
continue
try:
if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
dirpath = path.join(toppath, name)
yield from _fwalk(dirfd, dirpath, isbytes,
topdown, onerror, follow_symlinks)
finally:
close(dirfd)

if not topdown:
yield toppath, dirs, nondirs, topfd
toppath = path.join(toppath, toppath[:0]) # Add trailing slash.
if entries is None:
stack.extend(
(_fwalk_walk, (False, topfd, toppath + name, name, None))
for name in dirs[::-1])
else:
stack.extend(
(_fwalk_walk, (False, topfd, toppath + name, name, entry))
for name, entry in zip(dirs[::-1], entries[::-1]))

__all__.append("fwalk")

Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -1687,8 +1687,6 @@ def test_fd_leak(self):

# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None


class BytesWalkTests(WalkTests):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on
deep directory trees by adjusting the implementation to be iterative instead
of recursive.

0 comments on commit 3c890b5

Please sign in to comment.