Skip to content

Commit 23dfc5b

Browse files
authored
Fix ignoring input files for symlink reasons (#4222)
This relates to #4015, #4161 and the behaviour of os.getcwd() Black is a big user of pathlib and as such loves doing `.resolve()`, since for a long time it was the only good way of getting an absolute path in pathlib. However, this has two problems: The first minor problem is performance, e.g. in #3751 I (safely) got rid of a bunch of `.resolve()` which made Black 40% faster on cached runs. The second more important problem is that always resolving symlinks results in unintuitive exclusion behaviour. For instance, a gitignored symlink should never alter formatting of your actual code. This kind of thing was reported by users a few times. In #3846, I improved the exclusion rule logic for symlinks in `gen_python_files` and everything was good. But `gen_python_files` isn't enough, there's also `get_sources`, which handles user specified paths directly (instead of files Black discovers). So in #4015, I made a very similar change to #3846 for `get_sources`, and this is where some problems began. The core issue was the line: ``` root_relative_path = path.absolute().relative_to(root).as_posix() ``` The first issue is that despite root being computed from user inputs, we call `.resolve()` while computing it (likely unecessarily). Which means that `path` may not actually be relative to `root`. So I started off this PR trying to fix that, when I ran into the second issue. Which is that `os.getcwd()` (as called by `os.path.abspath` or `Path.absolute` or `Path.cwd`) also often resolves symlinks! ``` >>> import os >>> os.environ.get("PWD") '/Users/shantanu/dev/black/symlink/bug' >>> os.getcwd() '/Users/shantanu/dev/black/actual/bug' ``` This also meant that the breakage often would not show up when input relative paths. This doesn't affect `gen_python_files` / #3846 because things are always absolute and known to be relative to `root`. Anyway, it looks like #4161 fixed the crash by just swallowing the error and ignoring the file. Instead, we should just try to compute the actual relative path. I think this PR should be quite safe, but we could also consider reverting some of the previous changes; the associated issues weren't too popular. At the same time, I think there's still behaviour that can be improved and I kind of want to make larger changes, but maybe I'll save that for if we do something like #3952 Hopefully fixes #4205, fixes #4209, actual fix for #4077
1 parent a201003 commit 23dfc5b

File tree

4 files changed

+127
-53
lines changed

4 files changed

+127
-53
lines changed

Diff for: CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
### Configuration
2626

27+
- Fix issue where _Black_ would ignore input files in the presence of symlinks (#4222)
2728
- _Black_ now ignores `pyproject.toml` that is missing a `tool.black` section when
2829
discovering project root and configuration. Since _Black_ continues to use version
2930
control as an indicator of project root, this is expected to primarily change behavior

Diff for: src/black/__init__.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,12 @@
4444
STDIN_PLACEHOLDER,
4545
)
4646
from black.files import (
47+
best_effort_relative_path,
4748
find_project_root,
4849
find_pyproject_toml,
4950
find_user_pyproject_toml,
5051
gen_python_files,
5152
get_gitignore,
52-
get_root_relative_path,
5353
parse_pyproject_toml,
5454
path_is_excluded,
5555
resolves_outside_root_or_cannot_stat,
@@ -734,6 +734,7 @@ def get_sources(
734734
"""Compute the set of files to be formatted."""
735735
sources: Set[Path] = set()
736736

737+
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
737738
using_default_exclude = exclude is None
738739
exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude
739740
gitignore: Optional[Dict[Path, PathSpec]] = None
@@ -749,11 +750,12 @@ def get_sources(
749750

750751
# Compare the logic here to the logic in `gen_python_files`.
751752
if is_stdin or path.is_file():
752-
root_relative_path = get_root_relative_path(path, root, report)
753-
754-
if root_relative_path is None:
753+
if resolves_outside_root_or_cannot_stat(path, root, report):
754+
if verbose:
755+
out(f'Skipping invalid source: "{path}"', fg="red")
755756
continue
756757

758+
root_relative_path = best_effort_relative_path(path, root).as_posix()
757759
root_relative_path = "/" + root_relative_path
758760

759761
# Hard-exclude any files that matches the `--force-exclude` regex.
@@ -763,11 +765,6 @@ def get_sources(
763765
)
764766
continue
765767

766-
if resolves_outside_root_or_cannot_stat(path, root, report):
767-
if verbose:
768-
out(f'Skipping invalid source: "{path}"', fg="red")
769-
continue
770-
771768
if is_stdin:
772769
path = Path(f"{STDIN_PLACEHOLDER}{str(path)}")
773770

Diff for: src/black/files.py

+27-17
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ def _load_toml(path: Union[Path, str]) -> Dict[str, Any]:
4848
return tomllib.load(f)
4949

5050

51+
@lru_cache
52+
def _cached_resolve(path: Path) -> Path:
53+
return path.resolve()
54+
55+
5156
@lru_cache
5257
def find_project_root(
5358
srcs: Sequence[str], stdin_filename: Optional[str] = None
@@ -67,9 +72,9 @@ def find_project_root(
6772
if stdin_filename is not None:
6873
srcs = tuple(stdin_filename if s == "-" else s for s in srcs)
6974
if not srcs:
70-
srcs = [str(Path.cwd().resolve())]
75+
srcs = [str(_cached_resolve(Path.cwd()))]
7176

72-
path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
77+
path_srcs = [_cached_resolve(Path(Path.cwd(), src)) for src in srcs]
7378

7479
# A list of lists of parents for each 'src'. 'src' is included as a
7580
# "parent" of itself if it is a directory
@@ -236,7 +241,7 @@ def find_user_pyproject_toml() -> Path:
236241
else:
237242
config_root = os.environ.get("XDG_CONFIG_HOME", "~/.config")
238243
user_config_path = Path(config_root).expanduser() / "black"
239-
return user_config_path.resolve()
244+
return _cached_resolve(user_config_path)
240245

241246

242247
@lru_cache
@@ -266,27 +271,31 @@ def resolves_outside_root_or_cannot_stat(
266271
try:
267272
if sys.version_info < (3, 8, 6):
268273
path = path.absolute() # https://bugs.python.org/issue33660
269-
resolved_path = path.resolve()
270-
return get_root_relative_path(resolved_path, root, report) is None
274+
resolved_path = _cached_resolve(path)
271275
except OSError as e:
272276
if report:
273277
report.path_ignored(path, f"cannot be read because {e}")
274278
return True
275-
276-
277-
def get_root_relative_path(
278-
path: Path,
279-
root: Path,
280-
report: Optional[Report] = None,
281-
) -> Optional[str]:
282-
"""Returns the file path relative to the 'root' directory"""
283279
try:
284-
root_relative_path = path.absolute().relative_to(root).as_posix()
280+
resolved_path.relative_to(root)
285281
except ValueError:
286282
if report:
287283
report.path_ignored(path, f"is a symbolic link that points outside {root}")
288-
return None
289-
return root_relative_path
284+
return True
285+
return False
286+
287+
288+
def best_effort_relative_path(path: Path, root: Path) -> Path:
289+
# Precondition: resolves_outside_root_or_cannot_stat(path, root) is False
290+
try:
291+
return path.absolute().relative_to(root)
292+
except ValueError:
293+
pass
294+
root_parent = next((p for p in path.parents if _cached_resolve(p) == root), None)
295+
if root_parent is not None:
296+
return path.relative_to(root_parent)
297+
# something adversarial, fallback to path guaranteed by precondition
298+
return _cached_resolve(path).relative_to(root)
290299

291300

292301
def _path_is_ignored(
@@ -339,7 +348,8 @@ def gen_python_files(
339348

340349
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
341350
for child in paths:
342-
root_relative_path = child.absolute().relative_to(root).as_posix()
351+
assert child.is_absolute()
352+
root_relative_path = child.relative_to(root).as_posix()
343353

344354
# First ignore files matching .gitignore, if passed
345355
if gitignore_dict and _path_is_ignored(

Diff for: tests/test_black.py

+93-27
Original file line numberDiff line numberDiff line change
@@ -2567,32 +2567,32 @@ def test_symlinks(self) -> None:
25672567
gitignore = PathSpec.from_lines("gitwildmatch", [])
25682568

25692569
regular = MagicMock()
2570-
regular.absolute.return_value = root / "regular.py"
2570+
regular.relative_to.return_value = Path("regular.py")
25712571
regular.resolve.return_value = root / "regular.py"
25722572
regular.is_dir.return_value = False
25732573
regular.is_file.return_value = True
25742574

25752575
outside_root_symlink = MagicMock()
2576-
outside_root_symlink.absolute.return_value = root / "symlink.py"
2576+
outside_root_symlink.relative_to.return_value = Path("symlink.py")
25772577
outside_root_symlink.resolve.return_value = Path("/nowhere")
25782578
outside_root_symlink.is_dir.return_value = False
25792579
outside_root_symlink.is_file.return_value = True
25802580

25812581
ignored_symlink = MagicMock()
2582-
ignored_symlink.absolute.return_value = root / ".mypy_cache" / "symlink.py"
2582+
ignored_symlink.relative_to.return_value = Path(".mypy_cache") / "symlink.py"
25832583
ignored_symlink.is_dir.return_value = False
25842584
ignored_symlink.is_file.return_value = True
25852585

25862586
# A symlink that has an excluded name, but points to an included name
25872587
symlink_excluded_name = MagicMock()
2588-
symlink_excluded_name.absolute.return_value = root / "excluded_name"
2588+
symlink_excluded_name.relative_to.return_value = Path("excluded_name")
25892589
symlink_excluded_name.resolve.return_value = root / "included_name.py"
25902590
symlink_excluded_name.is_dir.return_value = False
25912591
symlink_excluded_name.is_file.return_value = True
25922592

25932593
# A symlink that has an included name, but points to an excluded name
25942594
symlink_included_name = MagicMock()
2595-
symlink_included_name.absolute.return_value = root / "included_name.py"
2595+
symlink_included_name.relative_to.return_value = Path("included_name.py")
25962596
symlink_included_name.resolve.return_value = root / "excluded_name"
25972597
symlink_included_name.is_dir.return_value = False
25982598
symlink_included_name.is_file.return_value = True
@@ -2626,39 +2626,100 @@ def test_symlinks(self) -> None:
26262626
outside_root_symlink.resolve.assert_called_once()
26272627
ignored_symlink.resolve.assert_not_called()
26282628

2629+
def test_get_sources_symlink_and_force_exclude(self) -> None:
2630+
with TemporaryDirectory() as tempdir:
2631+
tmp = Path(tempdir).resolve()
2632+
actual = tmp / "actual"
2633+
actual.mkdir()
2634+
symlink = tmp / "symlink"
2635+
symlink.symlink_to(actual)
2636+
2637+
actual_proj = actual / "project"
2638+
actual_proj.mkdir()
2639+
(actual_proj / "module.py").write_text("print('hello')", encoding="utf-8")
2640+
2641+
symlink_proj = symlink / "project"
2642+
2643+
with change_directory(symlink_proj):
2644+
assert_collected_sources(
2645+
src=["module.py"],
2646+
root=symlink_proj.resolve(),
2647+
expected=["module.py"],
2648+
)
2649+
2650+
absolute_module = symlink_proj / "module.py"
2651+
assert_collected_sources(
2652+
src=[absolute_module],
2653+
root=symlink_proj.resolve(),
2654+
expected=[absolute_module],
2655+
)
2656+
2657+
# a few tricky tests for force_exclude
2658+
flat_symlink = symlink_proj / "symlink_module.py"
2659+
flat_symlink.symlink_to(actual_proj / "module.py")
2660+
assert_collected_sources(
2661+
src=[flat_symlink],
2662+
root=symlink_proj.resolve(),
2663+
force_exclude=r"/symlink_module.py",
2664+
expected=[],
2665+
)
2666+
2667+
target = actual_proj / "target"
2668+
target.mkdir()
2669+
(target / "another.py").write_text("print('hello')", encoding="utf-8")
2670+
(symlink_proj / "nested").symlink_to(target)
2671+
2672+
assert_collected_sources(
2673+
src=[symlink_proj / "nested" / "another.py"],
2674+
root=symlink_proj.resolve(),
2675+
force_exclude=r"nested",
2676+
expected=[],
2677+
)
2678+
assert_collected_sources(
2679+
src=[symlink_proj / "nested" / "another.py"],
2680+
root=symlink_proj.resolve(),
2681+
force_exclude=r"target",
2682+
expected=[symlink_proj / "nested" / "another.py"],
2683+
)
2684+
26292685
def test_get_sources_with_stdin_symlink_outside_root(
26302686
self,
26312687
) -> None:
26322688
path = THIS_DIR / "data" / "include_exclude_tests"
26332689
stdin_filename = str(path / "b/exclude/a.py")
26342690
outside_root_symlink = Path("/target_directory/a.py")
2691+
root = Path("target_dir/").resolve().absolute()
26352692
with patch("pathlib.Path.resolve", return_value=outside_root_symlink):
26362693
assert_collected_sources(
2637-
root=Path("target_directory/"),
2694+
root=root,
26382695
src=["-"],
26392696
expected=[],
26402697
stdin_filename=stdin_filename,
26412698
)
26422699

2643-
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
26442700
def test_get_sources_with_stdin(self) -> None:
26452701
src = ["-"]
26462702
expected = ["-"]
2647-
assert_collected_sources(src, expected, include="", exclude=r"/exclude/|a\.py")
2703+
assert_collected_sources(
2704+
src,
2705+
root=THIS_DIR.resolve(),
2706+
expected=expected,
2707+
include="",
2708+
exclude=r"/exclude/|a\.py",
2709+
)
26482710

2649-
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
26502711
def test_get_sources_with_stdin_filename(self) -> None:
26512712
src = ["-"]
26522713
stdin_filename = str(THIS_DIR / "data/collections.py")
26532714
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
26542715
assert_collected_sources(
26552716
src,
2656-
expected,
2717+
root=THIS_DIR.resolve(),
2718+
expected=expected,
26572719
exclude=r"/exclude/a\.py",
26582720
stdin_filename=stdin_filename,
26592721
)
26602722

2661-
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
26622723
def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
26632724
# Exclude shouldn't exclude stdin_filename since it is mimicking the
26642725
# file being passed directly. This is the same as
@@ -2669,12 +2730,12 @@ def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
26692730
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
26702731
assert_collected_sources(
26712732
src,
2672-
expected,
2733+
root=THIS_DIR.resolve(),
2734+
expected=expected,
26732735
exclude=r"/exclude/|a\.py",
26742736
stdin_filename=stdin_filename,
26752737
)
26762738

2677-
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
26782739
def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
26792740
# Extend exclude shouldn't exclude stdin_filename since it is mimicking the
26802741
# file being passed directly. This is the same as
@@ -2685,40 +2746,45 @@ def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
26852746
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
26862747
assert_collected_sources(
26872748
src,
2688-
expected,
2749+
root=THIS_DIR.resolve(),
2750+
expected=expected,
26892751
extend_exclude=r"/exclude/|a\.py",
26902752
stdin_filename=stdin_filename,
26912753
)
26922754

2693-
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
26942755
def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None:
26952756
# Force exclude should exclude the file when passing it through
26962757
# stdin_filename
26972758
path = THIS_DIR / "data" / "include_exclude_tests"
26982759
stdin_filename = str(path / "b/exclude/a.py")
26992760
assert_collected_sources(
27002761
src=["-"],
2762+
root=THIS_DIR.resolve(),
27012763
expected=[],
27022764
force_exclude=r"/exclude/|a\.py",
27032765
stdin_filename=stdin_filename,
27042766
)
27052767

2706-
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
27072768
def test_get_sources_with_stdin_filename_and_force_exclude_and_symlink(
27082769
self,
27092770
) -> None:
27102771
# Force exclude should exclude a symlink based on the symlink, not its target
2711-
path = THIS_DIR / "data" / "include_exclude_tests"
2712-
stdin_filename = str(path / "symlink.py")
2713-
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
2714-
target = path / "b/exclude/a.py"
2715-
with patch("pathlib.Path.resolve", return_value=target):
2716-
assert_collected_sources(
2717-
src=["-"],
2718-
expected=expected,
2719-
force_exclude=r"exclude/a\.py",
2720-
stdin_filename=stdin_filename,
2721-
)
2772+
with TemporaryDirectory() as tempdir:
2773+
tmp = Path(tempdir).resolve()
2774+
(tmp / "exclude").mkdir()
2775+
(tmp / "exclude" / "a.py").write_text("print('hello')", encoding="utf-8")
2776+
(tmp / "symlink.py").symlink_to(tmp / "exclude" / "a.py")
2777+
2778+
stdin_filename = str(tmp / "symlink.py")
2779+
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
2780+
with change_directory(tmp):
2781+
assert_collected_sources(
2782+
src=["-"],
2783+
root=tmp,
2784+
expected=expected,
2785+
force_exclude=r"exclude/a\.py",
2786+
stdin_filename=stdin_filename,
2787+
)
27222788

27232789

27242790
class TestDeFactoAPI:

0 commit comments

Comments
 (0)