From 605da3415cb929d4ebd944484d86dc214564a34f Mon Sep 17 00:00:00 2001 From: Sorin Sbarnea Date: Tue, 30 May 2023 12:47:22 +0100 Subject: [PATCH] Reimplement file exclusion logic --- .config/dictionary.txt | 1 + .config/requirements.in | 1 + .github/workflows/tox.yml | 2 +- src/ansiblelint/__main__.py | 2 +- src/ansiblelint/file_utils.py | 153 +++++++++++++++------------------- test/test_file_utils.py | 57 ++----------- test/test_runner.py | 4 +- test/test_utils.py | 2 - 8 files changed, 80 insertions(+), 142 deletions(-) diff --git a/.config/dictionary.txt b/.config/dictionary.txt index f73396f59a..e1e9f2b094 100644 --- a/.config/dictionary.txt +++ b/.config/dictionary.txt @@ -247,6 +247,7 @@ parseable pathex pathlib pathspec +pathspecs pbrun pfexec pickleable diff --git a/.config/requirements.in b/.config/requirements.in index f5e1e2d464..2ba3893174 100644 --- a/.config/requirements.in +++ b/.config/requirements.in @@ -7,6 +7,7 @@ black>=22.8.0 # MIT filelock>=3.3.0 # The Unlicense jsonschema>=4.10.0 # MIT, version needed for improved errors packaging>=21.3 # Apache-2.0,BSD-2-Clause +pathspec>=0.9.0 # Mozilla Public License 2.0 (MPL 2.0) pyyaml>=5.4.1 # MIT (centos 9 has 5.3.1) rich>=12.0.0 # MIT ruamel.yaml>=0.17.0,<0.18,!=0.17.29,!=0.17.30 # MIT, next version is planned to have breaking changes diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8dd097297e..4335392d49 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -59,7 +59,7 @@ jobs: WSLENV: FORCE_COLOR:PYTEST_REQPASS:TOXENV:GITHUB_STEP_SUMMARY # Number of expected test passes, safety measure for accidental skip of # tests. Update value if you add/remove tests. - PYTEST_REQPASS: 806 + PYTEST_REQPASS: 803 steps: - name: Activate WSL1 if: "contains(matrix.shell, 'wsl')" diff --git a/src/ansiblelint/__main__.py b/src/ansiblelint/__main__.py index 88e7f73e5e..7636882f4e 100755 --- a/src/ansiblelint/__main__.py +++ b/src/ansiblelint/__main__.py @@ -365,7 +365,7 @@ def path_inject() -> None: # We do know that finding ansible in PATH does not guarantee that it is # functioning or that is in fact the same version that was installed as # our dependency, but addressing this would be done by ansible-compat. - for cmd in ("ansible", "git"): + for cmd in ("ansible",): if not shutil.which(cmd): msg = f"Failed to find runtime dependency '{cmd}' in PATH" raise RuntimeError(msg) diff --git a/src/ansiblelint/file_utils.py b/src/ansiblelint/file_utils.py index ab66eeebbe..ce64ff5c74 100644 --- a/src/ansiblelint/file_utils.py +++ b/src/ansiblelint/file_utils.py @@ -4,21 +4,20 @@ import copy import logging import os -import subprocess import sys -from collections import OrderedDict, defaultdict +from collections import defaultdict from contextlib import contextmanager from pathlib import Path from tempfile import NamedTemporaryFile from typing import TYPE_CHECKING, Any, cast +import pathspec import wcmatch.pathlib import wcmatch.wcmatch from yaml.error import YAMLError from ansiblelint.config import BASE_KINDS, Options, options -from ansiblelint.constants import CONFIG_FILENAMES, GIT_CMD, FileType, States -from ansiblelint.logger import warn_or_fail +from ansiblelint.constants import CONFIG_FILENAMES, FileType, States if TYPE_CHECKING: from collections.abc import Iterator, Sequence @@ -419,93 +418,22 @@ def data(self) -> Any: # pylint: disable=redefined-outer-name -def discover_lintables(options: Options) -> dict[str, Any]: +def discover_lintables(options: Options) -> list[str]: """Find all files that we know how to lint. Return format is normalized, relative for stuff below cwd, ~/ for content under current user and absolute for everything else. """ - # git is preferred as it also considers .gitignore - # As --recurse-submodules is incompatible with --others we need to run - # twice to get combined results. - commands = { - "tracked": { - "cmd": [ - *GIT_CMD, - "ls-files", - "--cached", - "--exclude-standard", - "--recurse-submodules", - "-z", - ], - "remove": False, - }, - "others": { - "cmd": [ - *GIT_CMD, - "ls-files", - "--cached", - "--others", - "--exclude-standard", - "-z", - ], - "remove": False, - }, - "absent": { - "cmd": [ - *GIT_CMD, - "ls-files", - "--deleted", - "-z", - ], - "remove": True, - }, - } - - out: set[str] = set() - try: - for k, value in commands.items(): - if not isinstance(value["cmd"], list): - msg = f"Expected list but got {type(value['cmd'])}" - raise TypeError(msg) - result = subprocess.check_output( - value["cmd"], # noqa: S603 - stderr=subprocess.STDOUT, - text=True, - ).split("\x00")[:-1] - _logger.info( - "Discovered files to lint using git (%s): %s", - k, - " ".join(value["cmd"]), - ) - out = out.union(result) if not value["remove"] else out - set(result) - - except subprocess.CalledProcessError as exc: - if not (exc.returncode == 128 and "fatal: not a git repository" in exc.output): - err = exc.output.rstrip("\n") - warn_or_fail(f"Failed to discover lintable files using git: {err}") - except FileNotFoundError as exc: - if options.verbosity: - warn_or_fail(f"Failed to locate command: {exc}") - - # Applying exclude patterns - if not out: - out = set(".") - - exclude_pattern = "|".join(str(x) for x in options.exclude_paths) - _logger.info("Looking up for files, excluding %s ...", exclude_pattern) - # remove './' prefix from output of WcMatch - out = { - strip_dotslash_prefix(fname) - for fname in wcmatch.wcmatch.WcMatch( - ".", - exclude_pattern=exclude_pattern, - flags=wcmatch.wcmatch.RECURSIVE, - limit=256, - ).match() - } - - return OrderedDict.fromkeys(sorted(out)) + if not options.lintables: + options.lintables = ["."] + + return [ + str(filename) + for filename in get_all_files( + *[Path(s) for s in options.lintables], + exclude_paths=options.exclude_paths, + ) + ] def strip_dotslash_prefix(fname: str) -> str: @@ -598,3 +526,56 @@ def _guess_parent(lintable: Lintable) -> Lintable | None: except IndexError: pass return None + + +def get_all_files( + *paths: Path, + exclude_paths: list[str] | None = None, +) -> list[Path]: + """Recursively retrieve all files from given folders.""" + all_files: list[Path] = [] + exclude_paths = [] if exclude_paths is None else exclude_paths + + def is_excluded(path_to_check: Path) -> bool: + """Check if a file is exclude by current specs.""" + return any(spec.match_file(str(path_to_check)) for spec in pathspecs) + + for path in paths: + pathspecs = [ + pathspec.GitIgnoreSpec.from_lines( + [ + ".git", + ".tox", + ".mypy_cache", + "__pycache__", + ".DS_Store", + ".coverage", + ".pytest_cache", + ".ruff_cache", + *exclude_paths, + ], + ), + ] + gitignore = path / ".gitignore" + if gitignore.exists(): + with gitignore.open(encoding="UTF-8") as f: + _logger.info("Loading ignores from %s", gitignore) + pathspecs.append( + pathspec.GitIgnoreSpec.from_lines(f.read().splitlines()), + ) + + # Iterate over all items in the directory + if path.is_file(): + all_files.append(path) + else: + for item in sorted(path.iterdir()): + if is_excluded(item): + _logger.info("Excluded: %s", item) + continue + if item.is_file(): + all_files.append(item) + # If it's a directory, recursively call the function + elif item.is_dir(): + all_files.extend(get_all_files(item, exclude_paths=exclude_paths)) + + return all_files diff --git a/test/test_file_utils.py b/test/test_file_utils.py index d86d2a19cb..b7b9115155 100644 --- a/test/test_file_utils.py +++ b/test/test_file_utils.py @@ -1,6 +1,7 @@ """Tests for file utility functions.""" from __future__ import annotations +import copy import logging import os import time @@ -10,7 +11,6 @@ import pytest from ansiblelint import cli, file_utils -from ansiblelint.__main__ import initialize_logger from ansiblelint.file_utils import ( Lintable, cwd, @@ -27,7 +27,6 @@ from _pytest.logging import LogCaptureFixture from _pytest.monkeypatch import MonkeyPatch - from ansiblelint.config import Options from ansiblelint.constants import FileType from ansiblelint.rules import RulesCollection @@ -73,39 +72,7 @@ def test_expand_paths_vars( assert expand_paths_vars([test_path]) == [expected] # type: ignore[list-item] -@pytest.mark.parametrize( - ("reset_env_var", "message_prefix"), - ( - # simulate absence of git command - ("PATH", "Failed to locate command: "), - # simulate a missing git repo - ("GIT_DIR", "Looking up for files"), - ), - ids=("no-git-cli", "outside-git-repo"), -) -def test_discover_lintables_git_verbose( - reset_env_var: str, - message_prefix: str, - monkeypatch: MonkeyPatch, - caplog: LogCaptureFixture, -) -> None: - """Ensure that autodiscovery lookup failures are logged.""" - options = cli.get_config(["-v"]) - initialize_logger(options.verbosity) - monkeypatch.setenv(reset_env_var, "") - file_utils.discover_lintables(options) - - assert any(m[2].startswith("Looking up for files") for m in caplog.record_tuples) - assert any(m.startswith(message_prefix) for m in caplog.messages) - - -@pytest.mark.parametrize( - "is_in_git", - (True, False), - ids=("in Git", "outside Git"), -) def test_discover_lintables_silent( - is_in_git: bool, monkeypatch: MonkeyPatch, capsys: CaptureFixture[str], caplog: LogCaptureFixture, @@ -119,16 +86,16 @@ def test_discover_lintables_silent( caplog.set_level(logging.FATAL) options = cli.get_config([]) test_dir = Path(__file__).resolve().parent - lint_path = test_dir / ".." / "examples" / "roles" / "test-role" - if not is_in_git: - monkeypatch.setenv("GIT_DIR", "") + lint_path = (test_dir / ".." / "examples" / "roles" / "test-role").resolve() yaml_count = len(list(lint_path.glob("**/*.yml"))) + len( list(lint_path.glob("**/*.yaml")), ) monkeypatch.chdir(str(lint_path)) - files = file_utils.discover_lintables(options) + my_options = copy.deepcopy(options) + my_options.lintables = [str(lint_path)] + files = file_utils.discover_lintables(my_options) stderr = capsys.readouterr().err assert ( not stderr @@ -144,7 +111,7 @@ def test_discover_lintables_umlaut(monkeypatch: MonkeyPatch) -> None: """Verify that filenames containing German umlauts are not garbled by the discover_lintables.""" options = cli.get_config([]) test_dir = Path(__file__).resolve().parent - lint_path = test_dir / ".." / "examples" / "playbooks" + lint_path = (test_dir / ".." / "examples" / "playbooks").resolve() monkeypatch.chdir(str(lint_path)) files = file_utils.discover_lintables(options) @@ -293,23 +260,13 @@ def test_discover_lintables_umlaut(monkeypatch: MonkeyPatch) -> None: ), # content should determine it as a play ), ) -def test_kinds(monkeypatch: MonkeyPatch, path: str, kind: FileType) -> None: +def test_kinds(path: str, kind: FileType) -> None: """Verify auto-detection logic based on DEFAULT_KINDS.""" - options = cli.get_config([]) - - # pylint: disable=unused-argument - def mockreturn(options: Options) -> dict[str, Any]: # noqa: ARG001 - return {normpath(path): kind} - # assert Lintable is able to determine file type lintable_detected = Lintable(path) lintable_expected = Lintable(path, kind=kind) assert lintable_detected == lintable_expected - monkeypatch.setattr(file_utils, "discover_lintables", mockreturn) - result = file_utils.discover_lintables(options) - assert lintable_detected.kind == result[lintable_expected.name] - def test_find_project_root_1(tmp_path: Path) -> None: """Verify find_project_root().""" diff --git a/test/test_runner.py b/test/test_runner.py index c2dfbf0e9d..e89cee15be 100644 --- a/test/test_runner.py +++ b/test/test_runner.py @@ -99,8 +99,8 @@ def test_runner_exclude_globs( ) matches = runner.run() - # we expect to find one 2 matches from the very few .yaml file we have there (most of them have .yml extension) - assert len(matches) == 2 + # we expect to find one match from the very few .yaml file we have there (most of them have .yml extension) + assert len(matches) == 1 @pytest.mark.parametrize( diff --git a/test/test_utils.py b/test/test_utils.py index b2c11eaab9..6ff1449a90 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -300,8 +300,6 @@ def test_cli_auto_detect(capfd: CaptureFixture[str]) -> None: out, err = capfd.readouterr() - # Confirmation that it runs in auto-detect mode - assert "Discovered files to lint using git" in err # An expected rule match from our examples assert ( "examples/playbooks/empty_playbook.yml:1:1: "