Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: find Python files in Rust #591

Merged
merged 3 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ crate-type = ["cdylib"]
[dependencies]
chardetng = "0.1.17"
encoding_rs = "0.8.33"
ignore = "0.4.22"
log = "0.4.21"
path-slash = "0.2.1"
pyo3 = { version = "0.20.3", features = ["abi3-py38"] }
pyo3-log = "0.9.0"
rayon = "1.9.0"
Expand Down
2 changes: 1 addition & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ classifiers = [
]
dependencies = [
"click>=8.0.0,<9",
"pathspec>=0.9.0",
"colorama>=0.4.6; sys_platform == 'win32'",
"tomli>=2.0.1; python_version < '3.11'"
]
Expand Down
22 changes: 16 additions & 6 deletions python/deptry/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from deptry.exceptions import IncorrectDependencyFormatError, UnsupportedPythonVersionError
from deptry.imports.extract import get_imported_modules_from_list_of_files
from deptry.module import ModuleBuilder, ModuleLocations
from deptry.python_file_finder import PythonFileFinder
from deptry.python_file_finder import get_all_python_files_in
fpgmaas marked this conversation as resolved.
Show resolved Hide resolved
from deptry.reporters import JSONReporter, TextReporter
from deptry.stdlibs import STDLIBS_PYTHON
from deptry.violations import (
Expand Down Expand Up @@ -65,10 +65,7 @@ def run(self) -> None:

self._log_dependencies(dependencies_extract)

all_python_files = PythonFileFinder(
self.exclude, self.extend_exclude, self.using_default_exclude, self.ignore_notebooks
).get_all_python_files_in(self.root)

python_files = self._find_python_files()
local_modules = self._get_local_modules()
stdlib_modules = self._get_stdlib_modules()

Expand All @@ -83,7 +80,7 @@ def run(self) -> None:
).build(),
locations,
)
for module, locations in get_imported_modules_from_list_of_files(all_python_files).items()
for module, locations in get_imported_modules_from_list_of_files(python_files).items()
]
imported_modules_with_locations = [
module_with_locations
Expand All @@ -99,6 +96,19 @@ def run(self) -> None:

self._exit(violations)

def _find_python_files(self) -> list[Path]:
    """Collect the Python files to scan and log them at debug level.

    Delegates the lookup to `get_all_python_files_in`, forwarding the root
    directories and the exclusion/notebook settings stored on the instance.

    Returns:
        The list of paths returned by `get_all_python_files_in`.
    """
    logging.debug("Collecting Python files to scan...")

    found_files = get_all_python_files_in(
        self.root,
        self.exclude,
        self.extend_exclude,
        self.using_default_exclude,
        self.ignore_notebooks,
    )

    # One path per line keeps the debug output readable for large projects.
    listing = "\n".join(map(str, found_files))
    logging.debug("Python files to scan for imports:\n%s\n", listing)

    return found_files

def _find_violations(
self, imported_modules_with_locations: list[ModuleLocations], dependencies: list[Dependency]
) -> list[Violation]:
Expand Down
85 changes: 12 additions & 73 deletions python/deptry/python_file_finder.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,18 @@
from __future__ import annotations

import logging
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Pattern

from pathspec import PathSpec
from deptry.rust import find_python_files


@dataclass
class PythonFileFinder:
    """
    Get a list of all .py and .ipynb files recursively within a directory.

    Args:
        exclude: A list of regex patterns of paths to ignore.
        extend_exclude: An additional list of regex patterns of paths to ignore.
        using_default_exclude: True when the default exclude list is in use, i.e. `exclude` was not explicitly
            set. `.gitignore` is only taken into account in that case.
        ignore_notebooks: If ignore_notebooks is set to True, .ipynb files are ignored and only .py files are returned.
    """

    exclude: tuple[str, ...]
    extend_exclude: tuple[str, ...]
    using_default_exclude: bool
    ignore_notebooks: bool = False

    def get_all_python_files_in(self, directories: tuple[Path, ...]) -> list[Path]:
        """Walk `directories` and return every Python (and optionally notebook) file that is not ignored.

        Args:
            directories: Root directories to walk recursively.

        Returns:
            A sorted, de-duplicated list of matching file paths.
        """
        logging.debug("Collecting Python files to scan...")

        source_files: set[Path] = set()

        # With no patterns this compiles to an empty regex that matches everything; the `_is_*_ignored`
        # helpers guard against that by checking that at least one pattern exists before matching.
        ignore_regex = re.compile("|".join(self.exclude + self.extend_exclude))
        file_lookup_suffixes = {".py"} if self.ignore_notebooks else {".py", ".ipynb"}

        # `.gitignore` is looked up in the current working directory only.
        gitignore_spec = self._generate_gitignore_pathspec(Path())

        for directory in directories:
            for root_str, dirs, files in os.walk(directory, topdown=True):
                root = Path(root_str)

                if self._is_directory_ignored(root, ignore_regex):
                    # Emptying `dirs` in place stops `os.walk` from descending into this subtree.
                    dirs[:] = []
                    continue

                for file_str in files:
                    file = root / file_str
                    if not self._is_file_ignored(file, file_lookup_suffixes, ignore_regex, gitignore_spec):
                        source_files.add(file)

        # Sort so that the result (and the debug log) is deterministic instead of following set ordering.
        source_files_list = sorted(source_files)

        logging.debug("Python files to scan for imports:\n%s\n", "\n".join(str(file) for file in source_files_list))

        return source_files_list

    def _is_directory_ignored(self, directory: Path, ignore_regex: Pattern[str]) -> bool:
        """Return True if `directory` matches one of the exclude patterns."""
        # Match against the POSIX form, exactly like `_is_file_ignored`, so that a forward-slash pattern
        # excludes a directory regardless of the platform's native path separator.
        return bool((self.exclude + self.extend_exclude) and ignore_regex.match(directory.as_posix()))

    def _is_file_ignored(
        self, file: Path, file_lookup_suffixes: set[str], ignore_regex: Pattern[str], gitignore_spec: PathSpec | None
    ) -> bool:
        """Return True if `file` has an unwanted suffix, matches an exclude pattern, or is git-ignored."""
        return bool(
            file.suffix not in file_lookup_suffixes
            or ((self.exclude + self.extend_exclude) and ignore_regex.match(file.as_posix()))
            or (gitignore_spec and gitignore_spec.match_file(file))
        )

    def _generate_gitignore_pathspec(self, directory: Path) -> PathSpec | None:
        """Build a `PathSpec` from `directory/.gitignore`, or return None when it must not or cannot be used."""
        # If `exclude` is explicitly set, `.gitignore` is not taken into account.
        if not self.using_default_exclude:
            return None

        try:
            with (directory / ".gitignore").open() as gitignore:
                return PathSpec.from_lines("gitwildmatch", gitignore)
        except FileNotFoundError:
            return None
def get_all_python_files_in(
    directories: tuple[Path, ...],
    exclude: tuple[str, ...],
    extend_exclude: tuple[str, ...],
    using_default_exclude: bool,
    ignore_notebooks: bool = False,
) -> list[Path]:
    """Return the files reported by `find_python_files` as `Path` objects.

    All arguments are forwarded unchanged, in the same order, to
    `deptry.rust.find_python_files`; each entry it returns is wrapped in `Path`.
    """
    raw_paths = find_python_files(directories, exclude, extend_exclude, using_default_exclude, ignore_notebooks)
    return list(map(Path, raw_paths))
9 changes: 9 additions & 0 deletions python/deptry/rust.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
from pathlib import Path

from .rust import Location as RustLocation

# Type stub only: takes a list of file path strings and returns a dict keyed by string whose values are
# lists of `RustLocation`. Presumably maps each imported module name to the places it is imported from
# in `.py` files; confirm against the Rust implementation.
def get_imports_from_py_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
# Type stub only: same signature as the function above; by its name, the notebook (`.ipynb`) counterpart.
def get_imports_from_ipynb_files(file_paths: list[str]) -> dict[str, list[RustLocation]]: ...
# Type stub only: declares the file finder exposed by the `rust` extension module.
# Returns the discovered files as plain strings; callers wanting `Path` objects must convert them.
# NOTE(review): the parameters presumably carry the same exclusion / notebook semantics as the
# Python-side wrapper that forwards to this function; confirm against the Rust implementation.
def find_python_files(
    directories: tuple[Path, ...],
    exclude: tuple[str, ...],
    extend_exclude: tuple[str, ...],
    using_default_exclude: bool,
    ignore_notebooks: bool = False,
) -> list[str]: ...

class Location:
file: str
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use pyo3::prelude::*;
mod file_utils;
mod imports;
mod location;
mod python_file_finder;
mod visitor;

use location::Location;
Expand All @@ -18,6 +19,7 @@ fn rust(_py: Python, m: &PyModule) -> PyResult<()> {
imports::ipynb::get_imports_from_ipynb_files,
m
)?)?;
m.add_function(wrap_pyfunction!(python_file_finder::find_python_files, m)?)?;
m.add_class::<Location>()?;
Ok(())
}
Loading
Loading