Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement directory filter for extract #832

Merged
merged 2 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,22 @@ def _strip(line):
comments[:] = map(_strip, comments)


def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
options_map=None, keywords=DEFAULT_KEYWORDS,
comment_tags=(), callback=None, strip_comment_tags=False):
def default_directory_filter(dirpath):
    """Tell whether the directory at ``dirpath`` should be walked into.

    Only the last component of the path is inspected. This reproduces the
    legacy default behavior: directories whose name begins with a dot or an
    underscore are ignored.

    :param dirpath: path of the candidate directory
    :return: `False` for dot/underscore directories, `True` otherwise
    """
    name = os.path.basename(dirpath)
    if name.startswith(('.', '_')):
        return False
    return True


def extract_from_dir(
dirname=None,
method_map=DEFAULT_MAPPING,
options_map=None,
keywords=DEFAULT_KEYWORDS,
comment_tags=(),
callback=None,
strip_comment_tags=False,
directory_filter=None,
):
"""Extract messages from any source files found in the given directory.

This function generates tuples of the form ``(filename, lineno, message,
Expand Down Expand Up @@ -127,18 +140,23 @@ def extract_from_dir(dirname=None, method_map=DEFAULT_MAPPING,
positional arguments, in that order
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param directory_filter: a callback to determine whether a directory should
be recursed into. Receives the full directory path;
should return True if the directory is valid.
:see: `pathmatch`
"""
if dirname is None:
dirname = os.getcwd()
if options_map is None:
options_map = {}
if directory_filter is None:
directory_filter = default_directory_filter

absname = os.path.abspath(dirname)
for root, dirnames, filenames in os.walk(absname):
dirnames[:] = [
subdir for subdir in dirnames
if not (subdir.startswith('.') or subdir.startswith('_'))
if directory_filter(os.path.join(root, subdir))
]
dirnames.sort()
filenames.sort()
Expand Down
35 changes: 33 additions & 2 deletions babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""
from __future__ import print_function

import fnmatch
import logging
import optparse
import os
Expand Down Expand Up @@ -256,6 +257,20 @@ def _run_domain(self, domain):
return catalogs_and_errors


def _make_directory_filter(ignore_patterns):
"""
Build a directory_filter function based on a list of ignore patterns.
"""
def cli_directory_filter(dirname):
basename = os.path.basename(dirname)
return not any(
fnmatch.fnmatch(basename, ignore_pattern)
for ignore_pattern
in ignore_patterns
)
return cli_directory_filter


class extract_messages(Command):
"""Message extraction command for use in ``setup.py`` scripts.

Expand Down Expand Up @@ -320,13 +335,20 @@ class extract_messages(Command):
'files or directories with commas(,)'), # TODO: Support repetition of this argument
('input-dirs=', None, # TODO (3.x): Remove me.
'alias for input-paths (does allow files as well as directories).'),
('ignore-dirs=', None,
'Patterns for directories to ignore when scanning for messages. '
'Separate multiple patterns with spaces (default ".* ._")'),
]
boolean_options = [
'no-default-keywords', 'no-location', 'omit-header', 'no-wrap',
'sort-output', 'sort-by-file', 'strip-comments'
]
as_args = 'input-paths'
multiple_value_options = ('add-comments', 'keywords')
multiple_value_options = (
'add-comments',
'keywords',
'ignore-dirs',
)
option_aliases = {
'keywords': ('--keyword',),
'mapping-file': ('--mapping',),
Expand Down Expand Up @@ -359,6 +381,7 @@ def initialize_options(self):
self.add_comments = None
self.strip_comments = False
self.include_lineno = True
self.ignore_dirs = None

def finalize_options(self):
if self.input_dirs:
Expand Down Expand Up @@ -427,6 +450,13 @@ def finalize_options(self):
elif self.add_location == 'file':
self.include_lineno = False

# Normalize the option once and build the filter from that normalized value.
# Handing the raw ``self.ignore_dirs`` to `_make_directory_filter` would
# bypass `listify_value`, so the filter would iterate over whatever
# un-normalized form the option arrived in rather than one pattern per item.
ignore_dirs = listify_value(self.ignore_dirs)
if ignore_dirs:
    self.directory_filter = _make_directory_filter(ignore_dirs)
else:
    # No patterns were given; a None filter is what gets passed on as the
    # ``directory_filter`` argument when extraction runs.
    self.directory_filter = None


def run(self):
mappings = self._get_mappings()
with open(self.output_file, 'wb') as outfile:
Expand Down Expand Up @@ -469,7 +499,8 @@ def callback(filename, method, options):
keywords=self.keywords,
comment_tags=self.add_comments,
callback=callback,
strip_comment_tags=self.strip_comments
strip_comment_tags=self.strip_comments,
directory_filter=self.directory_filter,
)
for filename, lineno, message, comments, context in extracted:
if os.path.isfile(path):
Expand Down
5 changes: 5 additions & 0 deletions tests/messages/data/project/_hidden_by_default/hidden_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from gettext import gettext


def foo():
    """Print a translated message; fixture living in a ``_``-prefixed directory."""
    print(gettext('ssshhh....'))
23 changes: 23 additions & 0 deletions tests/messages/test_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -1431,3 +1431,26 @@ def test_extract_error_code(monkeypatch, capsys):
if err:
# replace hack below for py2/py3 compatibility
assert "unknown named placeholder 'merkki'" in err.replace("u'", "'")


@pytest.mark.parametrize("with_underscore_ignore", (False, True))
def test_extract_ignore_dirs(monkeypatch, capsys, tmp_path, with_underscore_ignore):
    """Run ``extract`` with ``--ignore-dirs`` and inspect the generated POT file."""
    pot_file = tmp_path / 'temp.pot'
    monkeypatch.chdir(project_dir)

    cmd_parts = ["extract . -o '{}' --ignore-dirs '*ignored*' ".format(pot_file)]
    if with_underscore_ignore:
        # Giving the option a second time also checks that repeated
        # arguments are supported.
        cmd_parts.append("--ignore-dirs '_*'")
    cmdinst = configure_cli_command("".join(cmd_parts))

    assert isinstance(cmdinst, extract_messages)
    assert cmdinst.directory_filter
    cmdinst.run()
    pot_content = pot_file.read_text()

    # The `ignored` directory is now actually ignored:
    assert 'this_wont_normally_be_here' not in pot_content

    # A manually supplied filter replaces the default one, so the otherwise
    # hidden `_hidden_by_default` directory is walked into, unless the
    # underscore pattern was explicitly passed as well.
    hidden_is_extracted = not with_underscore_ignore
    assert ('ssshhh....' in pot_content) == hidden_is_extracted
    assert ('_hidden_by_default' in pot_content) == hidden_is_extracted