Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Go source excludes; Cleanup old filespec matching #4350

Merged
merged 30 commits into from
Mar 27, 2017
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def __init__(self, address=None, payload=None, **kwargs):
# for example via plain old filesystem access.
globs = Globs(ParseContext(rel_path=address.spec_path, type_aliases={}))
sources = globs('*', exclude=[globs('BUILD*'),
# This skips dirents.
globs('*/')])
# skip subdir contents
globs('*/**')])
Copy link
Member

@stuhood stuhood Mar 23, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that this is a legal glob... should it be */**/*? Should check that it still compiles in latest master now that v2 is enabled.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test case added to prove the case.


payload = payload or Payload()
payload.add_fields({
Expand Down
7 changes: 1 addition & 6 deletions src/python/pants/build_graph/source_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _find_targets_for_source(self, source, spec_path):
self._build_graph.inject_address_closure(address)
target = self._build_graph.get_target(address)
sources = target.payload.get_field('sources')
if self._sources_match(source, sources):
if sources and sources.matches(source):
yield address
elif self._address_mapper.is_declaring_file(address, source):
yield address
Expand All @@ -71,11 +71,6 @@ def _find_targets_for_source(self, source, spec_path):
yield address
break

def _sources_match(self, source, sources):
if not sources:
return False
return sources.matches(source)


class LazySourceMapper(SourceMapper):
"""
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/source/filespec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def glob_to_regex(pattern):
:returns: A regex string that matches same paths as the input glob does.
"""
out = ['^']
components = pattern.strip('/').replace('.', '[.]').split('/')
components = pattern.strip('/').replace('.', '[.]').replace('$', '[$]').split('/')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it might make sense to consider a more general escape mechanism. This covers $, but there are other non-glob regex characters that can appear in paths.

Initially I was thinking we could just drop re.escape in here, but because we're actually trying to parse out the glob info, I don't think that would work.

Maybe on line: 37 https://github.com/pantsbuild/pants/pull/4350/files#diff-2d528bdfb8ee22a56a1b0660753b7a3cR37

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been operating under the assumption that this code doesn't need to live very long with the v2 engine on the way. But if it does, this should probably switch to pathspec, which we already have a dep on.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the time when this logic was introduced, pathspec was still using gitignore syntax which is different from glob matching syntax. I am not sure whether pathspec has evolved to support glob matching syntax since.

Copy link
Contributor Author

@wisechengyi wisechengyi Mar 24, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. Tried pathspec, and it doesn't fit our needs.

doublestar = False
for component in components:
if len(out) == 1:
Expand Down
20 changes: 16 additions & 4 deletions src/python/pants/source/wrapped_globs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
unicode_literals, with_statement)

import os
from abc import abstractproperty
from abc import abstractmethod, abstractproperty
from hashlib import sha1

from six import string_types
from twitter.common.dirutil.fileset import Fileset

from pants.base.build_environment import get_buildroot
from pants.source.filespec import matches_filespec
from pants.util.dirutil import fast_relpath
from pants.util.memo import memoized_property
from pants.util.meta import AbstractClass
Expand Down Expand Up @@ -64,18 +65,17 @@ def __iter__(self):
def __getitem__(self, index):
return self.files[index]

@abstractmethod
def iter_relative_paths(self):
"""An alternative `__iter__` that joins files with the relative root."""
for f in self:
yield os.path.join(self.rel_root, f)


class EagerFilesetWithSpec(FilesetWithSpec):
def __init__(self, rel_root, filespec, files, files_hash):
"""
:param rel_root: The root for the given filespec, relative to the buildroot.
:param filespec: A filespec as generated by `FilesetRelPathWrapper`, which represents
what globs or file list it came from.
what globs or file list it came from. Must be relative to buildroot.
:param files: A list of matched files, with declared order and duplicates preserved.
:param files_hash: A string fingerprint for all files in the fileset.
"""
Expand All @@ -91,6 +91,11 @@ def files(self):
def files_hash(self):
return self._files_hash

def iter_relative_paths(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to call this paths_from_buildroot_iter() (and below).

relative is a vague term.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed.

"""An alternative `__iter__` that joins files with the relative root."""
for f in self:
yield os.path.join(self.rel_root, f)

def __repr__(self):
return 'EagerFilesetWithSpec(rel_root={!r}, files={!r})'.format(self.rel_root, self.files)

Expand Down Expand Up @@ -120,6 +125,13 @@ def files_hash(self):
h.update(f.read())
return h.digest()

def iter_relative_paths(self):
"""An alternative `__iter__` that joins files with the relative root."""
for f in self:
path_from_buildroot = os.path.join(self.rel_root, f)
if matches_filespec(path_from_buildroot, self.filespec):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is redundant. The files in the collection are already guaranteed to match the filespec: the filespec is just metadata about how the fileset was constructed.

The point of the SpecAddressMapper is to not expand the fileset, and to just use the filespec instead. This was based on the assumption that all FilesetWithSpec implementations were lazy.

What I was suggesting was an abstract method on FilesetWithSpec something like:

def matches(self, paths):
    """Returns the from the given set that match this FilesetWithSpec."""

...and the implementation for Eager would be to just look in the fileset, while the implementation for Lazy would be to compile the regex from the filespec and use that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corrected. Pushed matches to FilesetWithSpec, and removed all the regex logic.

yield path_from_buildroot


class FilesetRelPathWrapper(AbstractClass):
KNOWN_PARAMETERS = frozenset(['exclude', 'follow_links'])
Expand Down
7 changes: 7 additions & 0 deletions tests/python/pants_test/source/test_filespec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def assert_rule_match(self, glob, expected_matches, negate=False):
asserter(re.match(regex, expected), 'glob_to_regex(`{}`) -> `{}` {} path `{}`'
.format(glob, regex, match_state, expected))

def test_glob_to_regex_with_dollar_sign(self):
self.assert_rule_match('a/b$abc*.py', ('a/b$abc.py', ))

def test_glob_to_regex_single_star_0(self):
self.assert_rule_match('a/b/*/f.py', ('a/b/c/f.py', 'a/b/q/f.py'))

Expand Down Expand Up @@ -74,6 +77,10 @@ def test_glob_to_regex_leading_slash_1_neg(self):
def test_glob_to_regex_leading_slash_2(self):
self.assert_rule_match('/**', ('/a', '/a/b/c/d/e/f'))

def test_glob_to_regex_match_subdir(self):
self.assert_rule_match('*/**', ('a/b.go', 'a/b/c.go', 'a/b/c/d.go'))
self.assert_rule_match('*/**', ('a.go'), negate=True)

def test_glob_to_regex_leading_slash_2_neg(self):
self.assert_rule_match('/**', ('a', 'a/b/c/d/e/f'), negate=True)

Expand Down
7 changes: 6 additions & 1 deletion tests/python/pants_test/source/test_payload_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,12 @@ def test_passes_lazy_fileset_with_spec_through(self):
def test_passes_eager_fileset_with_spec_through(self):
self.create_file('foo/a.txt', 'a_contents')

fileset = EagerFilesetWithSpec('foo', {'globs': ['foo/a.txt']}, ['foo/a.txt'], {'foo/a.txt': b'12345'})
fileset = EagerFilesetWithSpec(rel_root='foo',
# Glob spec is relative to build root
filespec={'globs': ['foo/foo/a.txt']},
# files are relative to `rel_root`
files=['foo/a.txt'],
files_hash={'foo/a.txt': b'12345'})
sf = SourcesField(sources=fileset)

self.assertIs(fileset, sf.sources)
Expand Down