-
-
Notifications
You must be signed in to change notification settings - Fork 301
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
313 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
pub mod pattern { | ||
use bitflags::bitflags; | ||
|
||
bitflags! { | ||
pub struct Mode: u32 { | ||
/// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. | ||
const NO_SUB_DIR = 1 << 0; | ||
/// A pattern that is '*literal', meaning that it ends with what's given here | ||
const ENDS_WITH = 1 << 1; | ||
/// The pattern must match a directory, and not a file. | ||
const MUST_BE_DIR = 1 << 2; | ||
const NEGATIVE = 1 << 3; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,5 @@ | ||
#![forbid(unsafe_code, rust_2018_idioms)] | ||
|
||
pub mod ignore; | ||
|
||
pub mod parse; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
use crate::ignore; | ||
use bstr::{BString, ByteSlice}; | ||
|
||
pub struct Iter<'a> { | ||
lines: bstr::Lines<'a>, | ||
line_no: usize, | ||
} | ||
|
||
impl<'a> Iter<'a> { | ||
pub fn new(buf: &'a [u8]) -> Self { | ||
Iter { | ||
lines: buf.lines(), | ||
line_no: 0, | ||
} | ||
} | ||
} | ||
|
||
impl<'a> Iterator for Iter<'a> { | ||
type Item = (BString, ignore::pattern::Mode, usize); | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
let mut res = None; | ||
for mut line in self.lines.by_ref() { | ||
self.line_no += 1; | ||
let mut mode = ignore::pattern::Mode::empty(); | ||
if line.is_empty() { | ||
continue; | ||
}; | ||
if line.first() == Some(&b'#') { | ||
continue; | ||
} else if line.first() == Some(&b'!') { | ||
mode |= ignore::pattern::Mode::NEGATIVE; | ||
line = &line[1..]; | ||
} else if line.first() == Some(&b'\\') { | ||
let second = line.get(1); | ||
if second == Some(&b'!') || second == Some(&b'#') { | ||
line = &line[1..]; | ||
} | ||
} | ||
let mut line = truncate_non_escaped_trailing_spaces(line); | ||
if line.last() == Some(&b'/') { | ||
mode |= ignore::pattern::Mode::MUST_BE_DIR; | ||
line.pop(); | ||
} | ||
if !line.contains(&b'/') { | ||
mode |= ignore::pattern::Mode::NO_SUB_DIR; | ||
} | ||
if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { | ||
mode |= ignore::pattern::Mode::ENDS_WITH; | ||
} | ||
res = Some((line, mode, self.line_no)); | ||
break; | ||
} | ||
res | ||
} | ||
} | ||
|
||
/// We always copy just because that's ultimately needed anyway, not because we always have to. | ||
fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { | ||
match buf.rfind_not_byteset(br"\ ") { | ||
Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace | ||
None => buf.into(), | ||
Some(start_of_non_space) => { | ||
// This seems a bit strange but attempts to recreate the git implementation while | ||
// actually removing the escape characters before spaces. We leave other backslashes | ||
// for escapes to be handled by `glob/globset`. | ||
let mut res: BString = buf[..start_of_non_space + 1].into(); | ||
|
||
let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); | ||
let mut bare_spaces = 0; | ||
while let Some(b) = trailing_bytes.next() { | ||
match b { | ||
b' ' => { | ||
bare_spaces += 1; | ||
} | ||
b'\\' => { | ||
res.extend(std::iter::repeat(b' ').take(bare_spaces)); | ||
bare_spaces = 0; | ||
// Skip what follows, like git does, but keep spaces if possible. | ||
if trailing_bytes.next() == Some(&b' ') { | ||
res.push(b' '); | ||
} | ||
} | ||
_ => unreachable!("BUG: this must be either backslash or space"), | ||
} | ||
} | ||
res | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pub mod ignore; | ||
|
||
/// Return an iterator over the ignore patterns found in `buf`, one line at a time.
///
/// This is a convenience wrapper which merely forwards `buf` to `ignore::Iter::new()`.
pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> { | ||
ignore::Iter::new(buf) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
mod parse { | ||
mod ignore { | ||
use git_attributes::ignore::pattern::Mode; | ||
use git_testtools::fixture_path; | ||
|
||
#[test] | ||
fn line_numbers_are_counted_correctly() { | ||
let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); | ||
let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); | ||
assert_eq!( | ||
actual, | ||
vec![ | ||
("*.[oa]".into(), Mode::NO_SUB_DIR, 2), | ||
("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), | ||
("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), | ||
("/*".into(), Mode::empty(), 11), | ||
("/foo".into(), Mode::NEGATIVE, 12), | ||
("/foo/*".into(), Mode::empty(), 13), | ||
("/foo/bar".into(), Mode::NEGATIVE, 14) | ||
] | ||
); | ||
} | ||
|
||
#[test] | ||
fn line_endings_can_be_windows_or_unix() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::<Vec<_>>(), | ||
vec![ | ||
(r"unix".into(), Mode::NO_SUB_DIR, 1), | ||
(r"windows".into(), Mode::NO_SUB_DIR, 2), | ||
(r"last".into(), Mode::NO_SUB_DIR, 3) | ||
] | ||
); | ||
} | ||
|
||
#[test] | ||
fn mark_ends_with_pattern_specifically() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"*literal").next(), | ||
Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1)) | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"**literal").next(), | ||
Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)), | ||
"double-asterisk won't allow for fast comparisons" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"*litera[l]").next(), | ||
Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"*litera?").next(), | ||
Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"*litera\?").next(), | ||
Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)), | ||
"for now we don't handle escapes properly like git seems to do" | ||
); | ||
} | ||
|
||
#[test] | ||
fn comments_are_ignored() { | ||
assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); | ||
} | ||
|
||
#[test] | ||
fn backslashes_before_hashes_are_no_comments() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"\#hello").next(), | ||
Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
} | ||
|
||
#[test] | ||
fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"\hello\world").next(), | ||
Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
} | ||
|
||
#[test] | ||
fn leading_exclamation_mark_negates_pattern() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(b"!hello").next(), | ||
Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1)) | ||
); | ||
} | ||
|
||
#[test] | ||
fn leading_exclamation_marks_can_be_escaped_with_backslash() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"\!hello").next(), | ||
Some(("!hello".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
} | ||
|
||
#[test] | ||
fn absence_of_sub_directories_are_marked() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a/b").next(), | ||
Some(("a/b".into(), Mode::empty(), 1)) | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"ab").next(), | ||
Some(("ab".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
} | ||
|
||
#[test] | ||
fn trailing_slashes_are_marked_and_removed() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(b"dir/").next(), | ||
Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1)) | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(b"dir///").next(), | ||
Some(("dir//".into(), Mode::MUST_BE_DIR, 1)), | ||
"but only the last slash is removed" | ||
); | ||
} | ||
|
||
#[test] | ||
fn trailing_spaces_are_ignored() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a ").next(), | ||
Some(("a".into(), Mode::NO_SUB_DIR, 1)) | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(b"a\t\t ").next(), | ||
Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)), | ||
"trailing tabs are not ignored" | ||
); | ||
} | ||
#[test] | ||
fn trailing_spaces_can_be_escaped_to_be_literal() { | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a \ ").next(), | ||
Some(("a ".into(), Mode::NO_SUB_DIR, 1)), | ||
"a single escape in front of the last desired space is enough" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a b c ").next(), | ||
Some(("a b c".into(), Mode::NO_SUB_DIR, 1)), | ||
"spaces in the middle are fine" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a\ \ \ ").next(), | ||
Some(("a ".into(), Mode::NO_SUB_DIR, 1)), | ||
"one can also escape every single one" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a \ ").next(), | ||
Some(("a ".into(), Mode::NO_SUB_DIR, 1)), | ||
"or just the one in the middle, losing the last actual space" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a \").next(), | ||
Some(("a ".into(), Mode::NO_SUB_DIR, 1)), | ||
"escaping nothing also works as a whitespace protection" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a \\\ ").next(), | ||
Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), | ||
"strange things like these work too" | ||
); | ||
assert_eq!( | ||
git_attributes::parse::ignore(br"a \\ ").next(), | ||
Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), | ||
"strange things like these work as well" | ||
); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# ignore objects and archives, anywhere in the tree. | ||
*.[oa] | ||
|
||
# ignore generated html files, | ||
*.html | ||
|
||
# except foo.html which is maintained by hand | ||
!foo.html | ||
|
||
# exclude everything except directory foo/bar | ||
/* | ||
!/foo | ||
/foo/* | ||
!/foo/bar |