Skip to content

Commit

Permalink
Merge branch 'parse-git-ignore'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Mar 18, 2022
2 parents f9c2190 + 9a9115f commit 8ab19a6
Show file tree
Hide file tree
Showing 8 changed files with 313 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions git-attributes/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ doctest = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bstr = { version = "0.2.13", default-features = false, features = ["std"]}
bitflags = "1.3.2"

[dev-dependencies]
git-testtools = { path = "../tests/tools"}
15 changes: 15 additions & 0 deletions git-attributes/src/ignore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/// Types describing a single pattern as produced when parsing ignore files.
pub mod pattern {
use bitflags::bitflags;

bitflags! {
/// Properties of a pattern which the parser derives from the pattern's syntax.
///
/// Multiple flags can be set at the same time, and no flag at all is a valid value, too.
pub struct Mode: u32 {
/// The pattern does not refer to a sub-directory: it contains no slash once a single trailing slash was removed.
const NO_SUB_DIR = 1 << 0;
/// A pattern of the form `*literal`: a leading asterisk followed by text without any of the
/// special characters `*`, `?`, `[` or `\`, so it can be matched by comparing the end of the input.
const ENDS_WITH = 1 << 1;
/// The pattern must match a directory, and not a file. It was written with a trailing slash.
const MUST_BE_DIR = 1 << 2;
/// The pattern was negated by prefixing it with an exclamation mark (`!`).
const NEGATIVE = 1 << 3;
}
}
}
4 changes: 4 additions & 0 deletions git-attributes/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
#![forbid(unsafe_code, rust_2018_idioms)]

pub mod ignore;

pub mod parse;
90 changes: 90 additions & 0 deletions git-attributes/src/parse/ignore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use crate::ignore;
use bstr::{BString, ByteSlice};

/// An iterator over the patterns contained in the bytes of an ignore file.
///
/// Empty lines and comment lines (those starting with `#`) do not yield an item.
pub struct Iter<'a> {
/// The lines of the input buffer which have not been looked at yet.
lines: bstr::Lines<'a>,
/// The 1-based number of the line most recently taken from `lines`, or 0 if none was taken yet.
line_no: usize,
}

impl<'a> Iter<'a> {
    /// Create an iterator over the ignore patterns found in `buf`, one pattern per line.
    ///
    /// No parsing happens here, lines are only examined as the iterator is advanced.
    pub fn new(buf: &'a [u8]) -> Self {
        let lines = buf.lines();
        Self { lines, line_no: 0 }
    }
}

impl<'a> Iterator for Iter<'a> {
    /// The pattern without its syntax markers, the flags derived from these markers, and the
    /// 1-based number of the line the pattern was read from.
    type Item = (BString, ignore::pattern::Mode, usize);

    /// Advance to the next line that holds a pattern and return it, or `None` once all lines
    /// were consumed. Empty lines and lines starting with `#` are counted but not returned.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let raw_line = self.lines.next()?;
            self.line_no += 1;

            let mut flags = ignore::pattern::Mode::empty();
            // Interpret the first byte: skip what isn't a pattern, and strip negation or escape markers.
            let unprefixed = match raw_line.first() {
                None | Some(b'#') => continue,
                Some(b'!') => {
                    flags |= ignore::pattern::Mode::NEGATIVE;
                    &raw_line[1..]
                }
                // A backslash only acts as escape here if it protects a leading `!` or `#`,
                // otherwise it stays part of the pattern.
                Some(b'\\') if matches!(raw_line.get(1), Some(b'!') | Some(b'#')) => &raw_line[1..],
                Some(_) => raw_line,
            };

            let mut pattern = truncate_non_escaped_trailing_spaces(unprefixed);
            // Only a single trailing slash is removed, additional ones remain in the pattern.
            if pattern.last() == Some(&b'/') {
                flags |= ignore::pattern::Mode::MUST_BE_DIR;
                pattern.pop();
            }
            if !pattern.contains(&b'/') {
                flags |= ignore::pattern::Mode::NO_SUB_DIR;
            }
            // `*literal` patterns are marked so they can be matched with a plain suffix comparison.
            if pattern.first() == Some(&b'*') && pattern[1..].find_byteset(br"*?[\").is_none() {
                flags |= ignore::pattern::Mode::ENDS_WITH;
            }
            return Some((pattern, flags, self.line_no));
        }
    }
}

/// Return a copy of `buf` with trailing spaces removed unless they are protected by a backslash.
///
/// Only the trailing run consisting of spaces and backslashes is rewritten, everything before
/// it is copied verbatim. If `buf` doesn't end with such a run, or consists of nothing else,
/// it is returned unchanged.
///
/// The copy is always made because an owned value is ultimately needed anyway, not because
/// every input requires a change.
fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString {
    let tail_start = match buf.rfind_not_byteset(br"\ ") {
        // There is a trailing run of spaces and/or backslashes after the last other byte.
        Some(last_other_byte) if last_other_byte + 1 < buf.len() => last_other_byte + 1,
        // Either the buffer doesn't end in (escaped) whitespace, or it contains nothing but
        // spaces and backslashes.
        _ => return buf.into(),
    };

    // This may look a bit odd, but it tries to mimic the git implementation while also
    // dropping the escape characters in front of spaces. Backslashes elsewhere are kept
    // so that `glob/globset` can deal with these escapes.
    let mut out: BString = buf[..tail_start].into();
    let mut pending_spaces = 0;
    let mut tail = buf[tail_start..].iter();
    while let Some(&byte) = tail.next() {
        match byte {
            // Unescaped spaces are only kept if a backslash follows them later.
            b' ' => pending_spaces += 1,
            b'\\' => {
                out.extend(std::iter::repeat(b' ').take(pending_spaces));
                pending_spaces = 0;
                // Like git, skip the byte after the backslash, but keep it if it is a space.
                if tail.next() == Some(&b' ') {
                    out.push(b' ');
                }
            }
            _ => unreachable!("BUG: this must be either backslash or space"),
        }
    }
    out
}
5 changes: 5 additions & 0 deletions git-attributes/src/parse/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod ignore;

/// Return an iterator over the ignore patterns contained in `buf`, the bytes of an ignore file.
///
/// This only constructs the iterator, lines are examined as it is advanced.
pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> {
ignore::Iter::new(buf)
}
175 changes: 175 additions & 0 deletions git-attributes/tests/attributes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
mod parse {
    /// Tests for `git_attributes::parse::ignore()`, which yields `(pattern, mode, line_number)` tuples.
    mod ignore {
        use git_attributes::ignore::pattern::Mode;
        use git_testtools::fixture_path;

        #[test]
        fn line_numbers_are_counted_correctly() {
            // Comments and empty lines in the fixture are skipped, but still count as lines.
            let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap();
            let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect();
            assert_eq!(
                actual,
                vec![
                    ("*.[oa]".into(), Mode::NO_SUB_DIR, 2),
                    ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5),
                    ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8),
                    ("/*".into(), Mode::empty(), 11),
                    ("/foo".into(), Mode::NEGATIVE, 12),
                    ("/foo/*".into(), Mode::empty(), 13),
                    ("/foo/bar".into(), Mode::NEGATIVE, 14)
                ]
            );
        }

        #[test]
        fn line_endings_can_be_windows_or_unix() {
            assert_eq!(
                git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::<Vec<_>>(),
                vec![
                    (r"unix".into(), Mode::NO_SUB_DIR, 1),
                    (r"windows".into(), Mode::NO_SUB_DIR, 2),
                    (r"last".into(), Mode::NO_SUB_DIR, 3)
                ]
            );
        }

        #[test]
        fn mark_ends_with_pattern_specifically() {
            assert_eq!(
                git_attributes::parse::ignore(br"*literal").next(),
                Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1))
            );
            assert_eq!(
                git_attributes::parse::ignore(br"**literal").next(),
                Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)),
                "double-asterisk won't allow for fast comparisons"
            );
            assert_eq!(
                git_attributes::parse::ignore(br"*litera[l]").next(),
                Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1))
            );
            assert_eq!(
                git_attributes::parse::ignore(br"*litera?").next(),
                Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1))
            );
            assert_eq!(
                git_attributes::parse::ignore(br"*litera\?").next(),
                Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)),
                "for now we don't handle escapes properly like git seems to do"
            );
        }

        #[test]
        fn comments_are_ignored() {
            assert!(git_attributes::parse::ignore(b"# hello world").next().is_none());
        }

        #[test]
        fn backslashes_before_hashes_are_no_comments() {
            assert_eq!(
                git_attributes::parse::ignore(br"\#hello").next(),
                Some((r"#hello".into(), Mode::NO_SUB_DIR, 1))
            );
        }

        #[test]
        fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() {
            assert_eq!(
                git_attributes::parse::ignore(br"\hello\world").next(),
                Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1))
            );
        }

        #[test]
        fn leading_exclamation_mark_negates_pattern() {
            assert_eq!(
                git_attributes::parse::ignore(b"!hello").next(),
                Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1))
            );
        }

        #[test]
        fn leading_exclamation_marks_can_be_escaped_with_backslash() {
            assert_eq!(
                git_attributes::parse::ignore(br"\!hello").next(),
                Some(("!hello".into(), Mode::NO_SUB_DIR, 1))
            );
        }

        #[test]
        fn absence_of_sub_directories_are_marked() {
            assert_eq!(
                git_attributes::parse::ignore(br"a/b").next(),
                Some(("a/b".into(), Mode::empty(), 1))
            );
            assert_eq!(
                git_attributes::parse::ignore(br"ab").next(),
                Some(("ab".into(), Mode::NO_SUB_DIR, 1))
            );
        }

        #[test]
        fn trailing_slashes_are_marked_and_removed() {
            assert_eq!(
                git_attributes::parse::ignore(b"dir/").next(),
                Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1))
            );
            assert_eq!(
                git_attributes::parse::ignore(b"dir///").next(),
                Some(("dir//".into(), Mode::MUST_BE_DIR, 1)),
                "but only the last slash is removed"
            );
        }

        #[test]
        fn trailing_spaces_are_ignored() {
            assert_eq!(
                git_attributes::parse::ignore(br"a ").next(),
                Some(("a".into(), Mode::NO_SUB_DIR, 1))
            );
            assert_eq!(
                git_attributes::parse::ignore(b"a\t\t ").next(),
                Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)),
                "trailing tabs are not ignored"
            );
        }

        #[test]
        fn trailing_spaces_can_be_escaped_to_be_literal() {
            // Rule of thumb for the expectations below: bare spaces are kept only if a backslash
            // follows them, a backslash itself is dropped, and the byte right after a backslash is
            // kept only if it is a space.
            assert_eq!(
                git_attributes::parse::ignore(br"a \ ").next(),
                // one bare space + one escaped space = two spaces
                Some(("a  ".into(), Mode::NO_SUB_DIR, 1)),
                "a single escape in front of the last desired space is enough"
            );
            assert_eq!(
                git_attributes::parse::ignore(br"a b c ").next(),
                Some(("a b c".into(), Mode::NO_SUB_DIR, 1)),
                "spaces in the middle are fine"
            );
            assert_eq!(
                git_attributes::parse::ignore(br"a\ \ \ ").next(),
                // three escaped spaces = three spaces
                Some(("a   ".into(), Mode::NO_SUB_DIR, 1)),
                "one can also escape every single one"
            );
            assert_eq!(
                // input: `a`, bare space, escaped space, bare space
                git_attributes::parse::ignore(br"a \  ").next(),
                // one bare space + one escaped space = two spaces, the final bare space is dropped
                Some(("a  ".into(), Mode::NO_SUB_DIR, 1)),
                "or just the one in the middle, losing the last actual space"
            );
            assert_eq!(
                git_attributes::parse::ignore(br"a \").next(),
                // the bare space is protected by the backslash, which itself is removed
                Some(("a ".into(), Mode::NO_SUB_DIR, 1)),
                "escaping nothing also works as a whitespace protection"
            );
            assert_eq!(
                git_attributes::parse::ignore(br"a \\\ ").next(),
                // bare space, then `\\` vanishes entirely, then one escaped space = two spaces
                Some((r"a  ".into(), Mode::NO_SUB_DIR, 1)),
                "strange things like these work too"
            );
            assert_eq!(
                git_attributes::parse::ignore(br"a \\ ").next(),
                // bare space, then `\\` vanishes entirely, the final bare space is dropped = one space
                Some((r"a ".into(), Mode::NO_SUB_DIR, 1)),
                "strange things like these work as well"
            );
        }
    }
}
14 changes: 14 additions & 0 deletions git-attributes/tests/fixtures/ignore/various.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# ignore objects and archives, anywhere in the tree.
*.[oa]

# ignore generated html files,
*.html

# except foo.html which is maintained by hand
!foo.html

# exclude everything except directory foo/bar
/*
!/foo
/foo/*
!/foo/bar

0 comments on commit 8ab19a6

Please sign in to comment.