Skip to content

Commit

Permalink
Tree parsing is now probably twice as fast… (#67)
Browse files Browse the repository at this point in the history
…by implementing the few bits that are needed ourselves.
This might open up future optimizations, if they matter.

Controlling the nom parsers on a token by token basis via iterators
probably already saves a lot of time, and these are used everywhere.
  • Loading branch information
Byron committed Sep 22, 2021
1 parent 70179e2 commit d1e2b89
Showing 1 changed file with 35 additions and 7 deletions.
42 changes: 35 additions & 7 deletions git-object/src/tree/ref_iter.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::convert::TryFrom;

use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter};
use nom::error::ParseError;

impl<'a> TreeRefIter<'a> {
/// Instantiate an iterator from the given tree data.
Expand Down Expand Up @@ -43,14 +44,19 @@ impl<'a> Iterator for TreeRefIter<'a> {
if self.data.is_empty() {
return None;
}
match decode::entry(self.data) {
Ok((data_left, entry)) => {
match decode::fast_entry(self.data) {
Some((data_left, entry)) => {
self.data = data_left;
Some(Ok(entry))
}
Err(err) => {
None => {
self.data = &[];
Some(Err(err.into()))
#[allow(clippy::unit_arg)]
Some(Err(nom::Err::Error(crate::decode::ParseError::from_error_kind(
&[] as &[u8],
nom::error::ErrorKind::MapRes,
))
.into()))
}
}
}
Expand All @@ -63,11 +69,11 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
Ok(match mode {
b"40000" => tree::EntryMode::Tree,
b"100644" => tree::EntryMode::Blob,
b"100664" => tree::EntryMode::Blob, // rare and found in the linux kernel
b"100640" => tree::EntryMode::Blob, // rare and found in the Rust repo
b"100755" => tree::EntryMode::BlobExecutable,
b"120000" => tree::EntryMode::Link,
b"160000" => tree::EntryMode::Commit,
b"100664" => tree::EntryMode::Blob, // rare and found in the linux kernel
b"100640" => tree::EntryMode::Blob, // rare and found in the Rust repo
_ => return Err(mode),
})
}
Expand All @@ -91,12 +97,34 @@ mod decode {

const NULL: &[u8] = b"\0";

/// Decode a single tree entry from the front of `i` by hand, without involving nom.
///
/// The bytes are expected to be laid out as `<mode> <filename>\0<object id>`, with the
/// object id currently assumed to be exactly 20 bytes long.
///
/// Returns the bytes remaining after the entry along with the decoded entry, or `None` if
///
/// * there is no space terminating the mode,
/// * the mode is not one understood by `tree::EntryMode`,
/// * there is no null byte terminating the filename, or
/// * fewer than 20 bytes are left for the object id.
pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
    // TODO: know actual/desired length or we may overshoot
    const HASH_LEN_FIXME: usize = 20;

    let mode_end = i.find_byte(b' ')?;
    let mode = tree::EntryMode::try_from(&i[..mode_end]).ok()?;
    // `find_byte` returned a valid index, so skipping the separator can't go out of bounds.
    let after_mode = &i[mode_end + 1..];

    let name_end = after_mode.find_byte(0)?;
    let filename = &after_mode[..name_end];
    let after_name = &after_mode[name_end + 1..];

    if after_name.len() < HASH_LEN_FIXME {
        return None;
    }
    // Splitting at the full length yields an empty remainder, so an entry that ends the
    // buffer needs no special case.
    let (oid, rest) = after_name.split_at(HASH_LEN_FIXME);

    Some((
        rest,
        EntryRef {
            mode,
            filename: filename.as_bstr(),
            oid: git_hash::oid::try_from(oid).expect("we counted exactly 20 bytes"),
        },
    ))
}

pub fn entry<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], EntryRef<'_>, E> {
let (i, mode) = terminated(take_while_m_n(5, 6, is_digit), tag(SPACE))(i)?;
let mode = tree::EntryMode::try_from(mode)
.map_err(|invalid| nom::Err::Error(E::from_error_kind(invalid, nom::error::ErrorKind::MapRes)))?;
let (i, filename) = terminated(take_while1(|b| b != NULL[0]), tag(NULL))(i)?;
let (i, oid) = take(20u8)(i)?;
let (i, oid) = take(20u8)(i)?; // TODO: make this compatible with other hash lengths

Ok((
i,
Expand Down

0 comments on commit d1e2b89

Please sign in to comment.