Skip to content

Commit

Permalink
Merge branch 'attributes-cache'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Apr 24, 2023
2 parents 3180142 + 13a070f commit 3456c84
Show file tree
Hide file tree
Showing 30 changed files with 1,026 additions and 294 deletions.
28 changes: 18 additions & 10 deletions gix-attributes/src/search/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ use crate::{

/// Instantiation and initialization.
impl Search {
/// Create a search instance preloaded with *built-ins* as well as attribute `files` from various global locations.
/// Create a search instance preloaded with *built-ins* followed by attribute `files` from various global locations.
///
/// See [`Source`][crate::Source] for a way to obtain these paths.
///
/// Note that parsing is lenient and errors are logged.
/// `buf` is used to read `files` from disk which will be ignored if they do not exist.
/// `collection` will be updated with information necessary to perform lookups later.
///
/// * `buf` is used to read `files` from disk which will be ignored if they do not exist.
/// * `collection` will be updated with information necessary to perform lookups later.
pub fn new_globals(
files: impl IntoIterator<Item = impl Into<PathBuf>>,
buf: &mut Vec<u8>,
Expand All @@ -36,7 +39,7 @@ impl Search {
/// Add the given file at `source` to our patterns if it exists, otherwise do nothing.
/// Update `collection` with newly added attribute names.
/// If a `root` is provided, it's not considered a global file anymore.
/// Returns true if the file was added, or false if it didn't exist.
/// Returns `true` if the file was added, or `false` if it didn't exist.
pub fn add_patterns_file(
&mut self,
source: impl Into<PathBuf>,
Expand All @@ -63,17 +66,22 @@ impl Search {
self.patterns.push(pattern::List::from_bytes(bytes, source, root));
collection.update_from_list(self.patterns.last_mut().expect("just added"));
}

/// Pop the last attribute patterns list from our queue.
///
/// Returns the removed list, or `None` if there was no list left to remove.
pub fn pop_pattern_list(&mut self) -> Option<gix_glob::search::pattern::List<Attributes>> {
self.patterns.pop()
}
}

/// Access and matching
impl Search {
/// Match `relative_path`, a path relative to the repository, while respecting `case`-sensitivity, and write the matches to `out`.
/// Return true if at least one pattern matched.
/// Return `true` if at least one pattern matched.
pub fn pattern_matching_relative_path<'a, 'b>(
&'a self,
relative_path: impl Into<&'b BStr>,
case: gix_glob::pattern::Case,
out: &mut Outcome<'a>,
out: &mut Outcome,
) -> bool {
let relative_path = relative_path.into();
let basename_pos = relative_path.rfind(b"/").map(|p| p + 1);
Expand Down Expand Up @@ -166,12 +174,12 @@ fn macro_mode() -> gix_glob::pattern::Mode {
/// `is_dir` is true if `relative_path` is a directory.
/// Return `true` if at least one pattern matched.
#[allow(unused_variables)]
fn pattern_matching_relative_path<'a>(
list: &'a gix_glob::search::pattern::List<Attributes>,
fn pattern_matching_relative_path(
list: &gix_glob::search::pattern::List<Attributes>,
relative_path: &BStr,
basename_pos: Option<usize>,
case: gix_glob::pattern::Case,
out: &mut Outcome<'a>,
out: &mut Outcome,
) -> bool {
let (relative_path, basename_start_pos) =
match list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case) {
Expand Down Expand Up @@ -199,7 +207,7 @@ fn pattern_matching_relative_path<'a>(
if out.has_unspecified_attributes(attrs.iter().map(|attr| attr.id))
&& pattern.matches_repo_relative_path(relative_path, basename_start_pos, None, case)
{
let all_filled = out.fill_attributes(attrs.iter(), pattern, list.source.as_deref(), *sequence_number);
let all_filled = out.fill_attributes(attrs.iter(), pattern, list.source.as_ref(), *sequence_number);
if all_filled {
break 'outer;
}
Expand Down
20 changes: 14 additions & 6 deletions gix-attributes/src/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ use std::collections::HashMap;
use kstring::KString;
use smallvec::SmallVec;

use crate::Assignment;
use crate::{Assignment, AssignmentRef};

mod attributes;
mod outcome;
mod refmap;
pub(crate) use refmap::RefMap;

/// A typically sized list of attributes.
pub type Assignments = SmallVec<[TrackedAssignment; AVERAGE_NUM_ATTRS]>;
Expand Down Expand Up @@ -49,7 +51,7 @@ pub struct Match<'a> {
/// The glob pattern itself, like `/target/*`.
pub pattern: &'a gix_glob::Pattern,
/// The key=value pair of the attribute that matched at the pattern. There can be multiple matches per pattern.
pub assignment: Assignment,
pub assignment: AssignmentRef<'a>,
/// Additional information about the kind of match.
pub kind: MatchKind,
/// Information about the location of the match.
Expand Down Expand Up @@ -88,24 +90,30 @@ pub enum MatchKind {

/// The result of a search, containing all matching attributes.
#[derive(Default)]
pub struct Outcome<'pattern> {
pub struct Outcome {
/// The list of all available attributes, by ascending order. Each slot's index corresponds to an attribute with that order, i.e.
/// `arr[attr.id] = <attr info>`.
///
/// This list needs to be up-to-date with the search group so all possible attribute names are known.
matches_by_id: Vec<Slot<'pattern>>,
matches_by_id: Vec<Slot>,
/// A stack of attributes to use for processing attributes of matched patterns and for resolving their macros.
attrs_stack: SmallVec<[(AttributeId, Assignment, Option<AttributeId>); 8]>,
/// A set of attributes we should limit ourselves to, or empty if we should fill in all attributes, made of
/// the attribute name and, if available, its id.
selected: SmallVec<[(KString, Option<AttributeId>); AVERAGE_NUM_ATTRS]>,
/// storage for all patterns we have matched so far (in order to avoid referencing them, we copy them, but only once).
patterns: RefMap<gix_glob::Pattern>,
/// storage for all assignments we have matched so far (in order to avoid referencing them, we copy them, but only once).
assignments: RefMap<Assignment>,
/// storage for all source paths we have matched so far (in order to avoid referencing them, we copy them, but only once).
source_paths: RefMap<std::path::PathBuf>,
/// The amount of attributes that still need to be set, or `None` if this outcome is consumed which means it
/// needs to be re-initialized.
remaining: Option<usize>,
}

#[derive(Default, Clone)]
struct Slot<'pattern> {
r#match: Option<Match<'pattern>>,
struct Slot {
r#match: Option<outcome::Match>,
/// A list of all assignments, being an empty list for non-macro attributes, or all assignments (with order) for macros.
/// It's used to resolve macros.
macro_attributes: Assignments,
Expand Down
133 changes: 91 additions & 42 deletions gix-attributes/src/search/outcome.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
use std::{borrow::Cow, path::Path};

use bstr::{BString, ByteSlice};
use gix_glob::Pattern;
use kstring::{KString, KStringRef};

use crate::search::refmap::RefMapKey;
use crate::{
search::{
Assignments, AttributeId, Attributes, Match, MatchKind, MatchLocation, Metadata, MetadataCollection, Outcome,
TrackedAssignment, Value,
Assignments, AttributeId, Attributes, MatchKind, Metadata, MetadataCollection, Outcome, TrackedAssignment,
Value,
},
Assignment, NameRef, State,
AssignmentRef, NameRef, StateRef,
};

/// Initialization
impl<'pattern> Outcome<'pattern> {
impl Outcome {
/// Initialize this instance to collect outcomes for all names in `collection`, which represents all possible attributes
/// or macros we may visit.
/// or macros we may visit, and [`reset`][Self::reset()] it unconditionally.
///
/// This must be called after each time `collection` changes.
pub fn initialize(&mut self, collection: &MetadataCollection) {
Expand Down Expand Up @@ -74,7 +73,7 @@ impl<'pattern> Outcome<'pattern> {
}

/// Access
impl<'pattern> Outcome<'pattern> {
impl Outcome {
/// Return an iterator over all filled attributes we were initialized with.
///
/// ### Note
Expand All @@ -88,56 +87,63 @@ impl<'pattern> Outcome<'pattern> {
/// the same as what `git` provides.
/// Ours is in order of declaration, whereas `git` seems to list macros first somehow. Since the values are the same, this
/// shouldn't be an issue.
pub fn iter<'a>(&'a self) -> impl Iterator<Item = &'a Match<'pattern>> + 'a {
self.matches_by_id.iter().filter_map(|item| item.r#match.as_ref())
pub fn iter(&self) -> impl Iterator<Item = crate::search::Match<'_>> {
self.matches_by_id
.iter()
.filter_map(|item| item.r#match.as_ref().map(|m| m.to_outer(self)))
}

/// Iterate over all matches of the attribute selection in their original order.
pub fn iter_selected<'a>(&'a self) -> impl Iterator<Item = Cow<'a, Match<'pattern>>> + 'a {
///
/// This only yields values if this instance was initialized with [`Outcome::initialize_with_selection()`].
pub fn iter_selected(&self) -> impl Iterator<Item = crate::search::Match<'_>> {
static DUMMY: Pattern = Pattern {
text: BString::new(Vec::new()),
mode: gix_glob::pattern::Mode::empty(),
first_wildcard_pos: None,
};
self.selected.iter().map(|(name, id)| {
id.and_then(|id| self.matches_by_id[id.0].r#match.as_ref())
.map(Cow::Borrowed)
.unwrap_or_else(|| {
Cow::Owned(Match {
pattern: &DUMMY,
assignment: Assignment {
name: NameRef::try_from(name.as_bytes().as_bstr())
.unwrap_or_else(|_| NameRef("invalid".into()))
.to_owned(),
state: State::Unspecified,
},
kind: MatchKind::Attribute { macro_id: None },
location: MatchLocation {
source: None,
sequence_number: 0,
},
})
id.and_then(|id| self.matches_by_id[id.0].r#match.as_ref().map(|m| m.to_outer(self)))
.unwrap_or_else(|| crate::search::Match {
pattern: &DUMMY,
assignment: AssignmentRef {
name: NameRef::try_from(name.as_bytes().as_bstr())
.unwrap_or_else(|_| NameRef("invalid".into())),
state: StateRef::Unspecified,
},
kind: MatchKind::Attribute { macro_id: None },
location: crate::search::MatchLocation {
source: None,
sequence_number: 0,
},
})
})
}

/// Obtain a match by the order of its attribute, if the order exists in our initialized attribute list and there was a match.
pub fn match_by_id(&self, id: AttributeId) -> Option<&Match<'pattern>> {
self.matches_by_id.get(id.0).and_then(|m| m.r#match.as_ref())
pub fn match_by_id(&self, id: AttributeId) -> Option<crate::search::Match<'_>> {
self.matches_by_id
.get(id.0)
.and_then(|m| m.r#match.as_ref().map(|m| m.to_outer(self)))
}

/// Return `true` if there is nothing more to be done as all attributes were filled.
///
/// This is implemented as a check that `remaining()` equals zero.
pub fn is_done(&self) -> bool {
self.remaining() == 0
}
}

/// Mutation
impl<'pattern> Outcome<'pattern> {
impl Outcome {
/// Fill all `attrs` and resolve them recursively if they are macros. Return `true` if there is no attribute left to be resolved and
/// we are totally done.
/// `pattern` is what matched a path and is passed for contextual information,
/// providing `sequence_number` and `source` as well.
pub(crate) fn fill_attributes<'a>(
&mut self,
attrs: impl Iterator<Item = &'a TrackedAssignment>,
pattern: &'pattern gix_glob::Pattern,
source: Option<&'pattern Path>,
pattern: &gix_glob::Pattern,
source: Option<&std::path::PathBuf>,
sequence_number: usize,
) -> bool {
self.attrs_stack.extend(attrs.filter_map(|attr| {
Expand All @@ -155,8 +161,8 @@ impl<'pattern> Outcome<'pattern> {
let is_macro = !slot.macro_attributes.is_empty();

slot.r#match = Some(Match {
pattern,
assignment: assignment.to_owned(),
pattern: self.patterns.insert(pattern),
assignment: self.assignments.insert_owned(assignment),
kind: if is_macro {
MatchKind::Macro {
parent_macro_id: parent_order,
Expand All @@ -165,7 +171,7 @@ impl<'pattern> Outcome<'pattern> {
MatchKind::Attribute { macro_id: parent_order }
},
location: MatchLocation {
source,
source: source.map(|path| self.source_paths.insert(path)),
sequence_number,
},
});
Expand All @@ -188,7 +194,7 @@ impl<'pattern> Outcome<'pattern> {
}
}

impl<'attr> Outcome<'attr> {
impl Outcome {
/// Given a list of `attrs` by order, return true if at least one of them is not set
pub(crate) fn has_unspecified_attributes(&self, mut attrs: impl Iterator<Item = AttributeId>) -> bool {
attrs.any(|order| self.matches_by_id[order.0].r#match.is_none())
Expand All @@ -201,11 +207,6 @@ impl<'attr> Outcome<'attr> {
.expect("BUG: instance must be initialized for each search set")
}

/// Return true if there is nothing more to be done as all attributes were filled.
pub(crate) fn is_done(&self) -> bool {
self.remaining() == 0
}

fn reduce_and_check_if_done(&mut self, attr: AttributeId) -> bool {
if self.selected.is_empty()
|| self
Expand Down Expand Up @@ -314,3 +315,51 @@ impl MatchKind {
}
}
}

/// A version of `Match` without references.
///
/// Instead of borrowing, the pattern and the assignment are stored as keys which `to_outer()`
/// resolves against the `patterns` and `assignments` storage of an `Outcome`.
#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
pub struct Match {
/// The key of the glob pattern itself, like `/target/*`.
pub pattern: RefMapKey,
/// The key of the key=value pair of the attribute that matched at the pattern.
/// There can be multiple matches per pattern.
pub assignment: RefMapKey,
/// Additional information about the kind of match.
pub kind: MatchKind,
/// Information about the location of the match.
pub location: MatchLocation,
}

impl Match {
fn to_outer<'a>(&self, out: &'a Outcome) -> crate::search::Match<'a> {
crate::search::Match {
pattern: out.patterns.resolve(self.pattern).expect("pattern still present"),
assignment: out
.assignments
.resolve(self.assignment)
.expect("assignment present")
.as_ref(),
kind: self.kind,
location: self.location.to_outer(out),
}
}
}

/// A version of `MatchLocation` without references.
///
/// The source path is stored as a key which `to_outer()` resolves against the `source_paths`
/// storage of an `Outcome`.
#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
pub struct MatchLocation {
/// The key of the path to the source from which the pattern was loaded, or `None` if it was specified by other means.
pub source: Option<RefMapKey>,
/// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided.
pub sequence_number: usize,
}

impl MatchLocation {
    /// Turn this key-based location into the public, borrowing `MatchLocation` by looking up
    /// the source path in the storage of `out`.
    ///
    /// The resulting `source` is `None` if no source key is stored or if it cannot be resolved.
    fn to_outer<'a>(&self, out: &'a Outcome) -> crate::search::MatchLocation<'a> {
        let source = match self.source {
            Some(key) => out.source_paths.resolve(key).map(|path| path.as_path()),
            None => None,
        };
        crate::search::MatchLocation {
            source,
            sequence_number: self.sequence_number,
        }
    }
}
Loading

0 comments on commit 3456c84

Please sign in to comment.