Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop matching as soon as we found the best possible match #150

Merged
merged 2 commits into from
Apr 28, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 87 additions & 71 deletions src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ struct RegexMatch {
regions: Region,
context: ContextPtr,
pat_index: usize,
from_with_prototype: bool,
would_loop: bool,
}

/// maps the pattern to the start index, which is -1 if not found.
Expand Down Expand Up @@ -227,83 +229,20 @@ impl ParseState {
non_consuming_push_at: &mut (usize, usize),
ops: &mut Vec<(usize, ScopeStackOp)>)
-> bool {

let check_pop_loop = {
let (pos, stack_depth) = *non_consuming_push_at;
pos == *start && stack_depth == self.stack.len()
};

let (cur_match, match_from_with_proto, pop_would_loop) = {
let cur_level = &self.stack[self.stack.len() - 1];
let prototype: Option<ContextPtr> = {
let ctx_ref = cur_level.context.borrow();
ctx_ref.prototype.clone()
};

// Trim proto_starts that are no longer valid
while self.proto_starts.last().map(|start| *start >= self.stack.len()).unwrap_or(false) {
self.proto_starts.pop();
}
// Trim proto_starts that are no longer valid
while self.proto_starts.last().map(|start| *start >= self.stack.len()).unwrap_or(false) {
self.proto_starts.pop();
}

// Build an iterator for the contexts we want to visit in order
let context_chain = {
let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
// Sublime applies with_prototypes from bottom to top
let with_prototypes = self.stack[proto_start..].iter().filter_map(|lvl| lvl.prototype.as_ref().map(|ctx| (true, ctx.clone(), lvl.captures.as_ref())));
let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
let cur_context = Some((false, cur_level.context.clone(), cur_level.captures.as_ref())).into_iter();
with_prototypes.chain(cur_prototype).chain(cur_context)
};
let best_match = self.find_best_match(line, *start, search_cache, regions, check_pop_loop);

// println!("{:#?}", cur_level);
// println!("token at {} on {}", start, line.trim_right());

let mut min_start = usize::MAX;
let mut cur_match: Option<RegexMatch> = None;
let mut match_from_with_proto = false;
let mut pop_would_loop = false;

for (from_with_proto, ctx, captures) in context_chain {
for (pat_context_ptr, pat_index) in context_iter(ctx) {
let mut pat_context = pat_context_ptr.borrow_mut();
let match_pat = pat_context.match_at_mut(pat_index);

if let Some(match_region) = self.search(
line, *start, match_pat, captures, search_cache, regions
) {
let (match_start, match_end) = match_region.pos(0).unwrap();

// println!("matched pattern {:?} at start {} end {}", match_pat.regex_str, match_start, match_end);

if match_start < min_start || (match_start == min_start && pop_would_loop) {
// New match is earlier in text than old match,
// or old match was a looping pop at the same
// position.

// println!("setting as current match");

min_start = match_start;
cur_match = Some(RegexMatch {
regions: match_region,
context: pat_context_ptr.clone(),
pat_index,
});
match_from_with_proto = from_with_proto;

let consuming = match_end > *start;
pop_would_loop = check_pop_loop && !consuming && match match_pat.operation {
MatchOperation::Pop => true,
_ => false,
};
}
}
}
}
(cur_match, match_from_with_proto, pop_would_loop)
};

if let Some(reg_match) = cur_match {
if pop_would_loop {
if let Some(reg_match) = best_match {
if reg_match.would_loop {
// A push that doesn't consume anything (a regex that resulted
// in an empty match at the current position) can not be
// followed by a non-consuming pop. Otherwise we're back where
Expand Down Expand Up @@ -341,7 +280,7 @@ impl ParseState {
*start = match_end;

// ignore `with_prototype`s below this if a context is pushed
if match_from_with_proto {
if reg_match.from_with_prototype {
// use current height, since we're before the actual push
self.proto_starts.push(self.stack.len());
}
Expand All @@ -355,6 +294,83 @@ impl ParseState {
}
}

fn find_best_match(&self,
line: &str,
start: usize,
search_cache: &mut SearchCache,
regions: &mut Region,
check_pop_loop: bool)
-> Option<RegexMatch> {
let cur_level = &self.stack[self.stack.len() - 1];
let prototype: Option<ContextPtr> = {
let ctx_ref = cur_level.context.borrow();
ctx_ref.prototype.clone()
};

// Build an iterator for the contexts we want to visit in order
let context_chain = {
let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
// Sublime applies with_prototypes from bottom to top
let with_prototypes = self.stack[proto_start..].iter().filter_map(|lvl| lvl.prototype.as_ref().map(|ctx| (true, ctx.clone(), lvl.captures.as_ref())));
let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
let cur_context = Some((false, cur_level.context.clone(), cur_level.captures.as_ref())).into_iter();
with_prototypes.chain(cur_prototype).chain(cur_context)
};

// println!("{:#?}", cur_level);
// println!("token at {} on {}", start, line.trim_right());

let mut min_start = usize::MAX;
let mut best_match: Option<RegexMatch> = None;
let mut pop_would_loop = false;

for (from_with_proto, ctx, captures) in context_chain {
for (pat_context_ptr, pat_index) in context_iter(ctx) {
let mut pat_context = pat_context_ptr.borrow_mut();
let match_pat = pat_context.match_at_mut(pat_index);

if let Some(match_region) = self.search(
line, start, match_pat, captures, search_cache, regions
) {
let (match_start, match_end) = match_region.pos(0).unwrap();

// println!("matched pattern {:?} at start {} end {}", match_pat.regex_str, match_start, match_end);

if match_start < min_start || (match_start == min_start && pop_would_loop) {
// New match is earlier in text than old match,
// or old match was a looping pop at the same
// position.

// println!("setting as current match");

min_start = match_start;

let consuming = match_end > start;
pop_would_loop = check_pop_loop && !consuming && match match_pat.operation {
MatchOperation::Pop => true,
_ => false,
};

best_match = Some(RegexMatch {
regions: match_region,
context: pat_context_ptr.clone(),
pat_index,
from_with_prototype: from_with_proto,
would_loop: pop_would_loop,
});

if match_start == start && !pop_would_loop {
// We're not gonna find a better match after this,
// so as an optimization we can stop matching now.
return best_match;
}
}
}
}
}
best_match
}

fn search(&self,
line: &str,
start: usize,
Expand Down