Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change loop detection to work like Sublime Text #146

Merged
merged 4 commits into from
Apr 27, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 41 additions & 49 deletions src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,17 @@ use fnv::FnvHasher;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseState {
stack: Vec<StateLevel>,
first_line: bool,
// See issue #101. Contains indices of frames pushed by `with_prototype`s.
// Doesn't look at `with_prototype`s below top of stack.
proto_starts: Vec<usize>,
// The line being parsed (starting at 0)
line: usize,
}

#[derive(Debug, Clone)]
struct StateLevel {
context: ContextPtr,
prototype: Option<ContextPtr>,
captures: Option<(Region, String)>,
non_consuming_push_at: Option<(usize, usize)>,
}

fn context_ptr_eq(a: &ContextPtr, b: &ContextPtr) -> bool {
Expand All @@ -65,10 +63,6 @@ struct RegexMatch {
regions: Region,
context: ContextPtr,
pat_index: usize,
/// Whether the match consumed at least one input character or not.
/// E.g. an empty match at the current position doesn't consume anything,
/// it doesn't advance the position at which we're matching.
consuming: bool,
}

/// maps the pattern to the start index, which is -1 if not found.
Expand Down Expand Up @@ -174,12 +168,11 @@ impl ParseState {
context: syntax.contexts["__start"].clone(),
prototype: None,
captures: None,
non_consuming_push_at: None,
};
ParseState {
stack: vec![start_state],
first_line: true,
proto_starts: Vec::new(),
line: 0,
}
}

Expand All @@ -199,23 +192,25 @@ impl ParseState {
let mut match_start = 0;
let mut res = Vec::new();

if self.line == 0 {
if self.first_line {
let cur_level = &self.stack[self.stack.len() - 1];
let context = cur_level.context.borrow();
if !context.meta_content_scope.is_empty() {
res.push((0, ScopeStackOp::Push(context.meta_content_scope[0])));
}
self.first_line = false;
}
self.line += 1;

let mut regions = Region::with_capacity(8);
let fnv = BuildHasherDefault::<FnvHasher>::default();
let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(128, fnv);
let mut non_consuming_push_at = None;

while self.parse_next_token(line,
&mut match_start,
&mut search_cache,
&mut regions,
&mut non_consuming_push_at,
&mut res) {
}

Expand All @@ -227,8 +222,14 @@ impl ParseState {
start: &mut usize,
search_cache: &mut SearchCache,
regions: &mut Region,
non_consuming_push_at: &mut Option<(usize, usize)>,
ops: &mut Vec<(usize, ScopeStackOp)>)
-> bool {

let check_pop_loop = non_consuming_push_at
.map(|(pos, stack_depth)| pos == *start && stack_depth == self.stack.len())
.unwrap_or(false);

let (cur_match, match_from_with_proto, pop_would_loop) = {
let cur_level = &self.stack[self.stack.len() - 1];
let prototype: Option<ContextPtr> = {
Expand Down Expand Up @@ -259,10 +260,6 @@ impl ParseState {
let mut match_from_with_proto = false;
let mut pop_would_loop = false;

let check_pop_loop = cur_level.non_consuming_push_at
.map(|(line, pos)| self.line == line && pos == *start)
.unwrap_or(false);

for (from_with_proto, ctx, captures) in context_chain {
for (pat_context_ptr, pat_index) in context_iter(ctx) {
let mut pat_context = pat_context_ptr.borrow_mut();
Expand All @@ -282,16 +279,15 @@ impl ParseState {

// println!("setting as current match");

let consuming = match_end > *start;

min_start = match_start;
cur_match = Some(RegexMatch {
regions: match_region,
context: pat_context_ptr.clone(),
pat_index,
consuming,
});
match_from_with_proto = from_with_proto;

let consuming = match_end > *start;
pop_would_loop = check_pop_loop && !consuming && match match_pat.operation {
MatchOperation::Pop => true,
_ => false,
Expand Down Expand Up @@ -321,11 +317,31 @@ impl ParseState {
return false;
}
*start += 1;
*non_consuming_push_at = None;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also unnecessary, see other similar comment for more.

return true;
}

let match_end = reg_match.regions.pos(0).unwrap().1;
*start = match_end;
let consuming = match_end > *start;
if consuming {
// The match consumes some characters. So update the position
// and clear state we use for checking for loops.
*start = match_end;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be done unconditionally, combined with the next comment, this branch of the if is unnecessary. If you think it's easier to understand this way then I'm fine with it though.

*non_consuming_push_at = None;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is strictly necessary, since any future checks against it won't succeed since it will have an earlier start position. But it's totally fine to leave it, maybe makes it easier to think about.

I also realized that you can even avoid the logic of it being an Option because (0,0) is a starting state that should act correctly. Unsure of whether I prefer the cleaner logic of doing that or the perhaps more conceptually nice way of using an option.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, all good ideas :). Done!

} else {
// The match doesn't consume any characters. If this is a
// "push", remember the position and stack size so that we can
// check the next "pop" for loops. Otherwise leave the state,
// because a non-consuming "set" could also result in a loop.
let context = reg_match.context.borrow();
let match_pattern = context.match_at(reg_match.pat_index);
match match_pattern.operation {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be an if let

MatchOperation::Push(_) => {
*non_consuming_push_at = Some((match_end, self.stack.len() + 1));
},
_ => {}
}
}

// ignore `with_prototype`s below this if a context is pushed
if match_from_with_proto {
Expand Down Expand Up @@ -457,7 +473,7 @@ impl ParseState {
}
self.push_meta_ops(false, match_end, &*level_context, &pat.operation, ops);

self.perform_op(line, &reg_match.regions, reg_match.consuming, pat)
self.perform_op(line, &reg_match.regions, pat)
}

fn push_meta_ops(&self,
Expand Down Expand Up @@ -565,28 +581,12 @@ impl ParseState {
}

/// Returns true if the stack was changed
fn perform_op(&mut self, line: &str, regions: &Region, consuming: bool, pat: &MatchPattern) -> bool {
let (ctx_refs, non_consuming_push_at) = match pat.operation {
MatchOperation::Push(ref ctx_refs) => {
let match_end = regions.pos(0).unwrap().1;
let non_consuming_push_at = if !consuming {
Some((self.line, match_end))
} else {
None
};
(ctx_refs, non_consuming_push_at)
},
fn perform_op(&mut self, line: &str, regions: &Region, pat: &MatchPattern) -> bool {
let ctx_refs = match pat.operation {
MatchOperation::Push(ref ctx_refs) => ctx_refs,
MatchOperation::Set(ref ctx_refs) => {
let previous = self.stack.pop();
// If the "set" replaces a state with a non-consuming match,
// we need to keep checking for looping, otherwise it would be
// possible for a sequence like "push -> set -> pop" to loop.
let non_consuming_push_at = if !consuming {
previous.and_then(|s| s.non_consuming_push_at)
} else {
None
};
(ctx_refs, non_consuming_push_at)
self.stack.pop();
ctx_refs
}
MatchOperation::Pop => {
self.stack.pop();
Expand Down Expand Up @@ -617,18 +617,10 @@ impl ParseState {
None
}
};
// If multiple contexts are pushed, we only need to check for a loop
// when we get back to the first context that was pushed.
let non_consuming_push_at = if i == 0 {
non_consuming_push_at
} else {
None
};
self.stack.push(StateLevel {
context: ctx_ptr,
prototype: proto,
captures,
non_consuming_push_at,
});
}
true
Expand Down