Skip to content

Commit

Permalink
fix(parser): greatly improve expression parsing performance (#90)
Browse files Browse the repository at this point in the history
Previously, the parser needed to do a lot of backtracking while parsing conditionals and operations which massively degraded performance for deeply nested expressions.

This fix inlines the handling for operations and conditionals into the `Expression` rule so that an already matched `ExprTerm` rule does not need to be re-evaluated when no operation/conditional is matched (which is the case in the vast majority of cases since literal values and collections are way more common than these.

Fixes #82
  • Loading branch information
martinohmann authored Oct 12, 2022
1 parent 7dd8e90 commit a5b57ef
Show file tree
Hide file tree
Showing 5 changed files with 313 additions and 223 deletions.
33 changes: 33 additions & 0 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,39 @@ fn benchmark(c: &mut Criterion) {
c.bench_function("hcl::to_string(&Value)", |b| {
b.iter(|| hcl::to_string(&value))
});

let deeply_nested = r#"
variable "network_integration" {
description = "Map of networking integrations between accounts"
type = map(object({
friendly_name = string,
vpcs = map(object({
id = string
cidr = string
region = string
description = string
subnets = map(string)
route_tables = map(string)
security_groups = map(object({
id = string
rules = map(object({
direction = string
protocol = string
from_port = string
to_port = string
description = string
}))
}))
}))
additional_propagated_vpcs = list(string)
additional_static_vpc_routes = list(string)
}))
default = {}
}"#;

c.bench_function("hcl::parse(&deeply_nested)", |b| {
b.iter(|| hcl::parse(&deeply_nested))
});
}

criterion_group!(benches, benchmark);
Expand Down
2 changes: 1 addition & 1 deletion specsuite/hcl/invalid-heredoc.hcl.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"message": "EOF must be immediately followed by a newline",
"body": {
"message": " --> 2:12\n |\n2 | source = <<-EOF # This is invalid\n | ^---\n |\n = expected ExprTerm, Operation, or Conditional"
"message": " --> 2:12\n |\n2 | source = <<-EOF # This is invalid\n | ^---\n |\n = expected Expression"
}
}
38 changes: 15 additions & 23 deletions src/parser/grammar/hcl.pest
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@ Block = { Identifier ~ (StringLit | Identifier)* ~ BlockBody }
BlockBody = { "{" ~ Body ~ "}" }

// Expressions
Expression = _{ Conditional | Operation | ExprTerm }
ExprTerm = { ExprTermInner ~ (Splat | GetAttr | Index)* }
ExprTermInner = _{ Value | TemplateExpr | FunctionCall | Variable | ForExpr | Parenthesis }
Parenthesis = { "(" ~ Expression ~ ")" }
Expression = {
UnaryOperator? ~
ExprTerm ~
(BinaryOperator ~ Expression)? ~
("?" ~ Expression ~ ":" ~ Expression)?
}
ExprTerm = {
(Value | TemplateExpr | FunctionCall | Variable | ForExpr | Parenthesis) ~
(Splat | GetAttr | Index)*
}
Parenthesis = { "(" ~ Expression ~ ")" }

// Values
Value = _{ LiteralValue | CollectionValue }
Expand Down Expand Up @@ -84,16 +91,9 @@ StringPart = {
}

// Functions and function calls
FunctionCall = ${
Identifier ~ "(" ~
WHITESPACE* ~ (COMMENT ~ WHITESPACE*)* ~
Arguments ~
WHITESPACE* ~ (COMMENT ~ WHITESPACE*)* ~
")"
}

Arguments = !{ (Expression ~ ("," ~ Expression)* ~ ("," | ExpandFinal)?)? }
ExpandFinal = { "..." }
FunctionCall = { Identifier ~ Arguments }
Arguments = { "(" ~ (Expression ~ ("," ~ Expression)* ~ ("," | ExpandFinal)?)? ~ ")" }
ExpandFinal = { "..." }

// For expressions
ForExpr = { ForTupleExpr | ForObjectExpr }
Expand All @@ -118,21 +118,13 @@ Splat = _{ (AttrSplat ~ GetAttr*) | (FullSplat ~ (GetAttr | Index)*) }
AttrSplat = { ".*" }
FullSplat = { "[*]" }

// Operations
Operation = { UnaryOp | BinaryOp }
UnaryOp = { UnaryOperator ~ Expression }
// Unary and binary operators
UnaryOperator = { "-" | "!" }
BinaryOp = { ExprTerm ~ BinaryOperator ~ Expression }
BinaryOperator = { CompareOperator | ArithmeticOperator | LogicOperator }
CompareOperator = { "==" | "!=" | "<=" | ">=" | "<" | ">" }
ArithmeticOperator = { "+" | "-" | "*" | "/" | "%" }
LogicOperator = { "&&" | "||" }


// Conditional operator
CondExpr = _{ ExprTerm | Operation }
Conditional = { CondExpr ~ "?" ~ Expression ~ ":" ~ Expression }

// Comments
COMMENT = _{ InlineComment | BlockComment }
InlineComment = _{ ("#" | "//") ~ (!EoInlineComment ~ ANY)* }
Expand Down
109 changes: 55 additions & 54 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,60 +112,58 @@ fn parse_block_body(pair: Pair<Rule>) -> Result<Body> {
}

fn parse_expression(pair: Pair<Rule>) -> Result<Expression> {
match pair.as_rule() {
Rule::ExprTerm => parse_expr_term(pair),
Rule::Operation => match parse_operation(inner(pair))? {
Operation::Binary(binary) => Ok(Expression::from(Operation::Binary(binary))),
Operation::Unary(unary) => match (unary.operator, unary.expr) {
// Negate operations on numbers are just converted to negative numbers for
// convenience.
(UnaryOperator::Neg, Expression::Number(num)) => Ok(Expression::Number(-num)),
(operator, expr) => Ok(Expression::from(Operation::Unary(UnaryOp {
operator,
expr,
}))),
},
},
Rule::Conditional => Ok(Expression::from(parse_conditional(pair)?)),
rule => unexpected_rule(rule),
}
let pairs = pair.into_inner();
let (expr, pairs) = parse_unary_op(pairs)?;
let (expr, pairs) = parse_binary_op(expr, pairs)?;
parse_conditional(expr, pairs)
}

fn parse_conditional(pair: Pair<Rule>) -> Result<Conditional> {
let mut pairs = pair.into_inner();
fn parse_unary_op(mut pairs: Pairs<Rule>) -> Result<(Expression, Pairs<Rule>)> {
let pair = pairs.next().unwrap();

Ok(Conditional {
cond_expr: parse_expression(pairs.next().unwrap())?,
true_expr: parse_expression(pairs.next().unwrap())?,
false_expr: parse_expression(pairs.next().unwrap())?,
})
}
let expr = match pair.as_rule() {
Rule::UnaryOperator => {
let operator = UnaryOperator::from_str(pair.as_str())?;
let expr = parse_expr_term(pairs.next().unwrap())?;

fn parse_operation(pair: Pair<Rule>) -> Result<Operation> {
match pair.as_rule() {
Rule::UnaryOp => parse_unary_op(pair).map(Operation::Unary),
Rule::BinaryOp => parse_binary_op(pair).map(Operation::Binary),
rule => unexpected_rule(rule),
}
match (operator, expr) {
(UnaryOperator::Neg, Expression::Number(num)) => Expression::Number(-num),
(operator, expr) => Expression::from(Operation::Unary(UnaryOp { operator, expr })),
}
}
_ => parse_expr_term(pair)?,
};

Ok((expr, pairs))
}

fn parse_unary_op(pair: Pair<Rule>) -> Result<UnaryOp> {
let mut pairs = pair.into_inner();
fn parse_binary_op(expr: Expression, mut pairs: Pairs<Rule>) -> Result<(Expression, Pairs<Rule>)> {
let expr = match pairs.peek() {
Some(pair) => match pair.as_rule() {
Rule::BinaryOperator => Expression::from(Operation::Binary(BinaryOp {
lhs_expr: expr,
operator: pairs.next().unwrap().as_str().parse()?,
rhs_expr: parse_expression(pairs.next().unwrap())?,
})),
_ => expr,
},
None => expr,
};

Ok(UnaryOp {
operator: pairs.next().unwrap().as_str().parse()?,
expr: parse_expression(pairs.next().unwrap())?,
})
Ok((expr, pairs))
}

fn parse_binary_op(pair: Pair<Rule>) -> Result<BinaryOp> {
let mut pairs = pair.into_inner();
fn parse_conditional(expr: Expression, mut pairs: Pairs<Rule>) -> Result<Expression> {
let expr = match pairs.next() {
Some(pair) => Expression::from(Conditional {
cond_expr: expr,
true_expr: parse_expression(pair)?,
false_expr: parse_expression(pairs.next().unwrap())?,
}),
None => expr,
};

Ok(BinaryOp {
lhs_expr: parse_expr_term(pairs.next().unwrap())?,
operator: pairs.next().unwrap().as_str().parse()?,
rhs_expr: parse_expression(pairs.next().unwrap())?,
})
Ok(expr)
}

fn parse_expressions(pair: Pair<Rule>) -> Result<Vec<Expression>> {
Expand All @@ -183,10 +181,7 @@ fn parse_expr_term(pair: Pair<Rule>) -> Result<Expression> {
Rule::Int => Expression::Number(parse_primitive::<i64>(pair).into()),
Rule::NullLit => Expression::Null,
Rule::StringLit => parse_string(inner(pair)).map(Expression::String)?,
Rule::TemplateExpr => {
let expr = parse_template_expr(inner(pair));
Expression::TemplateExpr(Box::new(expr))
}
Rule::TemplateExpr => Expression::TemplateExpr(Box::new(parse_template_expr(inner(pair)))),
Rule::Tuple => parse_expressions(pair).map(Expression::Array)?,
Rule::Object => parse_object(pair).map(Expression::Object)?,
Rule::Variable => Expression::Variable(parse_ident(pair).into()),
Expand All @@ -196,12 +191,18 @@ fn parse_expr_term(pair: Pair<Rule>) -> Result<Expression> {
rule => unexpected_rule(rule),
};

match pairs.peek() {
Some(_) => {
let operators = pairs.map(parse_traversal_operator).collect::<Result<_>>()?;
Ok(Expression::from(Traversal { expr, operators }))
}
None => Ok(expr),
parse_traversal(expr, pairs)
}

fn parse_traversal(expr: Expression, pairs: Pairs<Rule>) -> Result<Expression> {
let operators = pairs
.map(parse_traversal_operator)
.collect::<Result<Vec<TraversalOperator>>>()?;

if !operators.is_empty() {
Ok(Expression::from(Traversal { expr, operators }))
} else {
Ok(expr)
}
}

Expand Down
Loading

0 comments on commit a5b57ef

Please sign in to comment.