diff --git a/src/main/antlr4/liquid/parser/v4/LiquidLexer.g4 b/src/main/antlr4/liquid/parser/v4/LiquidLexer.g4 index 19231d77..cd03301d 100644 --- a/src/main/antlr4/liquid/parser/v4/LiquidLexer.g4 +++ b/src/main/antlr4/liquid/parser/v4/LiquidLexer.g4 @@ -104,6 +104,7 @@ fragment LineBreak : '\r'? '\n' | '\r'; fragment Letter : [a-zA-Z]; fragment Digit : [0-9]; +// Note that when adding tokens to this `IN_TAG` mode, be sure to include them in the parser rule `not_out_end` as well! mode IN_TAG; OutStart2 : '{{' -> pushMode(IN_TAG); diff --git a/src/main/antlr4/liquid/parser/v4/LiquidParser.g4 b/src/main/antlr4/liquid/parser/v4/LiquidParser.g4 index 35e9a4c6..8b714d69 100644 --- a/src/main/antlr4/liquid/parser/v4/LiquidParser.g4 +++ b/src/main/antlr4/liquid/parser/v4/LiquidParser.g4 @@ -230,8 +230,19 @@ output | {isWarn() || isLax()}? outStart term filter* unparsed=not_out_end? OutEnd ; +// When using `( ~OutEnd )+`, ANTLR appears to be much slower on large input text. Even though the parser never gets +// here when `isStrict() == true`, the prediction algorithm still explores this branch, and doing so takes too much +// time when the overly large token set `( ~OutEnd )+` is used. The tokens listed below are all the tokens the lexer +// can produce while in the `IN_TAG` mode. +// +// The input from https://github.com/bkiers/Liqp/issues/310 was tested by parsing it 100 times. When this rule contained +// `( ~OutEnd )+`, it ran in about 8000-8500 ms on average. With the individual tokens specified in the `IN_TAG` mode, +// the average runtime was around 3000-3200 ms. not_out_end - : ( ~OutEnd )+ + : ( OutStart2 | TagEnd | Str | DotDot | Dot | NEq | Eq | EqSign | GtEq | Gt | LtEq | Lt | Minus | Pipe + | Col | Comma | OPar | CPar | OBr | CBr | QMark | PathSep | DoubleNum | LongNum | Contains | In | And + | Or | True | False | Nil | With | Offset | Continue | Reversed | Empty | Blank | IdChain | Id + )+ ; filter