Skip to content

Commit cb7a18b

Browse files
authored
Merge pull request #118 from Mingun/inference-match-result
Optimize redundant fail checks
2 parents 48fd800 + 3a25229 commit cb7a18b

File tree

10 files changed

+867
-631
lines changed

10 files changed

+867
-631
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ Released: TBD
9090

9191
By default this new option contains an array with [reserved JavaScript words][reserved]
9292
[@Mingun](https://github.com/peggyjs/peggy/pull/150)
93+
- Several optimizations in the generator. Generated parsers should now be faster and smaller
94+
[@Mingun](https://github.com/peggyjs/peggy/pull/118)
9395

9496
[reserved]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#reserved_keywords_as_of_ecmascript_2015
9597

lib/compiler/index.js

+9-7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
const generateBytecode = require("./passes/generate-bytecode");
44
const generateJS = require("./passes/generate-js");
5+
const inferenceMatchResult = require("./passes/inference-match-result");
56
const removeProxyRules = require("./passes/remove-proxy-rules");
67
const reportDuplicateLabels = require("./passes/report-duplicate-labels");
78
const reportDuplicateRules = require("./passes/report-duplicate-rules");
@@ -38,21 +39,22 @@ const compiler = {
3839
// or modify it as needed. If the pass encounters a semantic error, it throws
3940
// |peg.GrammarError|.
4041
passes: {
41-
check: {
42+
check: [
4243
reportUndefinedRules,
4344
reportDuplicateRules,
4445
reportDuplicateLabels,
4546
reportInfiniteRecursion,
4647
reportInfiniteRepetition,
4748
reportIncorrectPlucking
48-
},
49-
transform: {
50-
removeProxyRules
51-
},
52-
generate: {
49+
],
50+
transform: [
51+
removeProxyRules,
52+
inferenceMatchResult,
53+
],
54+
generate: [
5355
generateBytecode,
5456
generateJS
55-
}
57+
]
5658
},
5759

5860
// Generates a parser from a specified grammar AST. Throws |peg.GrammarError|

lib/compiler/passes/generate-bytecode.js

+100-27
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
const asts = require("../asts");
44
const op = require("../opcodes");
55
const visitor = require("../visitor");
6+
const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-match-result");
67

78
// Generates bytecode.
89
//
@@ -195,6 +196,27 @@ const visitor = require("../visitor");
195196
// [29] SILENT_FAILS_OFF
196197
//
197198
// silentFails--;
199+
//
200+
// This pass can use the results of other previous passes, each of which can
201+
// change the AST (and, as a consequence, the bytecode).
202+
//
203+
// In particular, if the pass |inferenceMatchResult| has been run before this pass,
204+
// then each AST node will contain a |match| property, which represents a possible
205+
// match result of the node:
206+
// - `<0` - node is never matched, for example, `!('a'*)` (negation of the always
207+
// matched node). Generator can put |FAILED| to the stack immediately
208+
// - `=0` - sometimes node matched, sometimes not. This is the same behavior
209+
// when |match| is missing
210+
// - `>0` - node is always matched, for example, `'a'*` (because result is an
211+
// empty array, or an array with some elements). The generator does not
212+
// need to add a check for |FAILED|, because it is impossible
213+
//
214+
// To handle the situation, when the |inferenceMatchResult| has not run (that
215+
// happens, for example, in tests), the |match| value is extracted using the
216+
// `|0` trick, which casts a missing (undefined) value to the integer `0`
217+
// that is equivalent to an unknown match result and signals the generator that
218+
// runtime check for the |FAILED| is required. Trick is explained on the
219+
// Wikipedia page (https://en.wikipedia.org/wiki/Asm.js#Code_generation)
198220
function generateBytecode(ast) {
199221
const literals = [];
200222
const classes = [];
@@ -248,7 +270,10 @@ function generateBytecode(ast) {
248270
return first.concat(...args);
249271
}
250272

251-
function buildCondition(condCode, thenCode, elseCode) {
273+
function buildCondition(match, condCode, thenCode, elseCode) {
274+
if (match === ALWAYS_MATCH) { return thenCode; }
275+
if (match === NEVER_MATCH) { return elseCode; }
276+
252277
return condCode.concat(
253278
[thenCode.length, elseCode.length],
254279
thenCode,
@@ -267,6 +292,8 @@ function generateBytecode(ast) {
267292
}
268293

269294
function buildSimplePredicate(expression, negative, context) {
295+
const match = expression.match | 0;
296+
270297
return buildSequence(
271298
[op.PUSH_CURR_POS],
272299
[op.SILENT_FAILS_ON],
@@ -277,6 +304,7 @@ function generateBytecode(ast) {
277304
}),
278305
[op.SILENT_FAILS_OFF],
279306
buildCondition(
307+
negative ? -match : match,
280308
[negative ? op.IF_ERROR : op.IF_NOT_ERROR],
281309
buildSequence(
282310
[op.POP],
@@ -292,15 +320,16 @@ function generateBytecode(ast) {
292320
);
293321
}
294322

295-
function buildSemanticPredicate(code, negative, context) {
323+
function buildSemanticPredicate(node, negative, context) {
296324
const functionIndex = addFunctionConst(
297-
true, Object.keys(context.env), code
325+
true, Object.keys(context.env), node.code
298326
);
299327

300328
return buildSequence(
301329
[op.UPDATE_SAVED_POS],
302330
buildCall(functionIndex, 0, context.env, context.sp),
303331
buildCondition(
332+
node.match | 0,
304333
[op.IF],
305334
buildSequence(
306335
[op.POP],
@@ -341,7 +370,11 @@ function generateBytecode(ast) {
341370
},
342371

343372
named(node, context) {
344-
const nameIndex = addExpectedConst({ type: "rule", value: node.name });
373+
const match = node.match | 0;
374+
// Expectation not required if node always fail
375+
const nameIndex = match === NEVER_MATCH ? null : addExpectedConst(
376+
{ type: "rule", value: node.name }
377+
);
345378

346379
// The code generated below is slightly suboptimal because |FAIL| pushes
347380
// to the stack, so we need to stick a |POP| in front of it. We lack a
@@ -351,20 +384,34 @@ function generateBytecode(ast) {
351384
[op.SILENT_FAILS_ON],
352385
generate(node.expression, context),
353386
[op.SILENT_FAILS_OFF],
354-
buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
387+
buildCondition(match, [op.IF_ERROR], [op.FAIL, nameIndex], [])
355388
);
356389
},
357390

358391
choice(node, context) {
359392
function buildAlternativesCode(alternatives, context) {
393+
const match = alternatives[0].match | 0;
394+
const first = generate(alternatives[0], {
395+
sp: context.sp,
396+
env: cloneEnv(context.env),
397+
action: null
398+
});
399+
// If an alternative always matches, there is no need to generate code for the next
400+
// alternatives. Because they will never be tried, any side-effects
401+
// from the next alternatives are impossible, so we can skip their generation
402+
if (match === ALWAYS_MATCH) {
403+
return first;
404+
}
405+
406+
// Even if an alternative never matches, it can have side-effects from
407+
// semantic predicates or actions, so we cannot skip generation
408+
// of the first alternative.
409+
// We can do that once analysis of possible side-effects is introduced
360410
return buildSequence(
361-
generate(alternatives[0], {
362-
sp: context.sp,
363-
env: cloneEnv(context.env),
364-
action: null
365-
}),
411+
first,
366412
alternatives.length > 1
367413
? buildCondition(
414+
SOMETIMES_MATCH,
368415
[op.IF_ERROR],
369416
buildSequence(
370417
[op.POP],
@@ -382,21 +429,24 @@ function generateBytecode(ast) {
382429
action(node, context) {
383430
const env = cloneEnv(context.env);
384431
const emitCall = node.expression.type !== "sequence"
385-
|| node.expression.elements.length === 0;
432+
|| node.expression.elements.length === 0;
386433
const expressionCode = generate(node.expression, {
387434
sp: context.sp + (emitCall ? 1 : 0),
388435
env,
389436
action: node
390437
});
391-
const functionIndex = addFunctionConst(
392-
false, Object.keys(env), node.code
393-
);
438+
const match = node.expression.match | 0;
439+
// Function only required if expression can match
440+
const functionIndex = emitCall && match !== NEVER_MATCH
441+
? addFunctionConst(false, Object.keys(env), node.code)
442+
: null;
394443

395444
return emitCall
396445
? buildSequence(
397446
[op.PUSH_CURR_POS],
398447
expressionCode,
399448
buildCondition(
449+
match,
400450
[op.IF_NOT_ERROR],
401451
buildSequence(
402452
[op.LOAD_SAVED_POS, 1],
@@ -412,8 +462,7 @@ function generateBytecode(ast) {
412462
sequence(node, context) {
413463
function buildElementsCode(elements, context) {
414464
if (elements.length > 0) {
415-
const processedCount
416-
= node.elements.length - elements.slice(1).length;
465+
const processedCount = node.elements.length - elements.length + 1;
417466

418467
return buildSequence(
419468
generate(elements[0], {
@@ -423,6 +472,7 @@ function generateBytecode(ast) {
423472
action: null
424473
}),
425474
buildCondition(
475+
elements[0].match | 0,
426476
[op.IF_NOT_ERROR],
427477
buildElementsCode(elements.slice(1), {
428478
sp: context.sp + 1,
@@ -508,6 +558,7 @@ function generateBytecode(ast) {
508558
action: null
509559
}),
510560
buildCondition(
561+
node.match | 0,
511562
[op.IF_NOT_ERROR],
512563
buildSequence([op.POP], [op.TEXT]),
513564
[op.NIP]
@@ -531,6 +582,10 @@ function generateBytecode(ast) {
531582
action: null
532583
}),
533584
buildCondition(
585+
// Check expression match, not the node match
586+
// If the expression always matches, there is no need to replace FAILED with NULL,
587+
// because FAILED will never appear
588+
-(node.expression.match | 0),
534589
[op.IF_ERROR],
535590
buildSequence([op.POP], [op.PUSH_NULL]),
536591
[]
@@ -564,6 +619,8 @@ function generateBytecode(ast) {
564619
[op.PUSH_EMPTY_ARRAY],
565620
expressionCode,
566621
buildCondition(
622+
// Condition depends on the expression match, not the node match
623+
node.expression.match | 0,
567624
[op.IF_NOT_ERROR],
568625
buildSequence(buildAppendLoop(expressionCode), [op.POP]),
569626
buildSequence([op.POP], [op.POP], [op.PUSH_FAILED])
@@ -580,11 +637,11 @@ function generateBytecode(ast) {
580637
},
581638

582639
semantic_and(node, context) {
583-
return buildSemanticPredicate(node.code, false, context);
640+
return buildSemanticPredicate(node, false, context);
584641
},
585642

586643
semantic_not(node, context) {
587-
return buildSemanticPredicate(node.code, true, context);
644+
return buildSemanticPredicate(node, true, context);
588645
},
589646

590647
rule_ref(node) {
@@ -593,19 +650,26 @@ function generateBytecode(ast) {
593650

594651
literal(node) {
595652
if (node.value.length > 0) {
596-
const stringIndex = addLiteralConst(
653+
const match = node.match | 0;
654+
// String only required if condition is generated or string is
655+
// case-sensitive and the node always matches
656+
const needConst = match === SOMETIMES_MATCH
657+
|| (match === ALWAYS_MATCH && !node.ignoreCase);
658+
const stringIndex = needConst ? addLiteralConst(
597659
node.ignoreCase ? node.value.toLowerCase() : node.value
598-
);
599-
const expectedIndex = addExpectedConst({
660+
) : null;
661+
// Expectation is not required if the node always matches
662+
const expectedIndex = match !== ALWAYS_MATCH ? addExpectedConst({
600663
type: "literal",
601664
value: node.value,
602665
ignoreCase: node.ignoreCase
603-
});
666+
}) : null;
604667

605668
// For case-sensitive strings the value must match the beginning of the
606669
// remaining input exactly. As a result, we can use |ACCEPT_STRING| and
607670
// save one |substr| call that would be needed if we used |ACCEPT_N|.
608671
return buildCondition(
672+
match,
609673
node.ignoreCase
610674
? [op.MATCH_STRING_IC, stringIndex]
611675
: [op.MATCH_STRING, stringIndex],
@@ -620,25 +684,34 @@ function generateBytecode(ast) {
620684
},
621685

622686
class(node) {
623-
const classIndex = addClassConst(node);
624-
const expectedIndex = addExpectedConst({
687+
const match = node.match | 0;
688+
// Character class constant only required if condition is generated
689+
const classIndex = match === SOMETIMES_MATCH ? addClassConst(node) : null;
690+
// Expectation is not required if the node always matches
691+
const expectedIndex = match !== ALWAYS_MATCH ? addExpectedConst({
625692
type: "class",
626693
value: node.parts,
627694
inverted: node.inverted,
628695
ignoreCase: node.ignoreCase
629-
});
696+
}) : null;
630697

631698
return buildCondition(
699+
match,
632700
[op.MATCH_CHAR_CLASS, classIndex],
633701
[op.ACCEPT_N, 1],
634702
[op.FAIL, expectedIndex]
635703
);
636704
},
637705

638-
any() {
639-
const expectedIndex = addExpectedConst({ type: "any" });
706+
any(node) {
707+
const match = node.match | 0;
708+
// Expectation is not required if the node always matches
709+
const expectedIndex = match !== ALWAYS_MATCH ? addExpectedConst({
710+
type: "any"
711+
}) : null;
640712

641713
return buildCondition(
714+
match,
642715
[op.MATCH_ANY],
643716
[op.ACCEPT_N, 1],
644717
[op.FAIL, expectedIndex]

0 commit comments

Comments
 (0)