3
3
const asts = require ( "../asts" ) ;
4
4
const op = require ( "../opcodes" ) ;
5
5
const visitor = require ( "../visitor" ) ;
6
+ const { ALWAYS_MATCH , SOMETIMES_MATCH , NEVER_MATCH } = require ( "./inference-match-result" ) ;
6
7
7
8
// Generates bytecode.
8
9
//
@@ -195,6 +196,27 @@ const visitor = require("../visitor");
195
196
// [29] SILENT_FAILS_OFF
196
197
//
197
198
// silentFails--;
199
+ //
200
+ // This pass can use the results of other previous passes, each of which can
201
+ // change the AST (and, as a consequence, the bytecode).
202
+ //
203
+ // In particular, if the pass |inferenceMatchResult| has been run before this pass,
204
+ // then each AST node will contain a |match| property, which represents a possible
205
+ // match result of the node:
206
+ // - `<0` - node is never matched, for example, `!('a'*)` (negation of the always
207
+ // matched node). The generator can push |FAILED| onto the stack immediately
208
+ // - `=0` - node sometimes matches, sometimes not. This is the same behavior
209
+ // as when |match| is missing
210
+ // - `>0` - node is always matched, for example, `'a'*` (because result is an
211
+ // empty array, or an array with some elements). The generator does not
212
+ // need to add a check for |FAILED|, because it is impossible
213
+ //
214
+ // To handle the situation when |inferenceMatchResult| has not run (that
215
+ // happens, for example, in tests), the |match| value is extracted using the
216
+ // `|0` trick, which coerces a missing |match| to the integer `0`. That value
217
+ // is equivalent to an unknown match result and signals the generator that
218
+ // a runtime check for |FAILED| is required. The trick is explained on the
219
+ // Wikipedia page (https://en.wikipedia.org/wiki/Asm.js#Code_generation)
198
220
function generateBytecode ( ast ) {
199
221
const literals = [ ] ;
200
222
const classes = [ ] ;
@@ -248,7 +270,10 @@ function generateBytecode(ast) {
248
270
return first . concat ( ...args ) ;
249
271
}
250
272
251
- function buildCondition ( condCode , thenCode , elseCode ) {
273
+ function buildCondition ( match , condCode , thenCode , elseCode ) {
274
+ if ( match === ALWAYS_MATCH ) { return thenCode ; }
275
+ if ( match === NEVER_MATCH ) { return elseCode ; }
276
+
252
277
return condCode . concat (
253
278
[ thenCode . length , elseCode . length ] ,
254
279
thenCode ,
@@ -267,6 +292,8 @@ function generateBytecode(ast) {
267
292
}
268
293
269
294
function buildSimplePredicate ( expression , negative , context ) {
295
+ const match = expression . match | 0 ;
296
+
270
297
return buildSequence (
271
298
[ op . PUSH_CURR_POS ] ,
272
299
[ op . SILENT_FAILS_ON ] ,
@@ -277,6 +304,7 @@ function generateBytecode(ast) {
277
304
} ) ,
278
305
[ op . SILENT_FAILS_OFF ] ,
279
306
buildCondition (
307
+ negative ? - match : match ,
280
308
[ negative ? op . IF_ERROR : op . IF_NOT_ERROR ] ,
281
309
buildSequence (
282
310
[ op . POP ] ,
@@ -292,15 +320,16 @@ function generateBytecode(ast) {
292
320
) ;
293
321
}
294
322
295
- function buildSemanticPredicate ( code , negative , context ) {
323
+ function buildSemanticPredicate ( node , negative , context ) {
296
324
const functionIndex = addFunctionConst (
297
- true , Object . keys ( context . env ) , code
325
+ true , Object . keys ( context . env ) , node . code
298
326
) ;
299
327
300
328
return buildSequence (
301
329
[ op . UPDATE_SAVED_POS ] ,
302
330
buildCall ( functionIndex , 0 , context . env , context . sp ) ,
303
331
buildCondition (
332
+ node . match | 0 ,
304
333
[ op . IF ] ,
305
334
buildSequence (
306
335
[ op . POP ] ,
@@ -341,7 +370,11 @@ function generateBytecode(ast) {
341
370
} ,
342
371
343
372
named ( node , context ) {
344
- const nameIndex = addExpectedConst ( { type : "rule" , value : node . name } ) ;
373
+ const match = node . match | 0 ;
374
+ // Expectation not required if node always fail
375
+ const nameIndex = match === NEVER_MATCH ? null : addExpectedConst (
376
+ { type : "rule" , value : node . name }
377
+ ) ;
345
378
346
379
// The code generated below is slightly suboptimal because |FAIL| pushes
347
380
// to the stack, so we need to stick a |POP| in front of it. We lack a
@@ -351,20 +384,34 @@ function generateBytecode(ast) {
351
384
[ op . SILENT_FAILS_ON ] ,
352
385
generate ( node . expression , context ) ,
353
386
[ op . SILENT_FAILS_OFF ] ,
354
- buildCondition ( [ op . IF_ERROR ] , [ op . FAIL , nameIndex ] , [ ] )
387
+ buildCondition ( match , [ op . IF_ERROR ] , [ op . FAIL , nameIndex ] , [ ] )
355
388
) ;
356
389
} ,
357
390
358
391
choice ( node , context ) {
359
392
function buildAlternativesCode ( alternatives , context ) {
393
+ const match = alternatives [ 0 ] . match | 0 ;
394
+ const first = generate ( alternatives [ 0 ] , {
395
+ sp : context . sp ,
396
+ env : cloneEnv ( context . env ) ,
397
+ action : null
398
+ } ) ;
399
+ // If an alternative always matches, there is no need to generate code for the
400
+ // next alternatives. Because they will never be tried, side-effects from
401
+ // them are impossible, so we can skip their generation
402
+ if ( match === ALWAYS_MATCH ) {
403
+ return first ;
404
+ }
405
+
406
+ // Even if an alternative never matches, it can have side-effects from
407
+ // semantic predicates or actions, so we cannot skip generation
408
+ // of the first alternative.
409
+ // We can do that once analysis of possible side-effects is introduced
360
410
return buildSequence (
361
- generate ( alternatives [ 0 ] , {
362
- sp : context . sp ,
363
- env : cloneEnv ( context . env ) ,
364
- action : null
365
- } ) ,
411
+ first ,
366
412
alternatives . length > 1
367
413
? buildCondition (
414
+ SOMETIMES_MATCH ,
368
415
[ op . IF_ERROR ] ,
369
416
buildSequence (
370
417
[ op . POP ] ,
@@ -382,21 +429,24 @@ function generateBytecode(ast) {
382
429
action ( node , context ) {
383
430
const env = cloneEnv ( context . env ) ;
384
431
const emitCall = node . expression . type !== "sequence"
385
- || node . expression . elements . length === 0 ;
432
+ || node . expression . elements . length === 0 ;
386
433
const expressionCode = generate ( node . expression , {
387
434
sp : context . sp + ( emitCall ? 1 : 0 ) ,
388
435
env,
389
436
action : node
390
437
} ) ;
391
- const functionIndex = addFunctionConst (
392
- false , Object . keys ( env ) , node . code
393
- ) ;
438
+ const match = node . expression . match | 0 ;
439
+ // Function only required if expression can match
440
+ const functionIndex = emitCall && match !== NEVER_MATCH
441
+ ? addFunctionConst ( false , Object . keys ( env ) , node . code )
442
+ : null ;
394
443
395
444
return emitCall
396
445
? buildSequence (
397
446
[ op . PUSH_CURR_POS ] ,
398
447
expressionCode ,
399
448
buildCondition (
449
+ match ,
400
450
[ op . IF_NOT_ERROR ] ,
401
451
buildSequence (
402
452
[ op . LOAD_SAVED_POS , 1 ] ,
@@ -412,8 +462,7 @@ function generateBytecode(ast) {
412
462
sequence ( node , context ) {
413
463
function buildElementsCode ( elements , context ) {
414
464
if ( elements . length > 0 ) {
415
- const processedCount
416
- = node . elements . length - elements . slice ( 1 ) . length ;
465
+ const processedCount = node . elements . length - elements . length + 1 ;
417
466
418
467
return buildSequence (
419
468
generate ( elements [ 0 ] , {
@@ -423,6 +472,7 @@ function generateBytecode(ast) {
423
472
action : null
424
473
} ) ,
425
474
buildCondition (
475
+ elements [ 0 ] . match | 0 ,
426
476
[ op . IF_NOT_ERROR ] ,
427
477
buildElementsCode ( elements . slice ( 1 ) , {
428
478
sp : context . sp + 1 ,
@@ -508,6 +558,7 @@ function generateBytecode(ast) {
508
558
action : null
509
559
} ) ,
510
560
buildCondition (
561
+ node . match | 0 ,
511
562
[ op . IF_NOT_ERROR ] ,
512
563
buildSequence ( [ op . POP ] , [ op . TEXT ] ) ,
513
564
[ op . NIP ]
@@ -531,6 +582,10 @@ function generateBytecode(ast) {
531
582
action : null
532
583
} ) ,
533
584
buildCondition (
585
+ // Check expression match, not the node match
586
+ // If the expression always matches, there is no need to replace FAILED
587
+ // with NULL, because FAILED will never appear
588
+ - ( node . expression . match | 0 ) ,
534
589
[ op . IF_ERROR ] ,
535
590
buildSequence ( [ op . POP ] , [ op . PUSH_NULL ] ) ,
536
591
[ ]
@@ -564,6 +619,8 @@ function generateBytecode(ast) {
564
619
[ op . PUSH_EMPTY_ARRAY ] ,
565
620
expressionCode ,
566
621
buildCondition (
622
+ // Condition depends on the expression match, not the node match
623
+ node . expression . match | 0 ,
567
624
[ op . IF_NOT_ERROR ] ,
568
625
buildSequence ( buildAppendLoop ( expressionCode ) , [ op . POP ] ) ,
569
626
buildSequence ( [ op . POP ] , [ op . POP ] , [ op . PUSH_FAILED ] )
@@ -580,11 +637,11 @@ function generateBytecode(ast) {
580
637
} ,
581
638
582
639
semantic_and ( node , context ) {
583
- return buildSemanticPredicate ( node . code , false , context ) ;
640
+ return buildSemanticPredicate ( node , false , context ) ;
584
641
} ,
585
642
586
643
semantic_not ( node , context ) {
587
- return buildSemanticPredicate ( node . code , true , context ) ;
644
+ return buildSemanticPredicate ( node , true , context ) ;
588
645
} ,
589
646
590
647
rule_ref ( node ) {
@@ -593,19 +650,26 @@ function generateBytecode(ast) {
593
650
594
651
literal ( node ) {
595
652
if ( node . value . length > 0 ) {
596
- const stringIndex = addLiteralConst (
653
+ const match = node . match | 0 ;
654
+ // String only required if condition is generated or string is
655
+ // case-sensitive and the node always matches
656
+ const needConst = match === SOMETIMES_MATCH
657
+ || ( match === ALWAYS_MATCH && ! node . ignoreCase ) ;
658
+ const stringIndex = needConst ? addLiteralConst (
597
659
node . ignoreCase ? node . value . toLowerCase ( ) : node . value
598
- ) ;
599
- const expectedIndex = addExpectedConst ( {
660
+ ) : null ;
661
+ // Expectation is not required if the node always matches
662
+ const expectedIndex = match !== ALWAYS_MATCH ? addExpectedConst ( {
600
663
type : "literal" ,
601
664
value : node . value ,
602
665
ignoreCase : node . ignoreCase
603
- } ) ;
666
+ } ) : null ;
604
667
605
668
// For case-sensitive strings the value must match the beginning of the
606
669
// remaining input exactly. As a result, we can use |ACCEPT_STRING| and
607
670
// save one |substr| call that would be needed if we used |ACCEPT_N|.
608
671
return buildCondition (
672
+ match ,
609
673
node . ignoreCase
610
674
? [ op . MATCH_STRING_IC , stringIndex ]
611
675
: [ op . MATCH_STRING , stringIndex ] ,
@@ -620,25 +684,34 @@ function generateBytecode(ast) {
620
684
} ,
621
685
622
686
class ( node ) {
623
- const classIndex = addClassConst ( node ) ;
624
- const expectedIndex = addExpectedConst ( {
687
+ const match = node . match | 0 ;
688
+ // Character class constant only required if condition is generated
689
+ const classIndex = match === SOMETIMES_MATCH ? addClassConst ( node ) : null ;
690
+ // Expectation is not required if the node always matches
691
+ const expectedIndex = match !== ALWAYS_MATCH ? addExpectedConst ( {
625
692
type : "class" ,
626
693
value : node . parts ,
627
694
inverted : node . inverted ,
628
695
ignoreCase : node . ignoreCase
629
- } ) ;
696
+ } ) : null ;
630
697
631
698
return buildCondition (
699
+ match ,
632
700
[ op . MATCH_CHAR_CLASS , classIndex ] ,
633
701
[ op . ACCEPT_N , 1 ] ,
634
702
[ op . FAIL , expectedIndex ]
635
703
) ;
636
704
} ,
637
705
638
- any ( ) {
639
- const expectedIndex = addExpectedConst ( { type : "any" } ) ;
706
+ any ( node ) {
707
+ const match = node . match | 0 ;
708
+ // Expectation is not required if the node always matches
709
+ const expectedIndex = match !== ALWAYS_MATCH ? addExpectedConst ( {
710
+ type : "any"
711
+ } ) : null ;
640
712
641
713
return buildCondition (
714
+ match ,
642
715
[ op . MATCH_ANY ] ,
643
716
[ op . ACCEPT_N , 1 ] ,
644
717
[ op . FAIL , expectedIndex ]
0 commit comments