forked from ohmjs/ohm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
547 lines (449 loc) · 21.7 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
<!doctype html>
<html>
<head>
<title>ohm/js math demo</title>
<meta charset=utf-8>
<link href="math.css" rel="stylesheet"></link>
<script src="../lib.js"></script>
<script src="../../dist/ohm.js"></script>
<script type="text/ohm-js">
// An Ohm grammar for arithmetic expressions.
Arithmetic {
Exp
= AddExp
AddExp
= AddExp "+" MulExp -- plus
| AddExp "-" MulExp -- minus
| MulExp
MulExp
= MulExp "*" ExpExp -- times
| MulExp "/" ExpExp -- divide
| ExpExp
ExpExp
= PriExp "^" ExpExp -- power
| PriExp
PriExp
= "(" Exp ")" -- paren
| "+" PriExp -- pos
| "-" PriExp -- neg
| ident
| number
/*
The following rules have *descriptions*, which are optional parenthesized "comments" following
the name of a rule in its declaration. Rule descriptions are used to produce better error
messages when the input is not recognized. E.g., if you try to match the input "123" with the
`ident` rule below, Ohm will say that "an identifier" was expected. Without `ident`'s rule
description, the error message would have said that "a letter" was expected -- which is true,
but probably too low-level to be helpful. Note that `letter`, `alnum`, and `digit` are built-in
rules with their own descriptions (you can see their declarations in src/built-in-rules.ohm).
*/
ident (an identifier)
= letter alnum*
number (a number)
= digit* "." digit+ -- fract
| digit+ -- whole
}
</script>
<script>
/*
Load the grammar from the <script> tag above. Note that the tag's `type` attribute is set to
"text/ohm-js" to differentiate it from other (non-Ohm) script tags.
*/
var g = ohm.grammarFromScriptElement();
/*
You can use a grammar's `match()` method to to recognize inputs. `match()` returns a
`MatchResult`, which (among other things) can tell you if it succeeded or failed. E.g.,
*/
g.match('1+2*3').succeeded(); // evaluates to `true`
g.match('1+2*3').failed(); // evaluates to `false`
g.match('I CAN HAS CHEEZBURGER?').succeeded(); // evaluates to `false`
g.match('I CAN HAS CHEEZBURGER?').failed(); // evaluates to `true`
/*
There are two kinds of rules in Ohm:
* Lexical rules, whose names begin with a lower-case letter, and
* Syntactic rules, whose names begin with an upper-case letter.
The difference between lexical and syntactic rules is that syntactic rules implicitly skip
whitespace characters. Here, a "whitespace character" is anything that matches the grammar's
`space` rule. By default, you get a "vanilla" implementation of `space` that matches characters
like ' ', '\t', '\n', '\r', etc., but you're free to override `space` to suit the language you're
creating. (E.g., you may want it to consume comments, the syntax of which is up to you.)
Here's the `PriExp` rule from our arithmetic grammar:
PriExp
= "(" Exp ")"
| "+" PriExp
| "-" PriExp
| ident
| number
Because `PriExp` is a syntactic rule, it matches "spacey" inputs like the following example:
*/
g.match(' ( \t123 ) ', 'PriExp').succeeded(); // evaluates to `true`
/*
Note that you can optionally specify a "start rule" of a match by passing its name as the 2nd
argument to the `match` method, as shown above. When you don't specify a start rule, `match()`
uses the grammar's *default start rule*, which is the first rule in the grammar's declaration.
That's `Exp` for our arithmetic grammar.
Anyway, without support for syntactic rules, rules like `PriExp` would have to skip whitespace
characters explicitly, which can be tedious and error-prone, and often obscures the meaning of
the rule. E.g., here is what the `PriExp` rule would look like if it weren't a syntactic rule:
priExp
= space* "(" exp space* ")"
| space* "+" priExp
| space* "-" priExp
| space* ident
| space* number
Note that `exp` -- the lexical version of the `Exp` in our arithmetic grammar (not shown) -- would
also have to skip whitespace characters explicitly.
*/
/*
In Ohm, a grammar determines the set of all valid inputs / sentences in a language, but it doesn't
specify what to do with valid inputs. To do something other than recognize valid inputs (e.g., to
generate a parse tree or interpret a program) you first have to create a *semantics* for that
grammar.
A semantics is a family of *operations* and *attributes* for a given grammar. A grammar may have
any number of semantics associated with it -- this means that the clients of a grammar (even in
the same program) never have to worry about operation/attribute name clashes.
Below, we create a new semantics `s` for our arithmetic grammar.
*/
var s = g.createSemantics();
/*
But a semantics without any operations or attributes is not very interesting: it doesn't do
anything! Let's add an operation to our semantics that can be used to evaluate the arithmetic
expressions:
*/
var constants = {pi: Math.PI, e: Math.E};
s.addOperation(
'interpret',
/*
When you create an operation, you have to specify what it does for each rule in the grammar.
You do this by passing the `addOperation` method an "action dictionary": a plain old JS object
that maps the names of the rules in the grammar to *semantic actions*, i.e., functions that
specify what to do with that particular syntactic construct in the language. Here's the action
dictionary of our new operation:
*/
{
/*
The arguments of a semantic action are the *concrete syntax tree (CST) nodes* produced by
the body of its corresponding rule. E.g., here's the `Exp` rule from our arithmetic grammar:
Exp
= AddExp
The body of this rule consists of an application of the `AddExp` rule, which produces a
single CST node. Our semantic action for `Exp` will take this CST node as its only argument.
(When you create a new operation / attribute, Ohm checks the arities of all of its semantic
actions against their corresponding grammar rules -- this makes programming operations /
attributes much less error-prone. More on this later.)
Since the result of interpreting an `Exp` should be the same as the result of interpreting
its enclosed "add expression", we write:
*/
Exp: function(e) {
return e.interpret(); // Note that operations are accessed as methods on the CST nodes.
},
/*
Next, we look at `AddExp`:
AddExp
= AddExp "+" MulExp
| AddExp "-" MulExp
| MulExp
The body of this rule is a disjunction (an "OR") of three parsing expressions. The first of
these expressions,
AddExp "+" MulExp
will produce 3 CST nodes if it successfully matches the input: one for the `AddExp`
application, one for the terminal "+", and another for the `MulExp` application. Likewise,
the second choice,
AddExp "-" MulExp
will also produce 3 CST nodes on a successful match. The third choice, however,
MulExp
produces only 1 CST node. This mismatch would be problematic for someone who is trying to
write a semantic action for `AddExp`: How many arguments should that semantic action take?
Maybe it should depend on which choice succeeded? No, it turns out this wouldn't be such a
good idea. For one thing, it would make the programmer's life more difficult (e.g., she
would have to branch on the value of `arguments.length`). Worse, it would make it impossible
for Ohm to check the arities of semantic actions at operation / attribute creation time,
which would in turn make programming with Ohm error-prone.
To avoid these problems, Ohm requires all of the operands in a disjunction
e_1 | e_2 | ... | e_n
to have the same *arity*, i.e., to produce the same number of CST nodes. In fact, the
declaration of `AddExp`, as shown above, would result in a compile-time error -- namely,
the call to `ohm.grammarFromScriptElement()` would throw an exception.
We can fix this by refactoring the first two choices of `AddExp` into their own rules:
AddExp
= AddExp_plus
| AddExp_minus
| MulExp
AddExp_plus
= AddExp "+" MulExp
AddExp_minus
= AddExp "-" MulExp
Now `AddExp` has arity 1, and both `AddExp_plus` and `AddExp_minus` have arity 3, and
everything is consistent.
The downside of this refactoring is that it has made our grammar more verbose. Fortunately,
Ohm provides a syntactic sugar for this common construction: it's called an *inline rule
declaration*. When you write this (notice the *case labels* `plus` and `minus`, which look
like comments in Haskell):
AddExp
= AddExp "+" MulExp -- plus
| AddExp "-" MulExp -- minus
| MulExp
the expression to the left of each `--` becomes the body of a new rule whose name is the
name of the original rule concatenated with an underscore and the case label:
AddExp
= AddExp_plus
| AddExp_minus
| MulExp
AddExp_plus
= AddExp "+" MulExp
(Similarly for AddExp_minus)
Now it's straightforward to write the semantic actions for `AddExp`, `AddExp_plus`, and
`AddExp_minus`:
*/
AddExp: function(e) {
return e.interpret();
},
AddExp_plus: function(x, _, y) {
return x.interpret() + y.interpret();
},
AddExp_minus: function(x, _, y) {
return x.interpret() - y.interpret();
},
/*
The following semantic actions are more of the same...
*/
MulExp: function(e) { return e.interpret(); },
MulExp_times: function(x, _, y) { return x.interpret() * y.interpret(); },
MulExp_divide: function(x, _, y) { return x.interpret() / y.interpret(); },
ExpExp: function(e) { return e.interpret(); },
ExpExp_power: function(x, _, y) { return Math.pow(x.interpret(), y.interpret()); },
PriExp: function(e) { return e.interpret(); },
PriExp_paren: function(_l, e, _r) { return e.interpret(); },
PriExp_pos: function(_, e) { return e.interpret(); },
PriExp_neg: function(_, e) { return -e.interpret(); },
/*
CST nodes have a couple of useful properties which contain information about where that
node "came from" in the input:
* `aNode.sourceString` contains the substring of the input that was consumed by the
node.
* `aNode.source.startIdx` and `aNode.source.endIdx` give the position of the source
string in the original input.
We use `this.sourceString` in the two rules below to interpret identifiers and numbers.
(In a semantic action for a rule R, `this` is bound to the CST node that was produced by R.
In other words, `this` is the parent of each of the CST nodes that are passed as arguments
to the semantic action.)
*/
ident: function(_l, _ns) {
// Look up the value of a named constant, e.g., 'pi'.
return constants[this.sourceString] || 0;
},
number: function(_) {
// Use `parseFloat` to convert (e.g.) the string '123' to the number 123.
return parseFloat(this.sourceString);
}
}
);
/*
So now that we've created a semantics for our arithmetic grammar and defined the `interpret`
operation, how do we use them?
*/
var r = g.match('(2+4)*7'); // First, you need a successful `MatchResult`.
var n = s(r); // Then, you apply the semantics to that match result to get a CST node,
console.log(n.interpret()); // ... on which you can access the functionality provided by the
// semantics. (This will print `42`.)
/*
Now we'll add an `asLisp` attribute to our semantics that converts CST nodes to Lisp-like trees.
Attributes are just like operations, but (i) they are accessed like properties -- not methods --
on CST nodes, and (ii) their values are memoized.
*/
s.addAttribute('asLisp', {
AddExp_plus: function(x, _, y) { return ['+', x.asLisp, y.asLisp]; },
AddExp_minus: function(x, _, y) { return ['-', x.asLisp, y.asLisp]; },
MulExp_times: function(x, _, y) { return ['*', x.asLisp, y.asLisp]; },
MulExp_divide: function(x, _, y) { return ['/', x.asLisp, y.asLisp]; },
ExpExp_power: function(x, _, y) { return ['pow', x.asLisp, y.asLisp]; },
PriExp_paren: function(_l, e, _r) { return e.asLisp; },
PriExp_pos: function(_, e) { return e.asLisp; },
PriExp_neg: function(_, e) { return ['neg', e.asLisp]; },
ident: function(_l, _ns) { return this.sourceString; },
number: function(_) { return this.sourceString; },
/*
When you create an operation or an attribute, you can optionally provide a `_nonterminal`
semantic action that will be invoked when your action dictionary does not have a method that
corresponds to the rule that created a CST node. The receiver (`this`) of the _nonterminal
method will be that CST node, and `_nonterminal`'s only argument will be an array that contains
the children of that node.
*/
_nonterminal: function(children) {
if (children.length === 1) {
// If this node has only one child, just return the Lisp-like tree of its child. This lets us
// avoid writing semantic actions for the `Exp`, `AddExp`, `MulExp`, `ExpExp`, and `PriExp`
// rules.
return children[0].asLisp;
} else {
// If this node doesn't have exactly one child, we probably should have handled it specially.
// So we'll throw an exception to let us know that we're missing a semantic action for this
// type of node.
throw new Error("Uh-oh, missing semantic action for " + this.constructor);
}
}
});
/*
Remember the CST node that we created from the expression '(2+4)*7'? Now it has an `asLisp`
attribute:
*/
n.asLisp; // evaluates to `["*", ["+", "2", "4"], "7"]`
n.asLisp === n.asLisp; // evaluates to `true`
// -------------------------------------------------------------------------------------------------
var elt = makeElement;
s.addOperation('toTree', {
Exp: function(e) { return elt('exp', e.toTree()); },
AddExp: function(e) { return elt('addExp', e.toTree()); },
AddExp_plus: function(x, op, y) { return elt('plus', x.toTree(), op.toTree(), y.toTree()); },
AddExp_minus: function(x, op, y) { return elt('minus', x.toTree(), op.toTree(), y.toTree()); },
MulExp: function(e) { return elt('mulExp', e.toTree()); },
MulExp_times: function(x, op, y) { return elt('times', x.toTree(), op.toTree(), y.toTree()); },
MulExp_divide: function(x, op, y) { return elt('divide', x.toTree(), op.toTree(), y.toTree()); },
ExpExp: function(e) { return elt('expExp', e.toTree()); },
ExpExp_power: function(x, op, y) { return elt('power', x.toTree(), op.toTree(), y.toTree()); },
PriExp: function(e) { return elt('priExp', e.toTree()); },
PriExp_paren: function(op, e, cp) { return elt('paren', op.toTree(), e.toTree(), cp.toTree()); },
PriExp_pos: function(sign, e) { return elt('pos', sign.toTree(), e.toTree()); },
PriExp_neg: function(sign, e) { return elt('neg', sign.toTree(), e.toTree()); },
ident: function(_l, _ns) { return elt('ident', this.sourceString); },
number: function(_) { return elt('number', this.sourceString); },
// The _terminal semantic action specifies what your operation or attribute should do with
// (you guessed it) terminals. Here, we return the terminal's `primitiveValue` property,
// which for a string terminal is the string itself. For a number terminal `t`,
// `t.primitiveValue` evaluates to the corresponding (JavaScript) number, and so on.
_terminal: function() { return this.primitiveValue; }
});
// -------------------------------------------------------------------------------------------------
s.addOperation('toTwoD', {
AddExp_plus: function(x, op, y) { var operator = elt('operator', op.toTwoD());
return elt('plus', x.toTwoD(), operator, y.toTwoD()); },
AddExp_minus: function(x, op, y) { var operator = elt('operator', '\u2212');
return elt('minus', x.toTwoD(), operator, y.toTwoD()); },
MulExp_times: function(x, op, y) { var operator = elt('operator', '\u00D7');
return elt('times', x.toTwoD(), operator, y.toTwoD()); },
MulExp_divide: function(x, op, y) { var numerator = elt('numerator', x.toTwoD());
var denominator = elt('denominator', y.toTwoD());
return elt('fraction', numerator, denominator); },
ExpExp_power: function(x, op, y) { var base = elt('theBase', x.toTwoD());
var exponent = elt('exponent', y.toTwoD());
return elt('power', base, exponent); },
PriExp_paren: function(_l, e, _r) { return elt('paren', e.toTwoD()); },
PriExp_pos: function(sign, e) { return elt('pos', sign.toTwoD(), e.toTwoD()); },
PriExp_neg: function(sign, e) { return elt('neg', sign.toTwoD(), e.toTwoD()); },
ident: function(_l, _ns) { var text = this.sourceString;
return elt('ident', text === 'pi' ? '\u03C0' : text); },
number: function(_) { return elt('number', this.sourceString); },
_terminal: function() { return this.primitiveValue; }
// Remember the `_nonterminal` semantic action that we wrote for the `interpret` operation above?
// Turns out that's so convenient that the Ohm people decided to give it to you "for free", i.e.,
// if you don't write a `_nonterminal` semantic action, you get that one automatically. Of course
// you're free to override it if that's not what you want, but in this case, it's exactly what
// we want.
});
</script>
</head>
<body>
<input type="text" id="input" placeholder="Enter an arithmetic expression..." size="80"></input>
<div id="errorDiv">
<div id="spaces"></div>
<wrapperWrapper><wrapper>
<div id="error"><label>Expected:</label><span id="errorDetails"></span></div>
</wrapper></wrapperWrapper>
</div>
<div id="value"></div>
<div id="lisp"></div>
<div id="tree"></div>
<div id="twoD"></div>
<script>
/*
This is the code that drives the UI in this demo.
*/
var input = document.getElementById('input');
var spaces = document.getElementById('spaces');
var error = document.getElementById('error');
var errorDiv = document.getElementById('errorDiv');
var errorDetails = document.getElementById('errorDetails');
input.value = '';
hideError();
function stringifyLisp(x) {
return Array.isArray(x) ? '(' + x.map(stringifyLisp).join(' ') + ')'
: x.toString();
}
input.oninput = function() {
hideError();
this.className = undefined;
var r = g.match(this.value);
if (r.succeeded()) {
// Notice that you can do many different things with the same match result, i.e., you only have
// to process the input once.
show('value', s(r).interpret());
show('lisp', stringifyLisp(s(r).asLisp));
show('tree', s(r).toTree());
show('twoD', s(r).toTwoD());
} else if (this.value.trim().length === 0) {
// The match failed because there was no input, so don't complain. (That would be annoying.)
show('value', '');
show('lisp', '');
show('tree', '');
show('twoD', '');
} else {
showError(r);
}
};
function hideError() {
input.className = undefined;
errorDiv.className = errorDiv.className = 'hidden';
}
function showError(r) {
input.className = 'error';
setTimeout(function() {
// Position the error bubble to line up with the offending input.
spaces.innerHTML = repeat(' ', r.getRightmostFailurePosition());
// Set up the details, i.e., what input was expected at that position.
removeChildren(errorDetails);
// Add the non-fluffy failures first...
var nonFluffyFailures =
r.getRightmostFailures().filter(function(failure) { return !failure.isFluffy(); });
nonFluffyFailures.forEach(addErrorDetails);
// ... then the fluffy ones
var fluffyFailures =
r.getRightmostFailures().filter(function(failure) { return failure.isFluffy(); });
fluffyFailures.forEach(addErrorDetails);
// Show the error balloon.
errorDiv.className = 'visible';
}, 0);
}
function addErrorDetails(failure) {
var element = createExpectedElement(failure);
errorDetails.appendChild(element);
}
function createExpectedElement(failure) {
if (failure.isStringTerminal()) {
return elt('literal',
elt('light', '"'),
elt('code', failure.getText()),
elt('light', '"'));
} else if (failure.isCode()) {
return elt('code', failure.getText());
} else {
return elt('description', failure.getText());
}
}
function setInput(text) {
input.value = text;
input.oninput();
}
// Show something interesting when the page loads.
setInput('1 + 2 * 3 / 4^5');
// -------------------------------------------------------------------------------------------------
// This other stuff turns this example into a unit test.
function $(sel) { return document.querySelector(sel); }
window.test = function(t) {
setInput('33 * 5 + 100');
t.equal($('#value').textContent, '265', 'value is correct');
t.equal($('#lisp').textContent, '(+ (* 33 5) 100)', 'LISP representation is correct');
t.end();
};
</script>
</body>
</html>