examples/math/index.html

<!doctype html>
<html>
  <head>
    <title>ohm/js math demo</title>
    <meta charset=utf-8>
    <link href="math.css" rel="stylesheet"></link>
    <script src="../lib.js"></script>
    <script src="../../dist/ohm.js"></script>
    <script type="text/ohm-js">

// An Ohm grammar for arithmetic expressions.

Arithmetic {
  Exp
    = AddExp

  AddExp
    = AddExp "+" MulExp  -- plus
    | AddExp "-" MulExp  -- minus
    | MulExp

  MulExp
    = MulExp "*" ExpExp  -- times
    | MulExp "/" ExpExp  -- divide
    | ExpExp

  ExpExp
    = PriExp "^" ExpExp  -- power
    | PriExp

  PriExp
    = "(" Exp ")"  -- paren
    | "+" PriExp   -- pos
    | "-" PriExp   -- neg
    | ident
    | number

  /*
    The following rules have *descriptions*, which are optional parenthesized "comments" following
    the name of a rule in its declaration. Rule descriptions are used to produce better error
    messages when the input is not recognized. E.g., if you try to match the input "123" with the
    `ident` rule below, Ohm will say that "an identifier" was expected. Without `ident`'s rule
    description, the error message would have said that "a letter" was expected -- which is true,
    but probably too low-level to be helpful. Note that `letter`, `alnum`, and `digit` are built-in
    rules with their own descriptions (you can see their declarations in src/built-in-rules.ohm).
  */
  ident  (an identifier)
    = letter alnum*

  number  (a number)
    = digit* "." digit+  -- fract
    | digit+             -- whole
}

    </script>
    <script>

/*
  Load the grammar from the <script> tag above. Note that the tag's `type` attribute is set to
  "text/ohm-js" to differentiate it from other (non-Ohm) script tags.
*/

var g = ohm.grammarFromScriptElement();

/*
  You can use a grammar's `match()` method to to recognize inputs. `match()` returns a
  `MatchResult`, which (among other things) can tell you if it succeeded or failed. E.g.,
*/

g.match('1+2*3').succeeded();                   // evaluates to `true`
g.match('1+2*3').failed();                      // evaluates to `false`

g.match('I CAN HAS CHEEZBURGER?').succeeded();  // evaluates to `false`
g.match('I CAN HAS CHEEZBURGER?').failed();     // evaluates to `true`

/*
  There are two kinds of rules in Ohm:

  * Lexical rules, whose names begin with a lower-case letter, and
  * Syntactic rules, whose names begin with an upper-case letter.

  The difference between lexical and syntactic rules is that syntactic rules implicitly skip
  whitespace characters. Here, a "whitespace character" is anything that matches the grammar's
  `space` rule. By default, you get a "vanilla" implementation of `space` that matches characters
  like ' ', '\t', '\n', '\r', etc., but you're free to override `space` to suit the language you're
  creating. (E.g., you may want it to consume comments, the syntax of which is up to you.)

  Here's the `PriExp` rule from our arithmetic grammar:

    PriExp
      = "(" Exp ")"
      | "+" PriExp
      | "-" PriExp
      | ident
      | number

  Because `PriExp` is a syntactic rule, it matches "spacey" inputs like the following example:
*/

g.match(' (  \t123   ) ', 'PriExp').succeeded();  // evaluates to `true`

/*
  Note that you can optionally specify a "start rule" of a match by passing its name as the 2nd
  argument to the `match` method, as shown above. When you don't specify a start rule, `match()`
  uses the grammar's *default start rule*, which is the first rule in the grammar's declaration.
  That's `Exp` for our arithmetic grammar.

  Anyway, without support for syntactic rules, rules like `PriExp` would have to skip whitespace
  characters explicitly, which can be tedious and error-prone, and often obscures the meaning of
  the rule. E.g., here is what the `PriExp` rule would look like if it weren't a syntactic rule:

    priExp
      = space* "(" exp space* ")"
      | space* "+" priExp
      | space* "-" priExp
      | space* ident
      | space* number

  Note that `exp` -- the lexical version of the `Exp` in our arithmetic grammar (not shown) -- would
  also have to skip whitespace characters explicitly.
*/

/*
  In Ohm, a grammar determines the set of all valid inputs / sentences in a language, but it doesn't
  specify what to do with valid inputs. To do something other than recognize valid inputs (e.g., to
  generate a parse tree or interpret a program) you first have to create a *semantics* for that
  grammar.

  A semantics is a family of *operations* and *attributes* for a given grammar. A grammar may have
  any number of semantics associated with it -- this means that the clients of a grammar (even in
  the same program) never have to worry about operation/attribute name clashes.

  Below, we create a new semantics `s` for our arithmetic grammar.
*/

var s = g.createSemantics();

/*
  But a semantics without any operations or attributes is not very interesting: it doesn't do
  anything! Let's add an operation to our semantics that can be used to evaluate the arithmetic
  expressions:
*/

var constants = {pi: Math.PI, e: Math.E};

s.addOperation(
    'interpret',
    /*
      When you create an operation, you have to specify what it does for each rule in the grammar.
      You do this by passing the `addOperation` method an "action dictionary": a plain old JS object
      that maps the names of the rules in the grammar to *semantic actions*, i.e., functions that
      specify what to do with that particular syntactic construct in the language. Here's the action
      dictionary of our new operation:
    */
    {
      /*
        The arguments of a semantic action are the *concrete syntax tree (CST) nodes* produced by
        the body of its corresponding rule. E.g., here's the `Exp` rule from our arithmetic grammar:

          Exp
            = AddExp

        The body of this rule consists of an application of the `AddExp` rule, which produces a
        single CST node. Our semantic action for `Exp` will take this CST node as its only argument.

        (When you create a new operation / attribute, Ohm checks the arities of all of its semantic
        actions against their corresponding grammar rules -- this makes programming operations /
        attributes much less error-prone. More on this later.)

        Since the result of interpreting an `Exp` should be the same as the result of interpreting
        its enclosed "add expression", we write:
      */
      Exp: function(e) {
        return e.interpret();  // Note that operations are accessed as methods on the CST nodes.
      },

      /*
        Next, we look at `AddExp`:

          AddExp
            = AddExp "+" MulExp
            | AddExp "-" MulExp
            | MulExp

        The body of this rule is a disjunction (an "OR") of three parsing expressions. The first of
        these expressions,

          AddExp "+" MulExp

        will produce 3 CST nodes if it successfully matches the input: one for the `AddExp`
        application, one for the terminal "+", and another for the `MulExp` application. Likewise,
        the second choice,

          AddExp "-" MulExp

        will also produce 3 CST nodes on a successful match. The third choice, however,

          MulExp

        produces only 1 CST node. This mismatch would be problematic for someone who is trying to
        write a semantic action for `AddExp`: How many arguments should that semantic action take?
        Maybe it should depend on which choice succeeded? No, it turns out this wouldn't be such a
        good idea. For one thing, it would make the programmer's life more difficult (e.g., she
        would have to branch on the value of `arguments.length`). Worse, it would make it impossible
        for Ohm to check the arities of semantic actions at operation / attribute creation time,
        which would in turn make programming with Ohm error-prone.

        To avoid these problems, Ohm requires all of the operands in a disjunction

          e_1 | e_2 | ... | e_n

        to have the same *arity*, i.e., to produce the same number of CST nodes. In fact, the
        declaration of `AddExp`, as shown above, would result in a compile-time error -- namely,
        the call to `ohm.grammarFromScriptElement()` would throw an exception.

        We can fix this by refactoring the first two choices of `AddExp` into their own rules:

          AddExp
            = AddExp_plus
            | AddExp_minus
            | MulExp

          AddExp_plus
            = AddExp "+" MulExp

          AddExp_minus
            = AddExp "-" MulExp

        Now `AddExp` has arity 1, and both `AddExp_plus` and `AddExp_minus` have arity 3, and
        everything is consistent.

        The downside of this refactoring is that it has made our grammar more verbose. Fortunately,
        Ohm provides a syntactic sugar for this common construction: it's called an *inline rule
        declaration*. When you write this (notice the *case labels* `plus` and `minus`, which look
        like comments in Haskell):

          AddExp
            = AddExp "+" MulExp  -- plus
            | AddExp "-" MulExp  -- minus
            | MulExp

        the expression to the left of each `--` becomes the body of a new rule whose name is the
        name of the original rule concatenated with an underscore and the case label:

          AddExp
            = AddExp_plus
            | AddExp_minus
            | MulExp

          AddExp_plus
            = AddExp "+" MulExp

          (Similarly for AddExp_minus)

        Now it's straightforward to write the semantic actions for `AddExp`, `AddExp_plus`, and
        `AddExp_minus`:
      */
      AddExp: function(e) {
        return e.interpret();
      },
      AddExp_plus: function(x, _, y) {
        return x.interpret() + y.interpret();
      },
      AddExp_minus: function(x, _, y) {
        return x.interpret() - y.interpret();
      },

      /*
        The following semantic actions are more of the same...
      */
      MulExp:        function(e)         { return e.interpret(); },
      MulExp_times:  function(x, _, y)   { return x.interpret() * y.interpret(); },
      MulExp_divide: function(x, _, y)   { return x.interpret() / y.interpret(); },
      ExpExp:        function(e)         { return e.interpret(); },
      ExpExp_power:  function(x, _, y)   { return Math.pow(x.interpret(), y.interpret()); },
      PriExp:        function(e)         { return e.interpret(); },
      PriExp_paren:  function(_l, e, _r) { return e.interpret(); },
      PriExp_pos:    function(_, e)      { return e.interpret(); },
      PriExp_neg:    function(_, e)      { return -e.interpret(); },

      /*
        CST nodes have a couple of useful properties which contain information about where that
        node "came from" in the input:

        * `aNode.sourceString` contains the substring of the input that was consumed by the
          node.

        * `aNode.source.startIdx` and `aNode.source.endIdx` give the position of the source
          string in the original input.

        We use `this.sourceString` in the two rules below to interpret identifiers and numbers.
        (In a semantic action for a rule R, `this` is bound to the CST node that was produced by R.
        In other words, `this` is the parent of each of the CST nodes that are passed as arguments
        to the semantic action.)
      */
      ident: function(_l, _ns) {
        // Look up the value of a named constant, e.g., 'pi'.
        return constants[this.sourceString] || 0;
      },
      number: function(_) {
        // Use `parseFloat` to convert (e.g.) the string '123' to the number 123.
        return parseFloat(this.sourceString);
      }
    }
);

/*
  So now that we've created a semantics for our arithmetic grammar and defined the `interpret`
  operation, how do we use them?
*/

var r = g.match('(2+4)*7'); // First, you need a successful `MatchResult`.
var n = s(r);               // Then, you apply the semantics to that match result to get a CST node,
console.log(n.interpret()); // ... on which you can access the functionality provided by the
                            // semantics. (This will print `42`.)

/*
  Now we'll add an `asLisp` attribute to our semantics that converts CST nodes to Lisp-like trees.
  Attributes are just like operations, but (i) they are accessed like properties -- not methods --
  on CST nodes, and (ii) their values are memoized.
*/

s.addAttribute('asLisp', {
  AddExp_plus:   function(x, _, y)   { return ['+', x.asLisp, y.asLisp]; },
  AddExp_minus:  function(x, _, y)   { return ['-', x.asLisp, y.asLisp]; },
  MulExp_times:  function(x, _, y)   { return ['*', x.asLisp, y.asLisp]; },
  MulExp_divide: function(x, _, y)   { return ['/', x.asLisp, y.asLisp]; },
  ExpExp_power:  function(x, _, y)   { return ['pow', x.asLisp, y.asLisp]; },
  PriExp_paren:  function(_l, e, _r) { return e.asLisp; },
  PriExp_pos:    function(_, e)      { return e.asLisp; },
  PriExp_neg:    function(_, e)      { return ['neg', e.asLisp]; },
  ident:         function(_l, _ns)   { return this.sourceString; },
  number:        function(_)         { return this.sourceString; },

  /*
    When you create an operation or an attribute, you can optionally provide a `_nonterminal`
    semantic action that will be invoked when your action dictionary does not have a method that
    corresponds to the rule that created a CST node. The receiver (`this`) of the _nonterminal
    method will be that CST node, and `_nonterminal`'s only argument will be an array that contains
    the children of that node.
  */
  _nonterminal:  function(children) {
    if (children.length === 1) {
      // If this node has only one child, just return the Lisp-like tree of its child. This lets us
      // avoid writing semantic actions for the `Exp`, `AddExp`, `MulExp`, `ExpExp`, and `PriExp`
      // rules.
      return children[0].asLisp;
    } else {
      // If this node doesn't have exactly one child, we probably should have handled it specially.
      // So we'll throw an exception to let us know that we're missing a semantic action for this
      // type of node.
      throw new Error("Uh-oh, missing semantic action for " + this.constructor);
    }
  }
});

/*
  Remember the CST node that we created from the expression '(2+4)*7'? Now it has an `asLisp`
  attribute:
*/

n.asLisp;               // evaluates to `["*", ["+", "2", "4"], "7"]`
n.asLisp === n.asLisp;  // evaluates to `true`

// -------------------------------------------------------------------------------------------------

var elt = makeElement;

s.addOperation('toTree', {
  Exp:           function(e)         { return elt('exp', e.toTree()); },
  AddExp:        function(e)         { return elt('addExp', e.toTree()); },
  AddExp_plus:   function(x, op, y)  { return elt('plus', x.toTree(), op.toTree(), y.toTree()); },
  AddExp_minus:  function(x, op, y)  { return elt('minus', x.toTree(), op.toTree(), y.toTree()); },
  MulExp:        function(e)         { return elt('mulExp', e.toTree()); },
  MulExp_times:  function(x, op, y)  { return elt('times', x.toTree(), op.toTree(), y.toTree()); },
  MulExp_divide: function(x, op, y)  { return elt('divide', x.toTree(), op.toTree(), y.toTree()); },
  ExpExp:        function(e)         { return elt('expExp', e.toTree()); },
  ExpExp_power:  function(x, op, y)  { return elt('power', x.toTree(), op.toTree(), y.toTree()); },
  PriExp:        function(e)         { return elt('priExp', e.toTree()); },
  PriExp_paren:  function(op, e, cp) { return elt('paren', op.toTree(), e.toTree(), cp.toTree()); },
  PriExp_pos:    function(sign, e)   { return elt('pos', sign.toTree(), e.toTree()); },
  PriExp_neg:    function(sign, e)   { return elt('neg', sign.toTree(), e.toTree()); },
  ident:         function(_l, _ns)   { return elt('ident', this.sourceString); },
  number:        function(_)         { return elt('number', this.sourceString); },

  // The _terminal semantic action specifies what your operation or attribute should do with
  // (you guessed it) terminals. Here, we return the terminal's `primitiveValue` property,
  // which for a string terminal is the string itself. For a number terminal `t`,
  // `t.primitiveValue` evaluates to the corresponding (JavaScript) number, and so on.
  _terminal:     function()          { return this.primitiveValue; }
});

// -------------------------------------------------------------------------------------------------

s.addOperation('toTwoD', {
  AddExp_plus:   function(x, op, y)   { var operator = elt('operator', op.toTwoD());
                                        return elt('plus', x.toTwoD(), operator, y.toTwoD()); },
  AddExp_minus:  function(x, op, y)   { var operator = elt('operator', '\u2212');
                                        return elt('minus', x.toTwoD(), operator, y.toTwoD()); },
  MulExp_times:  function(x, op, y)   { var operator = elt('operator', '\u00D7');
                                        return elt('times', x.toTwoD(), operator, y.toTwoD()); },
  MulExp_divide: function(x, op, y)   { var numerator = elt('numerator', x.toTwoD());
                                        var denominator = elt('denominator', y.toTwoD());
                                        return elt('fraction', numerator, denominator); },
  ExpExp_power:  function(x, op, y)   { var base = elt('theBase', x.toTwoD());
                                        var exponent = elt('exponent', y.toTwoD());
                                        return elt('power', base, exponent); },
  PriExp_paren:  function(_l, e, _r)  { return elt('paren', e.toTwoD()); },
  PriExp_pos:    function(sign, e)    { return elt('pos', sign.toTwoD(), e.toTwoD()); },
  PriExp_neg:    function(sign, e)    { return elt('neg', sign.toTwoD(), e.toTwoD()); },
  ident:         function(_l, _ns)    { var text = this.sourceString;
                                        return elt('ident', text === 'pi' ? '\u03C0' : text); },
  number:        function(_)          { return elt('number', this.sourceString); },
  _terminal:     function()           { return this.primitiveValue; }

  // Remember the `_nonterminal` semantic action that we wrote for the `interpret` operation above?
  // Turns out that's so convenient that the Ohm people decided to give it to you "for free", i.e.,
  // if you don't write a `_nonterminal` semantic action, you get that one automatically. Of course
  // you're free to override it if that's not what you want, but in this case, it's exactly what
  // we want.
});
    </script>
  </head>
  <body>
    <input type="text" id="input" placeholder="Enter an arithmetic expression..." size="80"></input>
    <div id="errorDiv">
      <div id="spaces"></div>
      <wrapperWrapper><wrapper>
        <div id="error"><label>Expected:</label><span id="errorDetails"></span></div>
      </wrapper></wrapperWrapper>
    </div>
    <div id="value"></div>
    <div id="lisp"></div>
    <div id="tree"></div>
    <div id="twoD"></div>
    <script>

/*
  This is the code that drives the UI in this demo.
*/

var input = document.getElementById('input');
var spaces = document.getElementById('spaces');
var error = document.getElementById('error');
var errorDiv = document.getElementById('errorDiv');
var errorDetails = document.getElementById('errorDetails');

input.value = '';
hideError();

function stringifyLisp(x) {
  return Array.isArray(x) ? '(' + x.map(stringifyLisp).join(' ') + ')'
                          : x.toString();
}

input.oninput = function() {
  hideError();
  this.className = undefined;
  var r = g.match(this.value);
  if (r.succeeded()) {
    // Notice that you can do many different things with the same match result, i.e., you only have
    // to process the input once.
    show('value', s(r).interpret());
    show('lisp', stringifyLisp(s(r).asLisp));
    show('tree', s(r).toTree());
    show('twoD', s(r).toTwoD());
  } else if (this.value.trim().length === 0) {
    // The match failed because there was no input, so don't complain. (That would be annoying.)
    show('value', '');
    show('lisp', '');
    show('tree', '');
    show('twoD', '');
  } else {
    showError(r);
  }
};

function hideError() {
  input.className = undefined;
  errorDiv.className = errorDiv.className = 'hidden';
}

function showError(r) {
  input.className = 'error';
  setTimeout(function() {
    // Position the error bubble to line up with the offending input.
    spaces.innerHTML = repeat(' ', r.getRightmostFailurePosition());

    // Set up the details, i.e., what input was expected at that position.
    removeChildren(errorDetails);

    // Add the non-fluffy failures first...
    var nonFluffyFailures =
        r.getRightmostFailures().filter(function(failure) { return !failure.isFluffy(); });
    nonFluffyFailures.forEach(addErrorDetails);

    // ... then the fluffy ones
    var fluffyFailures =
        r.getRightmostFailures().filter(function(failure) { return failure.isFluffy(); });
    fluffyFailures.forEach(addErrorDetails);

    // Show the error balloon.
    errorDiv.className = 'visible';
  }, 0);
}

function addErrorDetails(failure) {
  var element = createExpectedElement(failure);
  errorDetails.appendChild(element);
}

function createExpectedElement(failure) {
  if (failure.isStringTerminal()) {
    return elt('literal',
      elt('light', '"'),
      elt('code', failure.getText()),
      elt('light', '"'));
  } else if (failure.isCode()) {
    return elt('code', failure.getText());
  } else {
    return elt('description', failure.getText());
  }
}

function setInput(text) {
  input.value = text;
  input.oninput();
}

// Show something interesting when the page loads.
setInput('1 + 2 * 3 / 4^5');

// -------------------------------------------------------------------------------------------------

// This other stuff turns this example into a unit test.

function $(sel) { return document.querySelector(sel); }

window.test = function(t) {
  setInput('33 * 5 + 100');
  t.equal($('#value').textContent, '265', 'value is correct');
  t.equal($('#lisp').textContent, '(+ (* 33 5) 100)', 'LISP representation is correct');
  t.end();
};
    </script>
  </body>
</html>