Use our own lexer, not jison's

Summary: Build our own lexer and inject it into jison's parser, because jison's lexer notation is confusing and annoying, and it doesn't let us do some fun stuff. Test Plan: Run stuff, make sure it still works. Reviewers: spicyj Reviewed By: spicyj Differential Revision: http://phabricator.benalpert.com/D40
2025-10-06 11:48:41 +00:00 · 2013-07-07 21:13:43 -07:00
parent 33625f7b08
commit 8f99433c80
3 changed files with 103 additions and 24 deletions
--- a/MJLite.js
+++ b/MJLite.js
@@ -1,4 +1,5 @@
 var parser = require("./parser.jison");
 parser.lexer = require("./lexer");
 var buildExpression = function(expression) {
    return _.map(expression, function(ex, i) {
--- a/lexer.js
+++ b/lexer.js
@@ -0,0 +1,94 @@
 // Lexer states. DEFAULT_STATE matches ordinary symbols; after a backslash the
 // lexer switches to FUNC_STATE, where the next token must be a function name.
 var DEFAULT_STATE = 0,
    FUNC_STATE = 1;

 /**
  * Hand-written lexer that is injected into the jison-generated parser in
  * place of jison's own lexer. It provides `setInput` and `lex` and maintains
  * the `yytext`, `yyleng`, `yylineno` and `yylloc` fields.
  *
  * `setInput` must be called before the first call to `lex`.
  */
 function Lexer() {
 }

 // Function names accepted after a backslash. Each name doubles as the token
 // type that `lex` returns for it.
 var funcs = [
    'cdot', 'frac', 'lvert', 'rvert', 'pm', 'div'
 ];

 // One precompiled pattern per entry of `funcs`, in the same order. The
 // negative lookahead rejects a name that is immediately followed by another
 // letter, so "\divx" is not read as "\div" followed by "x".
 var funcPatterns = [];
 for (var f = 0; f < funcs.length; f++) {
    funcPatterns.push(new RegExp('^' + funcs[f] + '(?![a-zA-Z])'));
 }

 // [pattern, token type] pairs tried in order while in DEFAULT_STATE; the
 // first pattern that matches at the current position wins.
 var normals = [
    [/^[/|a-zA-Z0-9.]/, 'ORD'],
    [/^[*+-]/, 'BIN'],
    [/^\^/, '^'],
    [/^_/, '_'],
    [/^{/, '{'],
    [/^}/, '}'],
    [/^[(\[]/, 'OPEN'],
    [/^[)\]]/, 'CLOSE']
 ];

 /**
  * Records `match` as the current token and advances past it: sets
  * `yytext`/`yyleng`, updates the column range in `yylloc`, and moves the
  * input position forward by `match.length`.
  *
  * @param {string} match Text of the token found at the current position.
  */
 Lexer.prototype.doMatch = function(match) {
    this.yytext = match;
    this.yyleng = match.length;

    this.yylloc.first_column = this._pos;
    this.yylloc.last_column = this._pos + match.length;

    this._pos += match.length;
 };

 /**
  * Consumes and returns the next token type from the input.
  *
  * Leading whitespace is skipped in both states. In DEFAULT_STATE a backslash
  * yields the '\\' token and switches to FUNC_STATE; any other input is
  * matched against `normals`. In FUNC_STATE the input must start with one of
  * `funcs` (not followed by a letter), after which the lexer returns to
  * DEFAULT_STATE.
  *
  * @returns {string} The token type, or 'EOF' once the input is exhausted.
  * @throws {string} "Unexpected character: ..." when nothing matches.
  */
 Lexer.prototype.lex = function() {
    // Get rid of whitespace
    var whitespace = this._input.slice(this._pos).match(/^\s*/)[0];
    this._pos += whitespace.length;

    if (this._pos >= this._input.length) {
        return 'EOF';
    }

    var toMatch = this._input.slice(this._pos);
    var i, match;

    if (this.state === DEFAULT_STATE) {
        if (toMatch.charAt(0) === '\\') {
            this.state = FUNC_STATE;
            this.doMatch('\\');
            return '\\';
        }

        for (i = 0; i < normals.length; i++) {
            match = toMatch.match(normals[i][0]);
            if (match) {
                this.doMatch(match[0]);
                return normals[i][1];
            }
        }
    } else if (this.state === FUNC_STATE) {
        for (i = 0; i < funcPatterns.length; i++) {
            match = toMatch.match(funcPatterns[i]);
            if (match) {
                this.doMatch(match[0]);
                this.state = DEFAULT_STATE;
                return funcs[i];
            }
        }
    }

    // A plain string is thrown (not an Error object) so that existing callers
    // keep receiving exactly the value they received before.
    throw "Unexpected character: '" + toMatch[0] + "' at position " + this._pos;
 };

 /**
  * Resets the lexer to the start of a new input string: position 0,
  * DEFAULT_STATE, empty `yytext`, and a fresh `yylloc` on line 1, column 0.
  *
  * @param {string} input The text to tokenize.
  */
 Lexer.prototype.setInput = function(input) {
    this._input = input;
    this._pos = 0;

    this.yyleng = 0;
    this.yytext = "";
    this.yylineno = 0;
    this.yylloc = {
        first_line: 1,
        first_column: 0,
        last_line: 1,
        last_column: 0
    };

    this.state = DEFAULT_STATE;
 };
 // Export a single Lexer instance; MJLite.js assigns it to `parser.lexer`.
 module.exports = new Lexer();
--- a/parser.jison
+++ b/parser.jison
@@ -4,22 +4,6 @@
 %lex
 %%
 \s+                   /* skip whitespace */
 cdot                  return 'CDOT'
 frac                  return 'FRAC'
 lvert                 return 'LVERT'
 rvert                 return 'RVERT'
 pm                    return 'PM'
 div                   return 'DIV'
 [/|a-zA-Z0-9]         return 'ORD'
 [*+-]                 return 'BIN'
 \^                    return '^'
 [_]                   return '_'
 [{]                   return '{'
 [}]                   return '}'
 [(]                   return 'OPEN'
 [)]                   return 'CLOSE'
 [\\]                  return '\\'
 <<EOF>>               return 'EOF'
 /lex
@@ -37,7 +21,7 @@ div                   return 'DIV'
 %% /* language grammar */
 expression
-    : ex EOF
+    : ex 'EOF'
        {return $1;}
    ;
@@ -61,22 +45,22 @@ group
        {$$ = $1;}
    | '{' ex '}'
        {$$ = $2;}
-    | '\\' func
+    | '\' func
        {$$ = $2;}
    ;
 func
-    : 'CDOT'
+    : 'cdot'
        {$$ = [{type: 'bin', value: yytext}];}
-    | 'PM'
+    | 'pm'
        {$$ = [{type: 'bin', value: yytext}];}
-    | 'DIV'
+    | 'div'
        {$$ = [{type: 'bin', value: yytext}];}
-    | 'FRAC' group group
+    | 'frac' group group
        {$$ = [{type: 'frac', value: {numer: $2, denom: $3}}];}
-    | 'LVERT'
+    | 'lvert'
        {$$ = [{type: 'open', value: yytext}];}
-    | 'RVERT'
+    | 'rvert'
        {$$ = [{type: 'close', value: yytext}];}
    ;