From 13f3eac7418ee35f9779aa9c4ad90587c06addf3 Mon Sep 17 00:00:00 2001
From: Ashish Myles <marcianx@gmail.com>
Date: Mon, 4 Sep 2017 15:27:58 -0400
Subject: [PATCH] To @flow: Token, Lexer, ParseError, and ParseNode. (#839)

* To @flow: Token, Lexer, ParseError, and ParseNode.

* PR fixes 1.
---
 .flowconfig            |  1 +
 flow-typed/match-at.js |  3 +++
 src/Lexer.js           | 57 +++++++-----------------------------------
 src/ParseError.js      | 27 +++++++++++++-------
 src/ParseNode.js       | 38 ++++++++++++++++++----------
 src/Token.js           | 55 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 111 insertions(+), 70 deletions(-)
 create mode 100644 flow-typed/match-at.js
 create mode 100644 src/Token.js
diff --git a/.flowconfig b/.flowconfig
index 1bd2021b..9d4ebfce 100644
--- a/.flowconfig
+++ b/.flowconfig
@@ -5,6 +5,7 @@
 [include]
 
 [libs]
+flow-typed
 
 [lints]
 
diff --git a/flow-typed/match-at.js b/flow-typed/match-at.js
new file mode 100644
index 00000000..86444b04
--- /dev/null
+++ b/flow-typed/match-at.js
@@ -0,0 +1,3 @@
+declare module 'match-at' { // Flow library definition for the match-at package
+    declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null);
+}
diff --git a/src/Lexer.js b/src/Lexer.js
index 88fc3fdc..341989f4 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -1,3 +1,4 @@
+// @flow
 /**
  * The Lexer class handles tokenizing the input in various ways. Since our
  * parser expects us to be able to backtrack, the lexer allows lexing from any
@@ -13,48 +14,7 @@
 
 import matchAt from "match-at";
 import ParseError from "./ParseError";
-
-/**
- * The resulting token returned from `lex`.
- *
- * It consists of the token text plus some position information.
- * The position information is essentially a range in an input string,
- * but instead of referencing the bare input string, we refer to the lexer.
- * That way it is possible to attach extra metadata to the input string,
- * like for example a file name or similar.
- *
- * The position information (all three parameters) is optional,
- * so it is OK to construct synthetic tokens if appropriate.
- * Not providing available position information may lead to
- * degraded error reporting, though.
- *
- * @param {string}  text   the text of this token
- * @param {number=} start  the start offset, zero-based inclusive
- * @param {number=} end    the end offset, zero-based exclusive
- * @param {Lexer=}  lexer  the lexer which in turn holds the input string
- */
-class Token {
-    constructor(text, start, end, lexer) {
-        this.text = text;
-        this.start = start;
-        this.end = end;
-        this.lexer = lexer;
-    }
-
-    /**
-     * Given a pair of tokens (this and endToken), compute a “Token” encompassing
-     * the whole input range enclosed by these two.
-     *
-     * @param {Token}  endToken  last token of the range, inclusive
-     * @param {string} text      the text of the newly constructed token
-     */
-    range(endToken, text) {
-        if (endToken.lexer !== this.lexer) {
-            return new Token(text); // sorry, no position information available
-        }
-        return new Token(text, this.start, endToken.end, this.lexer);
-    }
-}
+import {LexerInterface, Token} from "./Token";
 
 /* The following tokenRegex
  * - matches typical whitespace (but not NBSP etc.) using its first group
@@ -79,11 +39,12 @@ const tokenRegex = new RegExp(
     ")"
 );
 
-/*
- * Main Lexer class
- */
-class Lexer {
-    constructor(input) {
+/** Main Lexer class */
+class Lexer implements LexerInterface {
+    input: string;
+    pos: number;
+
+    constructor(input: string) {
         this.input = input;
         this.pos = 0;
     }
@@ -91,7 +52,7 @@ class Lexer {
     /**
      * This function lexes a single token.
      */
-    lex() {
+    lex(): Token {
         const input = this.input;
         const pos = this.pos;
         if (pos === input.length) {
diff --git a/src/ParseError.js b/src/ParseError.js
index 862ed8b8..39457471 100644
--- a/src/ParseError.js
+++ b/src/ParseError.js
@@ -1,3 +1,7 @@
+// @flow
+import ParseNode from "./ParseNode";
+import {Token} from "./Token";
+
 /**
  * This is the ParseError class, which is the main error thrown by KaTeX
  * functions when something has gone wrong. This is used to distinguish internal
@@ -5,17 +9,20 @@
  *
  * If possible, a caller should provide a Token or ParseNode with information
  * about where in the source string the problem occurred.
- *
- * @param {string} message  The error message
- * @param {(Token|ParseNode)=} token  An object providing position information
  */
 class ParseError {
-    constructor(message, token) {
+    position: number|void; // Error position based on passed-in Token or ParseNode.
+
+    constructor(
+        message: string,         // The error message
+        token?: Token|ParseNode, // An object providing position information
+    ) {
         let error = "KaTeX parse error: " + message;
         let start;
-        let end;
 
-        if (token && token.lexer && token.start <= token.end) {
+        if (token && token.lexer &&
+            token.start != null && token.end != null &&
+            token.start <= token.end) {
             // If we have the input and a position, make the error a bit fancier
 
             // Get the input
@@ -23,7 +30,7 @@ class ParseError {
 
             // Prepend some information
             start = token.start;
-            end = token.end;
+            const end = token.end;
             if (start === input.length) {
                 error += " at end of input: ";
             } else {
@@ -47,20 +54,22 @@ class ParseError {
                 right = input.slice(end);
             }
             error += left + underlined + right;
+
         }
 
         // Some hackery to make ParseError a prototype of Error
         // See http://stackoverflow.com/a/8460753
         const self = new Error(error);
         self.name = "ParseError";
+        // $FlowFixMe
         self.__proto__ = ParseError.prototype;
-
+        // $FlowFixMe
         self.position = start;
         return self;
     }
 }
 
-// More hackery
+// $FlowFixMe More hackery
 ParseError.prototype.__proto__ = Error.prototype;
 
 module.exports = ParseError;
diff --git a/src/ParseNode.js b/src/ParseNode.js
index bfbe4315..cb562126 100644
--- a/src/ParseNode.js
+++ b/src/ParseNode.js
@@ -1,22 +1,34 @@
+// @flow
+import {LexerInterface, Token} from "./Token";
+
 /**
  * The resulting parse tree nodes of the parse tree.
  *
- * It is possible to provide position information, so that a ParseNode can
- * fulfil a role similar to a Token in error reporting.
- * For details on the corresponding properties see Token constructor.
+ * It is possible to provide position information, so that a `ParseNode` can
+ * fulfill a role similar to a `Token` in error reporting.
+ * For details on the corresponding properties see `Token` constructor.
  * Providing such information can lead to better error reporting.
- *
- * @param {string}  type       type of node, like e.g. "ordgroup"
- * @param {?object} value      type-specific representation of the node
- * @param {string}  mode       parse mode in action for this node,
- *                             "math" or "text"
- * @param {Token=} firstToken  first token of the input for this node,
- *                             will omit position information if unset
- * @param {Token=} lastToken   last token of the input for this node,
- *                             will default to firstToken if unset
  */
 export default class ParseNode {
-    constructor(type, value, mode, firstToken, lastToken) {
+    type: *;
+    value: *;
+    mode: *;
+    // TODO: We should combine these to ({lexer, start, end}|void) as they
+    // should all exist together or not exist at all. That way, only a single
+    // void check needs to be done to see if we have metadata.
+    lexer: LexerInterface|void;
+    start: number|void;
+    end: number|void;
+
+    constructor(
+        type: string,       // type of node, like e.g. "ordgroup"
+        value: mixed,       // type-specific representation of the node
+        mode: string,       // parse mode in action for this node, "math" or "text"
+        firstToken?: Token, // first token of the input for this node,
+                            // will omit position information if unset
+        lastToken?: Token,  // last token of the input for this node,
+                            // will default to firstToken if unset
+    ) {
         this.type = type;
         this.value = value;
         this.mode = mode;
diff --git a/src/Token.js b/src/Token.js
new file mode 100644
index 00000000..84716972
--- /dev/null
+++ b/src/Token.js
@@ -0,0 +1,55 @@
+// @flow
+
+/**
+ * Minimal shape of a lexer (its `input` string and `pos`), declared here to
+ * break the circular dependency between Token, Lexer, and ParseError.
+ */
+export interface LexerInterface {input: string, pos: number}
+
+/**
+ * The resulting token returned from `lex`.
+ *
+ * It consists of the token text plus some position information.
+ * The position information is essentially a range in an input string,
+ * but instead of referencing the bare input string, we refer to the lexer.
+ * That way it is possible to attach extra metadata to the input string,
+ * like for example a file name or similar.
+ *
+ * The position information (all three parameters) is optional,
+ * so it is OK to construct synthetic tokens if appropriate.
+ * Not providing available position information may lead to
+ * degraded error reporting, though.
+ */
+export class Token {
+    text: string;
+    start: number|void; // position fields are undefined on synthetic tokens
+    end: number|void;
+    lexer: LexerInterface|void;
+
+    constructor(
+        text: string,           // the text of this token
+        start?: number,         // the start offset, zero-based inclusive
+        end?: number,           // the end offset, zero-based exclusive
+        lexer?: LexerInterface, // the lexer holding the input string
+    ) {
+        this.text = text;
+        this.start = start;
+        this.end = end;
+        this.lexer = lexer;
+    }
+
+    /**
+     * Builds a `Token` with the given text spanning from the start of `this`
+     * to the end of `endToken`; position is dropped if their lexers differ.
+     */
+    range(
+        endToken: Token,  // last token of the range, inclusive
+        text: string,     // the text of the newly constructed token
+    ): Token {
+        if (endToken.lexer !== this.lexer) {
+            return new Token(text); // sorry, no position information available
+        }
+        return new Token(text, this.start, endToken.end, this.lexer);
+    }
+}
+