To @flow: Token, Lexer, ParseError, and ParseNode. (#839)

* To @flow: Token, Lexer, ParseError, and ParseNode.

* PR fixes 1.
This commit is contained in:
Ashish Myles
2017-09-04 15:27:58 -04:00
committed by Kevin Barabash
parent 12399da73d
commit 13f3eac741
6 changed files with 111 additions and 70 deletions

View File

@@ -5,6 +5,7 @@
[include]
[libs]
flow-typed
[lints]

3
flow-typed/match-at.js vendored Normal file
View File

@@ -0,0 +1,3 @@
// Minimal Flow library definition for the `match-at` npm package.
// matchAt(re, str, pos) returns the capture array when `re` matches `str`
// beginning exactly at offset `pos`, or null when it does not match there.
declare module 'match-at' {
declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null);
}

View File

@@ -1,3 +1,4 @@
// @flow
/**
* The Lexer class handles tokenizing the input in various ways. Since our
* parser expects us to be able to backtrack, the lexer allows lexing from any
@@ -13,48 +14,7 @@
import matchAt from "match-at";
import ParseError from "./ParseError";
/**
* The resulting token returned from `lex`.
*
* It consists of the token text plus some position information.
* The position information is essentially a range in an input string,
* but instead of referencing the bare input string, we refer to the lexer.
* That way it is possible to attach extra metadata to the input string,
* like for example a file name or similar.
*
* The position information (all three parameters) is optional,
* so it is OK to construct synthetic tokens if appropriate.
* Not providing available position information may lead to
* degraded error reporting, though.
*
* @param {string} text the text of this token
* @param {number=} start the start offset, zero-based inclusive
* @param {number=} end the end offset, zero-based exclusive
* @param {Lexer=} lexer the lexer which in turn holds the input string
*/
class Token {
    // Position information (start/end/lexer) is optional; synthetic tokens
    // may omit it at the cost of less precise error reporting.
    constructor(text, start, end, lexer) {
        this.text = text;
        this.start = start;
        this.end = end;
        this.lexer = lexer;
    }

    /**
     * Combine this token with `endToken` (the last token of the range,
     * inclusive) into a single new token carrying `text` and spanning the
     * whole input range enclosed by the pair.
     */
    range(endToken, text) {
        const sameSource = endToken.lexer === this.lexer;
        return sameSource
            ? new Token(text, this.start, endToken.end, this.lexer)
            // Tokens from different lexers share no input string, so no
            // position information can be attached.
            : new Token(text);
    }
}
import {LexerInterface, Token} from "./Token";
/* The following tokenRegex
* - matches typical whitespace (but not NBSP etc.) using its first group
@@ -79,11 +39,12 @@ const tokenRegex = new RegExp(
")"
);
/*
* Main Lexer class
*/
class Lexer {
constructor(input) {
/** Main Lexer class */
class Lexer implements LexerInterface {
input: string;
pos: number;
constructor(input: string) {
this.input = input;
this.pos = 0;
}
@@ -91,7 +52,7 @@ class Lexer {
/**
* This function lexes a single token.
*/
lex() {
lex(): Token {
const input = this.input;
const pos = this.pos;
if (pos === input.length) {

View File

@@ -1,3 +1,7 @@
// @flow
import ParseNode from "./ParseNode";
import {Token} from "./Token";
/**
* This is the ParseError class, which is the main error thrown by KaTeX
* functions when something has gone wrong. This is used to distinguish internal
@@ -5,17 +9,20 @@
*
* If possible, a caller should provide a Token or ParseNode with information
* about where in the source string the problem occurred.
*
* @param {string} message The error message
* @param {(Token|ParseNode)=} token An object providing position information
*/
class ParseError {
constructor(message, token) {
position: number|void; // Error position based on passed-in Token or ParseNode.
constructor(
message: string, // The error message
token?: Token|ParseNode, // An object providing position information
) {
let error = "KaTeX parse error: " + message;
let start;
let end;
if (token && token.lexer && token.start <= token.end) {
if (token && token.lexer &&
token.start != null && token.end != null &&
token.start <= token.end) {
// If we have the input and a position, make the error a bit fancier
// Get the input
@@ -23,7 +30,7 @@ class ParseError {
// Prepend some information
start = token.start;
end = token.end;
const end = token.end;
if (start === input.length) {
error += " at end of input: ";
} else {
@@ -47,20 +54,22 @@ class ParseError {
right = input.slice(end);
}
error += left + underlined + right;
}
// Some hackery to make ParseError a prototype of Error
// See http://stackoverflow.com/a/8460753
const self = new Error(error);
self.name = "ParseError";
// $FlowFixMe
self.__proto__ = ParseError.prototype;
// $FlowFixMe
self.position = start;
return self;
}
}
// More hackery
// $FlowFixMe More hackery
ParseError.prototype.__proto__ = Error.prototype;
module.exports = ParseError;

View File

@@ -1,22 +1,34 @@
// @flow
import {LexerInterface, Token} from "./Token";
/**
* The resulting parse tree nodes of the parse tree.
*
* It is possible to provide position information, so that a ParseNode can
* fulfil a role similar to a Token in error reporting.
* For details on the corresponding properties see Token constructor.
* It is possible to provide position information, so that a `ParseNode` can
* fulfill a role similar to a `Token` in error reporting.
* For details on the corresponding properties see `Token` constructor.
* Providing such information can lead to better error reporting.
*
* @param {string} type type of node, like e.g. "ordgroup"
* @param {?object} value type-specific representation of the node
* @param {string} mode parse mode in action for this node,
* "math" or "text"
* @param {Token=} firstToken first token of the input for this node,
* will omit position information if unset
* @param {Token=} lastToken last token of the input for this node,
* will default to firstToken if unset
*/
export default class ParseNode {
constructor(type, value, mode, firstToken, lastToken) {
type: *;
value: *;
mode: *;
// TODO: We should combine these to ({lexer, start, end}|void) as they
// should all exist together or not exist at all. That way, only a single
// void check needs to be done to see if we have metadata.
lexer: LexerInterface|void;
start: number|void;
end: number|void;
constructor(
type: string, // type of node, like e.g. "ordgroup"
value: mixed, // type-specific representation of the node
mode: string, // parse mode in action for this node, "math" or "text"
firstToken?: Token, // first token of the input for this node,
// will omit position information if unset
lastToken?: Token, // last token of the input for this node,
// will default to firstToken if unset
) {
this.type = type;
this.value = value;
this.mode = mode;

55
src/Token.js Normal file
View File

@@ -0,0 +1,55 @@
// @flow
/**
* Interface required to break circular dependency between Token, Lexer, and
* ParseError.
*/
export interface LexerInterface {input: string, pos: number}
/**
* The resulting token returned from `lex`.
*
* It consists of the token text plus some position information.
* The position information is essentially a range in an input string,
* but instead of referencing the bare input string, we refer to the lexer.
* That way it is possible to attach extra metadata to the input string,
* like for example a file name or similar.
*
* The position information (all three parameters) is optional,
* so it is OK to construct synthetic tokens if appropriate.
* Not providing available position information may lead to
* degraded error reporting, though.
*/
export class Token {
text: *;
start: *;
end: *;
lexer: *;
constructor(
text: string, // the text of this token
start?: number, // the start offset, zero-based inclusive
end?: number, // the end offset, zero-based exclusive
lexer?: LexerInterface, // the lexer holding the input string
) {
this.text = text;
this.start = start;
this.end = end;
this.lexer = lexer;
}
/**
* Given a pair of tokens (this and endToken), compute a `Token` encompassing
* the whole input range enclosed by these two.
*/
range(
endToken: Token, // last token of the range, inclusive
text: string, // the text of the newly constructed token
) {
if (endToken.lexer !== this.lexer) {
return new Token(text); // sorry, no position information available
}
return new Token(text, this.start, endToken.end, this.lexer);
}
}