Add SourceLocation to encapsulate Token/ParseNode debug information. (#904)

* Add SourceLocation to encapsulate Token/ParseNode debug information.
* Specify concrete Token text type as it captures type mismatches.
* Responded to comments.
2025-10-07 04:08:43 +00:00 · 2017-09-25 14:29:41 -04:00
parent f10ea4cbeb
commit 59bed2ad08
7 changed files with 68 additions and 44 deletions
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -14,6 +14,7 @@

 import matchAt from "match-at";
 import ParseError from "./ParseError";
+import SourceLocation from "./SourceLocation";
 import {LexerInterface, Token} from "./Token";

 /* The following tokenRegex
@@ -58,18 +59,18 @@ export default class Lexer implements LexerInterface {
        const input = this.input;
        const pos = this.pos;
        if (pos === input.length) {
-            return new Token("EOF", pos, pos, this);
+            return new Token("EOF", new SourceLocation(this, pos, pos));
        }
        const match = matchAt(tokenRegex, input, pos);
        if (match === null) {
            throw new ParseError(
                "Unexpected character: '" + input[pos] + "'",
-                new Token(input[pos], pos, pos + 1, this));
+                new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
        }
        const text = match[2] || " ";
        const start = this.pos;
        this.pos += match[0].length;
        const end = this.pos;
-        return new Token(text, start, end, this);
+        return new Token(text, new SourceLocation(this, start, end));
    }
 }
--- a/src/MacroExpander.js
+++ b/src/MacroExpander.js
@@ -140,7 +140,7 @@ export default class MacroExpander implements MacroContextInterface {
                        expansion.splice(i + 1, 1); // drop first #
                    } else if (/^[1-9]$/.test(tok.text)) {
                        // replace the placeholder with the indicated argument
-                        expansion.splice(i, 2, ...args[tok.text - 1]);
+                        expansion.splice(i, 2, ...args[+tok.text - 1]);
                    } else {
                        throw new ParseError(
                            "Not a valid argument number",
--- a/src/ParseError.js
+++ b/src/ParseError.js
@@ -21,17 +21,16 @@ class ParseError {
        let error = "KaTeX parse error: " + message;
        let start;

-        if (token && token.lexer &&
-            token.start != null && token.end != null &&
-            token.start <= token.end) {
+        const loc = token && token.loc;
+        if (loc && loc.start <= loc.end) {
            // If we have the input and a position, make the error a bit fancier

            // Get the input
-            const input = token.lexer.input;
+            const input = loc.lexer.input;

            // Prepend some information
-            start = token.start;
-            const end = token.end;
+            start = loc.start;
+            const end = loc.end;
            if (start === input.length) {
                error += " at end of input: ";
            } else {
--- a/src/ParseNode.js
+++ b/src/ParseNode.js
@@ -1,5 +1,6 @@
 // @flow
-import {LexerInterface, Token} from "./Token";
+import {Token} from "./Token";
+import SourceLocation from "./SourceLocation";
 import type {Mode} from "./types";

 /**
@@ -14,12 +15,7 @@ export default class ParseNode {
    type: *;
    value: *;
    mode: Mode;
-    // TODO: We should combine these to ({lexer, start, end}|void) as they
-    // should all exist together or not exist at all. That way, only a single
-    // void check needs to be done to see if we have metadata.
-    lexer: LexerInterface | void;
-    start: number | void;
-    end: number | void;
+    loc: ?SourceLocation;

    constructor(
        type: string,       // type of node, like e.g. "ordgroup"
@@ -33,10 +29,6 @@ export default class ParseNode {
        this.type = type;
        this.value = value;
        this.mode = mode;
-        if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) {
-            this.lexer = firstToken.lexer;
-            this.start = firstToken.start;
-            this.end = (lastToken || firstToken).end;
-        }
+        this.loc = SourceLocation.range(firstToken, lastToken);
    }
 }
--- a/src/SourceLocation.js
+++ b/src/SourceLocation.js
@@ -0,0 +1,43 @@
+// @flow
+import type {LexerInterface} from "./Token";
+
+/**
+ * Position of a token or parse node in a lexer's input, for error reporting.
+ * Instances are frozen on construction; the referenced lexer is not frozen.
+ */
+export default class SourceLocation {
+    lexer: LexerInterface; // Lexer holding the input string.
+    start: number;         // Start offset, zero-based inclusive.
+    end: number;           // End offset, zero-based exclusive.
+
+    constructor(lexer: LexerInterface, start: number, end: number) {
+        this.lexer = lexer;
+        this.start = start;
+        this.end = end;
+        Object.freeze(this); // Immutable to allow sharing in range().
+    }
+
+    /**
+     * Merges the `loc`s of two location providers (e.g. Tokens), which must
+     * be given in order of appearance.
+     * - If `second` is missing, returns `first && first.loc` unchanged.
+     * - If both have a `loc` and their lexers match, returns a new location
+     *   from `first.loc.start` to `second.loc.end`.
+     * - Otherwise, returns null.
+     */
+    static range(
+        first?: {loc: ?SourceLocation},
+        second?: {loc: ?SourceLocation},
+    ): ?SourceLocation {
+        if (!second) {
+            return first && first.loc;
+        } else if (!first || !first.loc || !second.loc ||
+                   first.loc.lexer !== second.loc.lexer) {
+            return null;
+        } else {
+            return new SourceLocation(
+                    first.loc.lexer, first.loc.start, second.loc.end);
+        }
+    }
+}
+
--- a/src/Token.js
+++ b/src/Token.js
@@ -1,4 +1,5 @@
 // @flow
+import SourceLocation from "./SourceLocation";

 /**
 * Interface required to break circular dependency between Token, Lexer, and
@@ -15,27 +16,20 @@ export interface LexerInterface {input: string, pos: number}
 * That way it is possible to attach extra metadata to the input string,
 * like for example a file name or similar.
 *
- * The position information (all three parameters) is optional,
- * so it is OK to construct synthetic tokens if appropriate.
- * Not providing available position information may lead to
- * degraded error reporting, though.
+ * The position information is optional, so it is OK to construct synthetic
+ * tokens if appropriate. Not providing available position information may
+ * lead to degraded error reporting, though.
 */
 export class Token {
-    text: *;
-    start: *;
-    end: *;
-    lexer: *;
+    text: string;
+    loc: ?SourceLocation;

    constructor(
        text: string,           // the text of this token
-        start?: number,         // the start offset, zero-based inclusive
-        end?: number,           // the end offset, zero-based exclusive
-        lexer?: LexerInterface, // the lexer holding the input string
+        loc: ?SourceLocation,
    ) {
        this.text = text;
-        this.start = start;
-        this.end = end;
-        this.lexer = lexer;
+        this.loc = loc;
    }

    /**
@@ -46,10 +40,7 @@ export class Token {
        endToken: Token,  // last token of the range, inclusive
        text: string,     // the text of the newly constructed token
    ) {
-        if (endToken.lexer !== this.lexer) {
-            return new Token(text); // sorry, no position information available
-        }
-        return new Token(text, this.start, endToken.end, this.lexer);
+        return new Token(text, SourceLocation.range(this, endToken));
    }
 }

--- a/test/katex-spec.js
+++ b/test/katex-spec.js
@@ -61,10 +61,8 @@ const stripPositions = function(expr) {
    if (typeof expr !== "object" || expr === null) {
        return expr;
    }
-    if (expr.lexer && typeof expr.start === "number") {
-        delete expr.lexer;
-        delete expr.start;
-        delete expr.end;
+    if (expr.loc && expr.loc.lexer && typeof expr.loc.start === "number") {
+        delete expr.loc;
    }
    Object.keys(expr).forEach(function(key) {
        stripPositions(expr[key]);