diff --git a/src/Lexer.js b/src/Lexer.js index 3bb33870..da34d3a4 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -14,6 +14,7 @@ import matchAt from "match-at"; import ParseError from "./ParseError"; +import SourceLocation from "./SourceLocation"; import {LexerInterface, Token} from "./Token"; /* The following tokenRegex @@ -58,18 +59,18 @@ export default class Lexer implements LexerInterface { const input = this.input; const pos = this.pos; if (pos === input.length) { - return new Token("EOF", pos, pos, this); + return new Token("EOF", new SourceLocation(this, pos, pos)); } const match = matchAt(tokenRegex, input, pos); if (match === null) { throw new ParseError( "Unexpected character: '" + input[pos] + "'", - new Token(input[pos], pos, pos + 1, this)); + new Token(input[pos], new SourceLocation(this, pos, pos + 1))); } const text = match[2] || " "; const start = this.pos; this.pos += match[0].length; const end = this.pos; - return new Token(text, start, end, this); + return new Token(text, new SourceLocation(this, start, end)); } } diff --git a/src/MacroExpander.js b/src/MacroExpander.js index 608c4eff..69c9746c 100644 --- a/src/MacroExpander.js +++ b/src/MacroExpander.js @@ -140,7 +140,7 @@ export default class MacroExpander implements MacroContextInterface { expansion.splice(i + 1, 1); // drop first # } else if (/^[1-9]$/.test(tok.text)) { // replace the placeholder with the indicated argument - expansion.splice(i, 2, ...args[tok.text - 1]); + expansion.splice(i, 2, ...args[+tok.text - 1]); } else { throw new ParseError( "Not a valid argument number", diff --git a/src/ParseError.js b/src/ParseError.js index 48791946..56ed1d85 100644 --- a/src/ParseError.js +++ b/src/ParseError.js @@ -21,17 +21,16 @@ class ParseError { let error = "KaTeX parse error: " + message; let start; - if (token && token.lexer && - token.start != null && token.end != null && - token.start <= token.end) { + const loc = token && token.loc; + if (loc && loc.start <= loc.end) { // If we have the input and a position, make the error a bit fancier // Get the input - const input = token.lexer.input; + const input = loc.lexer.input; // Prepend some information - start = token.start; - const end = token.end; + start = loc.start; + const end = loc.end; if (start === input.length) { error += " at end of input: "; } else { diff --git a/src/ParseNode.js b/src/ParseNode.js index 7479f41a..a78c0d2c 100644 --- a/src/ParseNode.js +++ b/src/ParseNode.js @@ -1,5 +1,6 @@ // @flow -import {LexerInterface, Token} from "./Token"; +import {Token} from "./Token"; +import SourceLocation from "./SourceLocation"; import type {Mode} from "./types"; /** @@ -14,12 +15,7 @@ export default class ParseNode { type: *; value: *; mode: Mode; - // TODO: We should combine these to ({lexer, start, end}|void) as they - // should all exist together or not exist at all. That way, only a single - // void check needs to be done to see if we have metadata. - lexer: LexerInterface | void; - start: number | void; - end: number | void; + loc: ?SourceLocation; constructor( type: string, // type of node, like e.g. "ordgroup" @@ -33,10 +29,6 @@ export default class ParseNode { this.type = type; this.value = value; this.mode = mode; - if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) { - this.lexer = firstToken.lexer; - this.start = firstToken.start; - this.end = (lastToken || firstToken).end; - } + this.loc = SourceLocation.range(firstToken, lastToken); } } diff --git a/src/SourceLocation.js b/src/SourceLocation.js new file mode 100644 index 00000000..4cfe6578 --- /dev/null +++ b/src/SourceLocation.js @@ -0,0 +1,43 @@ +// @flow +import type {LexerInterface} from "./Token"; + +/** + * Lexing or parsing positional information for error reporting. + * This object is immutable. + */ +export default class SourceLocation { + lexer: LexerInterface; // Lexer holding the input string. + start: number; // Start offset, zero-based inclusive. + end: number; // End offset, zero-based exclusive. + + constructor(lexer: LexerInterface, start: number, end: number) { + this.lexer = lexer; + this.start = start; + this.end = end; + Object.freeze(this); // Immutable to allow sharing in range(). + } + + /** + * Merges two `SourceLocation`s from location providers, given they are + * provided in order of appearance. + * - Returns the first one's location if only the first is provided. + * - Returns a merged range of the first and the last if both are provided + * and their lexers match. + * - Otherwise, returns null. + */ + static range( + first?: {loc: ?SourceLocation}, + second?: {loc: ?SourceLocation}, + ): ?SourceLocation { + if (!second) { + return first && first.loc; + } else if (!first || !first.loc || !second.loc || + first.loc.lexer !== second.loc.lexer) { + return null; + } else { + return new SourceLocation( + first.loc.lexer, first.loc.start, second.loc.end); + } + } +} + diff --git a/src/Token.js b/src/Token.js index 84716972..30c90dd8 100644 --- a/src/Token.js +++ b/src/Token.js @@ -1,4 +1,5 @@ // @flow +import SourceLocation from "./SourceLocation"; /** * Interface required to break circular dependency between Token, Lexer, and @@ -15,27 +16,20 @@ export interface LexerInterface {input: string, pos: number} * That way it is possible to attach extra metadata to the input string, * like for example a file name or similar. * - * The position information (all three parameters) is optional, - * so it is OK to construct synthetic tokens if appropriate. - * Not providing available position information may lead to - * degraded error reporting, though. + * The position information is optional, so it is OK to construct synthetic + * tokens if appropriate. Not providing available position information may + * lead to degraded error reporting, though. */ export class Token { - text: *; - start: *; - end: *; - lexer: *; + text: string; + loc: ?SourceLocation; constructor( text: string, // the text of this token - start?: number, // the start offset, zero-based inclusive - end?: number, // the end offset, zero-based exclusive - lexer?: LexerInterface, // the lexer holding the input string + loc: ?SourceLocation, ) { this.text = text; - this.start = start; - this.end = end; - this.lexer = lexer; + this.loc = loc; } /** @@ -46,10 +40,7 @@ export class Token { endToken: Token, // last token of the range, inclusive text: string, // the text of the newly constructed token ) { - if (endToken.lexer !== this.lexer) { - return new Token(text); // sorry, no position information available - } - return new Token(text, this.start, endToken.end, this.lexer); + return new Token(text, SourceLocation.range(this, endToken)); } } diff --git a/test/katex-spec.js b/test/katex-spec.js index 882a452c..1ae324fd 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -61,10 +61,8 @@ const stripPositions = function(expr) { if (typeof expr !== "object" || expr === null) { return expr; } - if (expr.lexer && typeof expr.start === "number") { - delete expr.lexer; - delete expr.start; - delete expr.end; + if (expr.loc && expr.loc.lexer && typeof expr.loc.start === "number") { + delete expr.loc; } Object.keys(expr).forEach(function(key) { stripPositions(expr[key]);