Add SourceLocation to encapsulate Token/ParseNode debug information. (#904)

* Add SourceLocation to encapsulate Token/ParseNode debug information.

* Specify concrete Token text type as it captures type mismatches.

* Responded to comments.
This commit is contained in:
Ashish Myles
2017-09-25 14:29:41 -04:00
committed by Kevin Barabash
parent f10ea4cbeb
commit 59bed2ad08
7 changed files with 68 additions and 44 deletions

View File

@@ -14,6 +14,7 @@
import matchAt from "match-at"; import matchAt from "match-at";
import ParseError from "./ParseError"; import ParseError from "./ParseError";
import SourceLocation from "./SourceLocation";
import {LexerInterface, Token} from "./Token"; import {LexerInterface, Token} from "./Token";
/* The following tokenRegex /* The following tokenRegex
@@ -58,18 +59,18 @@ export default class Lexer implements LexerInterface {
const input = this.input; const input = this.input;
const pos = this.pos; const pos = this.pos;
if (pos === input.length) { if (pos === input.length) {
return new Token("EOF", pos, pos, this); return new Token("EOF", new SourceLocation(this, pos, pos));
} }
const match = matchAt(tokenRegex, input, pos); const match = matchAt(tokenRegex, input, pos);
if (match === null) { if (match === null) {
throw new ParseError( throw new ParseError(
"Unexpected character: '" + input[pos] + "'", "Unexpected character: '" + input[pos] + "'",
new Token(input[pos], pos, pos + 1, this)); new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
} }
const text = match[2] || " "; const text = match[2] || " ";
const start = this.pos; const start = this.pos;
this.pos += match[0].length; this.pos += match[0].length;
const end = this.pos; const end = this.pos;
return new Token(text, start, end, this); return new Token(text, new SourceLocation(this, start, end));
} }
} }

View File

@@ -140,7 +140,7 @@ export default class MacroExpander implements MacroContextInterface {
expansion.splice(i + 1, 1); // drop first # expansion.splice(i + 1, 1); // drop first #
} else if (/^[1-9]$/.test(tok.text)) { } else if (/^[1-9]$/.test(tok.text)) {
// replace the placeholder with the indicated argument // replace the placeholder with the indicated argument
expansion.splice(i, 2, ...args[tok.text - 1]); expansion.splice(i, 2, ...args[+tok.text - 1]);
} else { } else {
throw new ParseError( throw new ParseError(
"Not a valid argument number", "Not a valid argument number",

View File

@@ -21,17 +21,16 @@ class ParseError {
let error = "KaTeX parse error: " + message; let error = "KaTeX parse error: " + message;
let start; let start;
if (token && token.lexer && const loc = token && token.loc;
token.start != null && token.end != null && if (loc && loc.start <= loc.end) {
token.start <= token.end) {
// If we have the input and a position, make the error a bit fancier // If we have the input and a position, make the error a bit fancier
// Get the input // Get the input
const input = token.lexer.input; const input = loc.lexer.input;
// Prepend some information // Prepend some information
start = token.start; start = loc.start;
const end = token.end; const end = loc.end;
if (start === input.length) { if (start === input.length) {
error += " at end of input: "; error += " at end of input: ";
} else { } else {

View File

@@ -1,5 +1,6 @@
// @flow // @flow
import {LexerInterface, Token} from "./Token"; import {Token} from "./Token";
import SourceLocation from "./SourceLocation";
import type {Mode} from "./types"; import type {Mode} from "./types";
/** /**
@@ -14,12 +15,7 @@ export default class ParseNode {
type: *; type: *;
value: *; value: *;
mode: Mode; mode: Mode;
// TODO: We should combine these to ({lexer, start, end}|void) as they loc: ?SourceLocation;
// should all exist together or not exist at all. That way, only a single
// void check needs to be done to see if we have metadata.
lexer: LexerInterface | void;
start: number | void;
end: number | void;
constructor( constructor(
type: string, // type of node, like e.g. "ordgroup" type: string, // type of node, like e.g. "ordgroup"
@@ -33,10 +29,6 @@ export default class ParseNode {
this.type = type; this.type = type;
this.value = value; this.value = value;
this.mode = mode; this.mode = mode;
if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) { this.loc = SourceLocation.range(firstToken, lastToken);
this.lexer = firstToken.lexer;
this.start = firstToken.start;
this.end = (lastToken || firstToken).end;
}
} }
} }

43
src/SourceLocation.js Normal file
View File

@@ -0,0 +1,43 @@
// @flow
import type {LexerInterface} from "./Token";
/**
* Lexing or parsing positional information for error reporting.
* This object is immutable.
*/
export default class SourceLocation {
lexer: LexerInterface; // Lexer holding the input string.
start: number; // Start offset, zero-based inclusive.
end: number; // End offset, zero-based exclusive.
constructor(lexer: LexerInterface, start: number, end: number) {
this.lexer = lexer;
this.start = start;
this.end = end;
Object.freeze(this); // Immutable to allow sharing in range().
}
/**
* Merges two `SourceLocation`s from location providers, given they are
* provided in order of appearance.
* - Returns the first one's location if only the first is provided.
* - Returns a merged range of the first and the last if both are provided
* and their lexers match.
* - Otherwise, returns null.
*/
static range(
first?: {loc: ?SourceLocation},
second?: {loc: ?SourceLocation},
): ?SourceLocation {
if (!second) {
return first && first.loc;
} else if (!first || !first.loc || !second.loc ||
first.loc.lexer !== second.loc.lexer) {
return null;
} else {
return new SourceLocation(
first.loc.lexer, first.loc.start, second.loc.end);
}
}
}

View File

@@ -1,4 +1,5 @@
// @flow // @flow
import SourceLocation from "./SourceLocation";
/** /**
* Interface required to break circular dependency between Token, Lexer, and * Interface required to break circular dependency between Token, Lexer, and
@@ -15,27 +16,20 @@ export interface LexerInterface {input: string, pos: number}
* That way it is possible to attach extra metadata to the input string, * That way it is possible to attach extra metadata to the input string,
* like for example a file name or similar. * like for example a file name or similar.
* *
* The position information (all three parameters) is optional, * The position information is optional, so it is OK to construct synthetic
* so it is OK to construct synthetic tokens if appropriate. * tokens if appropriate. Not providing available position information may
* Not providing available position information may lead to * lead to degraded error reporting, though.
* degraded error reporting, though.
*/ */
export class Token { export class Token {
text: *; text: string;
start: *; loc: ?SourceLocation;
end: *;
lexer: *;
constructor( constructor(
text: string, // the text of this token text: string, // the text of this token
start?: number, // the start offset, zero-based inclusive loc: ?SourceLocation,
end?: number, // the end offset, zero-based exclusive
lexer?: LexerInterface, // the lexer holding the input string
) { ) {
this.text = text; this.text = text;
this.start = start; this.loc = loc;
this.end = end;
this.lexer = lexer;
} }
/** /**
@@ -46,10 +40,7 @@ export class Token {
endToken: Token, // last token of the range, inclusive endToken: Token, // last token of the range, inclusive
text: string, // the text of the newly constructed token text: string, // the text of the newly constructed token
) { ) {
if (endToken.lexer !== this.lexer) { return new Token(text, SourceLocation.range(this, endToken));
return new Token(text); // sorry, no position information available
}
return new Token(text, this.start, endToken.end, this.lexer);
} }
} }

View File

@@ -61,10 +61,8 @@ const stripPositions = function(expr) {
if (typeof expr !== "object" || expr === null) { if (typeof expr !== "object" || expr === null) {
return expr; return expr;
} }
if (expr.lexer && typeof expr.start === "number") { if (expr.loc && expr.loc.lexer && typeof expr.loc.start === "number") {
delete expr.lexer; delete expr.loc;
delete expr.start;
delete expr.end;
} }
Object.keys(expr).forEach(function(key) { Object.keys(expr).forEach(function(key) {
stripPositions(expr[key]); stripPositions(expr[key]);