mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-07 04:08:43 +00:00
Add SourceLocation to encapsulate Token/ParseNode debug information. (#904)
* Add SourceLocation to encapsulate Token/ParseNode debug information.
* Specify concrete Token text type as it captures type mismatches.
* Responded to comments.
This commit is contained in:
committed by
Kevin Barabash
parent
f10ea4cbeb
commit
59bed2ad08
@@ -14,6 +14,7 @@
|
||||
|
||||
import matchAt from "match-at";
|
||||
import ParseError from "./ParseError";
|
||||
import SourceLocation from "./SourceLocation";
|
||||
import {LexerInterface, Token} from "./Token";
|
||||
|
||||
/* The following tokenRegex
|
||||
@@ -58,18 +59,18 @@ export default class Lexer implements LexerInterface {
|
||||
const input = this.input;
|
||||
const pos = this.pos;
|
||||
if (pos === input.length) {
|
||||
return new Token("EOF", pos, pos, this);
|
||||
return new Token("EOF", new SourceLocation(this, pos, pos));
|
||||
}
|
||||
const match = matchAt(tokenRegex, input, pos);
|
||||
if (match === null) {
|
||||
throw new ParseError(
|
||||
"Unexpected character: '" + input[pos] + "'",
|
||||
new Token(input[pos], pos, pos + 1, this));
|
||||
new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
|
||||
}
|
||||
const text = match[2] || " ";
|
||||
const start = this.pos;
|
||||
this.pos += match[0].length;
|
||||
const end = this.pos;
|
||||
return new Token(text, start, end, this);
|
||||
return new Token(text, new SourceLocation(this, start, end));
|
||||
}
|
||||
}
|
||||
|
@@ -140,7 +140,7 @@ export default class MacroExpander implements MacroContextInterface {
|
||||
expansion.splice(i + 1, 1); // drop first #
|
||||
} else if (/^[1-9]$/.test(tok.text)) {
|
||||
// replace the placeholder with the indicated argument
|
||||
expansion.splice(i, 2, ...args[tok.text - 1]);
|
||||
expansion.splice(i, 2, ...args[+tok.text - 1]);
|
||||
} else {
|
||||
throw new ParseError(
|
||||
"Not a valid argument number",
|
||||
|
@@ -21,17 +21,16 @@ class ParseError {
|
||||
let error = "KaTeX parse error: " + message;
|
||||
let start;
|
||||
|
||||
if (token && token.lexer &&
|
||||
token.start != null && token.end != null &&
|
||||
token.start <= token.end) {
|
||||
const loc = token && token.loc;
|
||||
if (loc && loc.start <= loc.end) {
|
||||
// If we have the input and a position, make the error a bit fancier
|
||||
|
||||
// Get the input
|
||||
const input = token.lexer.input;
|
||||
const input = loc.lexer.input;
|
||||
|
||||
// Prepend some information
|
||||
start = token.start;
|
||||
const end = token.end;
|
||||
start = loc.start;
|
||||
const end = loc.end;
|
||||
if (start === input.length) {
|
||||
error += " at end of input: ";
|
||||
} else {
|
||||
|
@@ -1,5 +1,6 @@
|
||||
// @flow
|
||||
import {LexerInterface, Token} from "./Token";
|
||||
import {Token} from "./Token";
|
||||
import SourceLocation from "./SourceLocation";
|
||||
import type {Mode} from "./types";
|
||||
|
||||
/**
|
||||
@@ -14,12 +15,7 @@ export default class ParseNode {
|
||||
type: *;
|
||||
value: *;
|
||||
mode: Mode;
|
||||
// TODO: We should combine these to ({lexer, start, end}|void) as they
|
||||
// should all exist together or not exist at all. That way, only a single
|
||||
// void check needs to be done to see if we have metadata.
|
||||
lexer: LexerInterface | void;
|
||||
start: number | void;
|
||||
end: number | void;
|
||||
loc: ?SourceLocation;
|
||||
|
||||
constructor(
|
||||
type: string, // type of node, like e.g. "ordgroup"
|
||||
@@ -33,10 +29,6 @@ export default class ParseNode {
|
||||
this.type = type;
|
||||
this.value = value;
|
||||
this.mode = mode;
|
||||
if (firstToken && (!lastToken || lastToken.lexer === firstToken.lexer)) {
|
||||
this.lexer = firstToken.lexer;
|
||||
this.start = firstToken.start;
|
||||
this.end = (lastToken || firstToken).end;
|
||||
}
|
||||
this.loc = SourceLocation.range(firstToken, lastToken);
|
||||
}
|
||||
}
|
||||
|
43
src/SourceLocation.js
Normal file
43
src/SourceLocation.js
Normal file
@@ -0,0 +1,43 @@
|
||||
// @flow
|
||||
import type {LexerInterface} from "./Token";
|
||||
|
||||
/**
|
||||
* Positional information recorded while lexing or parsing, used for error
|
||||
* reporting: the lexer holding the input string plus a half-open
|
||||
* [start, end) range of zero-based offsets.
|
||||
* Instances are frozen in the constructor, so this object is immutable and
|
||||
* one instance can be shared (range() may hand back an existing one).
|
||||
*/
|
||||
export default class SourceLocation {
|
||||
lexer: LexerInterface; // Lexer holding the input string.
|
||||
start: number; // Start offset, zero-based inclusive.
|
||||
end: number; // End offset, zero-based exclusive.
|
||||
|
||||
constructor(lexer: LexerInterface, start: number, end: number) {
|
||||
this.lexer = lexer;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
Object.freeze(this); // Immutable to allow sharing in range().
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the locations of two location providers (objects carrying a `loc`
|
||||
* property) into one `SourceLocation`, given they are provided in order of
|
||||
* appearance.
|
||||
* - If `second` is not provided, returns the first one's location as-is
|
||||
* (null or undefined when `first` is missing or has no location).
|
||||
* - If both are provided, both have a location and their lexers match,
|
||||
* returns a new range from the first's start to the second's end.
|
||||
* - Otherwise, returns null.
|
||||
*/
|
||||
static range(
|
||||
first?: {loc: ?SourceLocation},
|
||||
second?: {loc: ?SourceLocation},
|
||||
): ?SourceLocation {
|
||||
if (!second) { // Single provider: nothing to merge.
|
||||
return first && first.loc; // Existing (frozen) instance is passed through.
|
||||
} else if (!first || !first.loc || !second.loc ||
|
||||
first.loc.lexer !== second.loc.lexer) {
|
||||
return null; // A location is missing or the lexers differ.
|
||||
} else {
|
||||
return new SourceLocation( // Span from first's start to second's end.
|
||||
first.loc.lexer, first.loc.start, second.loc.end);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
27
src/Token.js
27
src/Token.js
@@ -1,4 +1,5 @@
|
||||
// @flow
|
||||
import SourceLocation from "./SourceLocation";
|
||||
|
||||
/**
|
||||
* Interface required to break circular dependency between Token, Lexer, and
|
||||
@@ -15,27 +16,20 @@ export interface LexerInterface {input: string, pos: number}
|
||||
* That way it is possible to attach extra metadata to the input string,
|
||||
* like for example a file name or similar.
|
||||
*
|
||||
* The position information (all three parameters) is optional,
|
||||
* so it is OK to construct synthetic tokens if appropriate.
|
||||
* Not providing available position information may lead to
|
||||
* degraded error reporting, though.
|
||||
* The position information is optional, so it is OK to construct synthetic
|
||||
* tokens if appropriate. Not providing available position information may
|
||||
* lead to degraded error reporting, though.
|
||||
*/
|
||||
export class Token {
|
||||
text: *;
|
||||
start: *;
|
||||
end: *;
|
||||
lexer: *;
|
||||
text: string;
|
||||
loc: ?SourceLocation;
|
||||
|
||||
constructor(
|
||||
text: string, // the text of this token
|
||||
start?: number, // the start offset, zero-based inclusive
|
||||
end?: number, // the end offset, zero-based exclusive
|
||||
lexer?: LexerInterface, // the lexer holding the input string
|
||||
loc: ?SourceLocation,
|
||||
) {
|
||||
this.text = text;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.lexer = lexer;
|
||||
this.loc = loc;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -46,10 +40,7 @@ export class Token {
|
||||
endToken: Token, // last token of the range, inclusive
|
||||
text: string, // the text of the newly constructed token
|
||||
) {
|
||||
if (endToken.lexer !== this.lexer) {
|
||||
return new Token(text); // sorry, no position information available
|
||||
}
|
||||
return new Token(text, this.start, endToken.end, this.lexer);
|
||||
return new Token(text, SourceLocation.range(this, endToken));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -61,10 +61,8 @@ const stripPositions = function(expr) {
|
||||
if (typeof expr !== "object" || expr === null) {
|
||||
return expr;
|
||||
}
|
||||
if (expr.lexer && typeof expr.start === "number") {
|
||||
delete expr.lexer;
|
||||
delete expr.start;
|
||||
delete expr.end;
|
||||
if (expr.loc && expr.loc.lexer && typeof expr.loc.start === "number") {
|
||||
delete expr.loc;
|
||||
}
|
||||
Object.keys(expr).forEach(function(key) {
|
||||
stripPositions(expr[key]);
|
||||
|
Reference in New Issue
Block a user