mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 11:18:39 +00:00
To @flow: Token, Lexer, ParseError, and ParseNode. (#839)
* To @flow: Token, Lexer, ParseError, and ParseNode. * PR fixes 1.
This commit is contained in:
committed by
Kevin Barabash
parent
12399da73d
commit
13f3eac741
@@ -5,6 +5,7 @@
|
||||
[include]
|
||||
|
||||
[libs]
|
||||
flow-typed
|
||||
|
||||
[lints]
|
||||
|
||||
|
3
flow-typed/match-at.js
vendored
Normal file
3
flow-typed/match-at.js
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
// Flow library definition for the untyped `match-at` package (imported in
// src/Lexer.js as `matchAt`). The module's export is declared as a function
// taking a RegExp, a string and a numeric position, and returning either an
// array of strings or null.
// NOTE(review): presumably `pos` is the index in `str` at which the match is
// attempted and null means "no match" -- confirm against the package docs.
declare module 'match-at' {
|
||||
declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null);
|
||||
}
|
57
src/Lexer.js
57
src/Lexer.js
@@ -1,3 +1,4 @@
|
||||
// @flow
|
||||
/**
|
||||
* The Lexer class handles tokenizing the input in various ways. Since our
|
||||
* parser expects us to be able to backtrack, the lexer allows lexing from any
|
||||
@@ -13,48 +14,7 @@
|
||||
|
||||
import matchAt from "match-at";
|
||||
import ParseError from "./ParseError";
|
||||
|
||||
/**
|
||||
* The resulting token returned from `lex`.
|
||||
*
|
||||
* It consists of the token text plus some position information.
|
||||
* The position information is essentially a range in an input string,
|
||||
* but instead of referencing the bare input string, we refer to the lexer.
|
||||
* That way it is possible to attach extra metadata to the input string,
|
||||
* like for example a file name or similar.
|
||||
*
|
||||
* The position information (all three parameters) is optional,
|
||||
* so it is OK to construct synthetic tokens if appropriate.
|
||||
* Not providing available position information may lead to
|
||||
* degraded error reporting, though.
|
||||
*
|
||||
* @param {string} text the text of this token
|
||||
* @param {number=} start the start offset, zero-based inclusive
|
||||
* @param {number=} end the end offset, zero-based exclusive
|
||||
* @param {Lexer=} lexer the lexer which in turn holds the input string
|
||||
*/
|
||||
class Token {
|
||||
constructor(text, start, end, lexer) {
|
||||
this.text = text;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.lexer = lexer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a pair of tokens (this and endToken), compute a “Token” encompassing
|
||||
* the whole input range enclosed by these two.
|
||||
*
|
||||
* @param {Token} endToken last token of the range, inclusive
|
||||
* @param {string} text the text of the newly constructed token
|
||||
*/
|
||||
range(endToken, text) {
|
||||
if (endToken.lexer !== this.lexer) {
|
||||
return new Token(text); // sorry, no position information available
|
||||
}
|
||||
return new Token(text, this.start, endToken.end, this.lexer);
|
||||
}
|
||||
}
|
||||
import {LexerInterface, Token} from "./Token";
|
||||
|
||||
/* The following tokenRegex
|
||||
* - matches typical whitespace (but not NBSP etc.) using its first group
|
||||
@@ -79,11 +39,12 @@ const tokenRegex = new RegExp(
|
||||
")"
|
||||
);
|
||||
|
||||
/*
|
||||
* Main Lexer class
|
||||
*/
|
||||
class Lexer {
|
||||
constructor(input) {
|
||||
/** Main Lexer class */
|
||||
class Lexer implements LexerInterface {
|
||||
input: string;
|
||||
pos: number;
|
||||
|
||||
constructor(input: string) {
|
||||
this.input = input;
|
||||
this.pos = 0;
|
||||
}
|
||||
@@ -91,7 +52,7 @@ class Lexer {
|
||||
/**
|
||||
* This function lexes a single token.
|
||||
*/
|
||||
lex() {
|
||||
lex(): Token {
|
||||
const input = this.input;
|
||||
const pos = this.pos;
|
||||
if (pos === input.length) {
|
||||
|
@@ -1,3 +1,7 @@
|
||||
// @flow
|
||||
import ParseNode from "./ParseNode";
|
||||
import {Token} from "./Token";
|
||||
|
||||
/**
|
||||
* This is the ParseError class, which is the main error thrown by KaTeX
|
||||
* functions when something has gone wrong. This is used to distinguish internal
|
||||
@@ -5,17 +9,20 @@
|
||||
*
|
||||
* If possible, a caller should provide a Token or ParseNode with information
|
||||
* about where in the source string the problem occurred.
|
||||
*
|
||||
* @param {string} message The error message
|
||||
* @param {(Token|ParseNode)=} token An object providing position information
|
||||
*/
|
||||
class ParseError {
|
||||
constructor(message, token) {
|
||||
position: number|void; // Error position based on passed-in Token or ParseNode.
|
||||
|
||||
constructor(
|
||||
message: string, // The error message
|
||||
token?: Token|ParseNode, // An object providing position information
|
||||
) {
|
||||
let error = "KaTeX parse error: " + message;
|
||||
let start;
|
||||
let end;
|
||||
|
||||
if (token && token.lexer && token.start <= token.end) {
|
||||
if (token && token.lexer &&
|
||||
token.start != null && token.end != null &&
|
||||
token.start <= token.end) {
|
||||
// If we have the input and a position, make the error a bit fancier
|
||||
|
||||
// Get the input
|
||||
@@ -23,7 +30,7 @@ class ParseError {
|
||||
|
||||
// Prepend some information
|
||||
start = token.start;
|
||||
end = token.end;
|
||||
const end = token.end;
|
||||
if (start === input.length) {
|
||||
error += " at end of input: ";
|
||||
} else {
|
||||
@@ -47,20 +54,22 @@ class ParseError {
|
||||
right = input.slice(end);
|
||||
}
|
||||
error += left + underlined + right;
|
||||
|
||||
}
|
||||
|
||||
// Some hackery to make ParseError a prototype of Error
|
||||
// See http://stackoverflow.com/a/8460753
|
||||
const self = new Error(error);
|
||||
self.name = "ParseError";
|
||||
// $FlowFixMe
|
||||
self.__proto__ = ParseError.prototype;
|
||||
|
||||
// $FlowFixMe
|
||||
self.position = start;
|
||||
return self;
|
||||
}
|
||||
}
|
||||
|
||||
// More hackery
|
||||
// $FlowFixMe More hackery
|
||||
ParseError.prototype.__proto__ = Error.prototype;
|
||||
|
||||
module.exports = ParseError;
|
||||
|
@@ -1,22 +1,34 @@
|
||||
// @flow
|
||||
import {LexerInterface, Token} from "./Token";
|
||||
|
||||
/**
|
||||
* The resulting parse tree nodes of the parse tree.
|
||||
*
|
||||
* It is possible to provide position information, so that a ParseNode can
|
||||
* fulfil a role similar to a Token in error reporting.
|
||||
* For details on the corresponding properties see Token constructor.
|
||||
* It is possible to provide position information, so that a `ParseNode` can
|
||||
* fulfill a role similar to a `Token` in error reporting.
|
||||
* For details on the corresponding properties see `Token` constructor.
|
||||
* Providing such information can lead to better error reporting.
|
||||
*
|
||||
* @param {string} type type of node, like e.g. "ordgroup"
|
||||
* @param {?object} value type-specific representation of the node
|
||||
* @param {string} mode parse mode in action for this node,
|
||||
* "math" or "text"
|
||||
* @param {Token=} firstToken first token of the input for this node,
|
||||
* will omit position information if unset
|
||||
* @param {Token=} lastToken last token of the input for this node,
|
||||
* will default to firstToken if unset
|
||||
*/
|
||||
export default class ParseNode {
|
||||
constructor(type, value, mode, firstToken, lastToken) {
|
||||
type: *;
|
||||
value: *;
|
||||
mode: *;
|
||||
// TODO: We should combine these to ({lexer, start, end}|void) as they
|
||||
// should all exist together or not exist at all. That way, only a single
|
||||
// void check needs to be done to see if we have metadata.
|
||||
lexer: LexerInterface|void;
|
||||
start: number|void;
|
||||
end: number|void;
|
||||
|
||||
constructor(
|
||||
type: string, // type of node, like e.g. "ordgroup"
|
||||
value: mixed, // type-specific representation of the node
|
||||
mode: string, // parse mode in action for this node, "math" or "text"
|
||||
firstToken?: Token, // first token of the input for this node,
|
||||
// will omit position information if unset
|
||||
lastToken?: Token, // last token of the input for this node,
|
||||
// will default to firstToken if unset
|
||||
) {
|
||||
this.type = type;
|
||||
this.value = value;
|
||||
this.mode = mode;
|
||||
|
55
src/Token.js
Normal file
55
src/Token.js
Normal file
@@ -0,0 +1,55 @@
|
||||
// @flow
|
||||
|
||||
/**
|
||||
* Interface required to break circular dependency between Token, Lexer, and
|
||||
* ParseError.
|
||||
*/
|
||||
export interface LexerInterface {input: string, pos: number}
|
||||
|
||||
/**
|
||||
* The resulting token returned from `lex`.
|
||||
*
|
||||
* It consists of the token text plus some position information.
|
||||
* The position information is essentially a range in an input string,
|
||||
* but instead of referencing the bare input string, we refer to the lexer.
|
||||
* That way it is possible to attach extra metadata to the input string,
|
||||
* like for example a file name or similar.
|
||||
*
|
||||
* The position information (all three parameters) is optional,
|
||||
* so it is OK to construct synthetic tokens if appropriate.
|
||||
* Not providing available position information may lead to
|
||||
* degraded error reporting, though.
|
||||
*/
|
||||
export class Token {
|
||||
text: *;
|
||||
start: *;
|
||||
end: *;
|
||||
lexer: *;
|
||||
|
||||
constructor(
|
||||
text: string, // the text of this token
|
||||
start?: number, // the start offset, zero-based inclusive
|
||||
end?: number, // the end offset, zero-based exclusive
|
||||
lexer?: LexerInterface, // the lexer holding the input string
|
||||
) {
|
||||
this.text = text;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.lexer = lexer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a pair of tokens (this and endToken), compute a `Token` encompassing
|
||||
* the whole input range enclosed by these two.
|
||||
*/
|
||||
range(
|
||||
endToken: Token, // last token of the range, inclusive
|
||||
text: string, // the text of the newly constructed token
|
||||
) {
|
||||
if (endToken.lexer !== this.lexer) {
|
||||
return new Token(text); // sorry, no position information available
|
||||
}
|
||||
return new Token(text, this.start, endToken.end, this.lexer);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user