From 13f3eac7418ee35f9779aa9c4ad90587c06addf3 Mon Sep 17 00:00:00 2001 From: Ashish Myles Date: Mon, 4 Sep 2017 15:27:58 -0400 Subject: [PATCH] To @flow: Token, Lexer, ParseError, and ParseNode. (#839) * To @flow: Token, Lexer, ParseError, and ParseNode. * PR fixes 1. --- .flowconfig | 1 + flow-typed/match-at.js | 3 +++ src/Lexer.js | 57 +++++++----------------------------------- src/ParseError.js | 27 +++++++++++++------- src/ParseNode.js | 38 ++++++++++++++++++---------- src/Token.js | 55 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 70 deletions(-) create mode 100644 flow-typed/match-at.js create mode 100644 src/Token.js diff --git a/.flowconfig b/.flowconfig index 1bd2021b..9d4ebfce 100644 --- a/.flowconfig +++ b/.flowconfig @@ -5,6 +5,7 @@ [include] [libs] +flow-typed [lints] diff --git a/flow-typed/match-at.js b/flow-typed/match-at.js new file mode 100644 index 00000000..86444b04 --- /dev/null +++ b/flow-typed/match-at.js @@ -0,0 +1,3 @@ +declare module 'match-at' { + declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null); +} diff --git a/src/Lexer.js b/src/Lexer.js index 88fc3fdc..341989f4 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -1,3 +1,4 @@ +// @flow /** * The Lexer class handles tokenizing the input in various ways. Since our * parser expects us to be able to backtrack, the lexer allows lexing from any @@ -13,48 +14,7 @@ import matchAt from "match-at"; import ParseError from "./ParseError"; - -/** - * The resulting token returned from `lex`. - * - * It consists of the token text plus some position information. - * The position information is essentially a range in an input string, - * but instead of referencing the bare input string, we refer to the lexer. - * That way it is possible to attach extra metadata to the input string, - * like for example a file name or similar. 
- * - * The position information (all three parameters) is optional, - * so it is OK to construct synthetic tokens if appropriate. - * Not providing available position information may lead to - * degraded error reporting, though. - * - * @param {string} text the text of this token - * @param {number=} start the start offset, zero-based inclusive - * @param {number=} end the end offset, zero-based exclusive - * @param {Lexer=} lexer the lexer which in turn holds the input string - */ -class Token { - constructor(text, start, end, lexer) { - this.text = text; - this.start = start; - this.end = end; - this.lexer = lexer; - } - - /** - * Given a pair of tokens (this and endToken), compute a “Token” encompassing - * the whole input range enclosed by these two. - * - * @param {Token} endToken last token of the range, inclusive - * @param {string} text the text of the newly constructed token - */ - range(endToken, text) { - if (endToken.lexer !== this.lexer) { - return new Token(text); // sorry, no position information available - } - return new Token(text, this.start, endToken.end, this.lexer); - } -} +import {LexerInterface, Token} from "./Token"; /* The following tokenRegex * - matches typical whitespace (but not NBSP etc.) using its first group @@ -79,11 +39,12 @@ const tokenRegex = new RegExp( ")" ); -/* - * Main Lexer class - */ -class Lexer { - constructor(input) { +/** Main Lexer class */ +class Lexer implements LexerInterface { + input: string; + pos: number; + + constructor(input: string) { this.input = input; this.pos = 0; } @@ -91,7 +52,7 @@ class Lexer { /** * This function lexes a single token. 
*/ - lex() { + lex(): Token { const input = this.input; const pos = this.pos; if (pos === input.length) { diff --git a/src/ParseError.js b/src/ParseError.js index 862ed8b8..39457471 100644 --- a/src/ParseError.js +++ b/src/ParseError.js @@ -1,3 +1,7 @@ +// @flow +import ParseNode from "./ParseNode"; +import {Token} from "./Token"; + /** * This is the ParseError class, which is the main error thrown by KaTeX * functions when something has gone wrong. This is used to distinguish internal @@ -5,17 +9,20 @@ * * If possible, a caller should provide a Token or ParseNode with information * about where in the source string the problem occurred. - * - * @param {string} message The error message - * @param {(Token|ParseNode)=} token An object providing position information */ class ParseError { - constructor(message, token) { + position: number|void; // Error position based on passed-in Token or ParseNode. + + constructor( + message: string, // The error message + token?: Token|ParseNode, // An object providing position information + ) { let error = "KaTeX parse error: " + message; let start; - let end; - if (token && token.lexer && token.start <= token.end) { + if (token && token.lexer && + token.start != null && token.end != null && + token.start <= token.end) { // If we have the input and a position, make the error a bit fancier // Get the input @@ -23,7 +30,7 @@ class ParseError { // Prepend some information start = token.start; - end = token.end; + const end = token.end; if (start === input.length) { error += " at end of input: "; } else { @@ -47,20 +54,22 @@ class ParseError { right = input.slice(end); } error += left + underlined + right; + } // Some hackery to make ParseError a prototype of Error // See http://stackoverflow.com/a/8460753 const self = new Error(error); self.name = "ParseError"; + // $FlowFixMe self.__proto__ = ParseError.prototype; - + // $FlowFixMe self.position = start; return self; } } -// More hackery +// $FlowFixMe More hackery 
ParseError.prototype.__proto__ = Error.prototype; module.exports = ParseError; diff --git a/src/ParseNode.js b/src/ParseNode.js index bfbe4315..cb562126 100644 --- a/src/ParseNode.js +++ b/src/ParseNode.js @@ -1,22 +1,34 @@ +// @flow +import {LexerInterface, Token} from "./Token"; + /** * The resulting parse tree nodes of the parse tree. * - * It is possible to provide position information, so that a ParseNode can - * fulfil a role similar to a Token in error reporting. - * For details on the corresponding properties see Token constructor. + * It is possible to provide position information, so that a `ParseNode` can + * fulfill a role similar to a `Token` in error reporting. + * For details on the corresponding properties see `Token` constructor. * Providing such information can lead to better error reporting. - * - * @param {string} type type of node, like e.g. "ordgroup" - * @param {?object} value type-specific representation of the node - * @param {string} mode parse mode in action for this node, - * "math" or "text" - * @param {Token=} firstToken first token of the input for this node, - * will omit position information if unset - * @param {Token=} lastToken last token of the input for this node, - * will default to firstToken if unset */ export default class ParseNode { - constructor(type, value, mode, firstToken, lastToken) { + type: *; + value: *; + mode: *; + // TODO: We should combine these to ({lexer, start, end}|void) as they + // should all exist together or not exist at all. That way, only a single + // void check needs to be done to see if we have metadata. + lexer: LexerInterface|void; + start: number|void; + end: number|void; + + constructor( + type: string, // type of node, like e.g. 
"ordgroup" + value: mixed, // type-specific representation of the node + mode: string, // parse mode in action for this node, "math" or "text" + firstToken?: Token, // first token of the input for this node, + // will omit position information if unset + lastToken?: Token, // last token of the input for this node, + // will default to firstToken if unset + ) { this.type = type; this.value = value; this.mode = mode; diff --git a/src/Token.js b/src/Token.js new file mode 100644 index 00000000..84716972 --- /dev/null +++ b/src/Token.js @@ -0,0 +1,55 @@ +// @flow + +/** + * Interface required to break circular dependency between Token, Lexer, and + * ParseError. + */ +export interface LexerInterface {input: string, pos: number} + +/** + * The resulting token returned from `lex`. + * + * It consists of the token text plus some position information. + * The position information is essentially a range in an input string, + * but instead of referencing the bare input string, we refer to the lexer. + * That way it is possible to attach extra metadata to the input string, + * like for example a file name or similar. + * + * The position information (all three parameters) is optional, + * so it is OK to construct synthetic tokens if appropriate. + * Not providing available position information may lead to + * degraded error reporting, though. + */ +export class Token { + text: *; + start: *; + end: *; + lexer: *; + + constructor( + text: string, // the text of this token + start?: number, // the start offset, zero-based inclusive + end?: number, // the end offset, zero-based exclusive + lexer?: LexerInterface, // the lexer holding the input string + ) { + this.text = text; + this.start = start; + this.end = end; + this.lexer = lexer; + } + + /** + * Given a pair of tokens (this and endToken), compute a `Token` encompassing + * the whole input range enclosed by these two. 
+ */ + range( + endToken: Token, // last token of the range, inclusive + text: string, // the text of the newly constructed token + ) { + if (endToken.lexer !== this.lexer) { + return new Token(text); // sorry, no position information available + } + return new Token(text, this.start, endToken.end, this.lexer); + } +} +