To @flow: Token, Lexer, ParseError, and ParseNode. (#839)

* To @flow: Token, Lexer, ParseError, and ParseNode.

* PR fixes 1.
This commit is contained in:
Ashish Myles
2017-09-04 15:27:58 -04:00
committed by Kevin Barabash
parent 12399da73d
commit 13f3eac741
6 changed files with 111 additions and 70 deletions

View File

@@ -5,6 +5,7 @@
[include]
[libs]
flow-typed
[lints]

3
flow-typed/match-at.js vendored Normal file
View File

@@ -0,0 +1,3 @@
// Minimal Flow library definition for the `match-at` npm package.
// matchAt(re, str, pos) returns the capture array when `re` matches `str`
// beginning exactly at offset `pos`, or null when it does not match there.
declare module 'match-at' {
declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null);
}

View File

@@ -1,3 +1,4 @@
// @flow
/**
* The Lexer class handles tokenizing the input in various ways. Since our
* parser expects us to be able to backtrack, the lexer allows lexing from any
@@ -13,48 +14,7 @@
import matchAt from "match-at";
import ParseError from "./ParseError";
/**
* The resulting token returned from `lex`.
*
* It consists of the token text plus some position information.
* The position information is essentially a range in an input string,
* but instead of referencing the bare input string, we refer to the lexer.
* That way it is possible to attach extra metadata to the input string,
* like for example a file name or similar.
*
* The position information (all three parameters) is optional,
* so it is OK to construct synthetic tokens if appropriate.
* Not providing available position information may lead to
* degraded error reporting, though.
*
* @param {string} text the text of this token
* @param {number=} start the start offset, zero-based inclusive
* @param {number=} end the end offset, zero-based exclusive
* @param {Lexer=} lexer the lexer which in turn holds the input string
*/
class Token {
    // Position information (start/end/lexer) is optional; synthetic tokens
    // may omit it at the cost of less precise error reporting.
    constructor(text, start, end, lexer) {
        this.text = text;
        this.start = start;
        this.end = end;
        this.lexer = lexer;
    }

    /**
     * Combine this token with `endToken` (the last token of the range,
     * inclusive) into a single new token carrying `text` and spanning the
     * whole input range enclosed by the pair.
     */
    range(endToken, text) {
        const sameSource = endToken.lexer === this.lexer;
        return sameSource
            ? new Token(text, this.start, endToken.end, this.lexer)
            // Tokens from different lexers share no input string, so no
            // position information can be attached.
            : new Token(text);
    }
}
import {LexerInterface, Token} from "./Token";
/* The following tokenRegex
* - matches typical whitespace (but not NBSP etc.) using its first group
@@ -79,11 +39,12 @@ const tokenRegex = new RegExp(
")"
);
/*
* Main Lexer class
*/
class Lexer {
constructor(input) {
/** Main Lexer class */
class Lexer implements LexerInterface {
input: string;
pos: number;
constructor(input: string) {
this.input = input;
this.pos = 0;
}
@@ -91,7 +52,7 @@ class Lexer {
/**
* This function lexes a single token.
*/
lex() {
lex(): Token {
const input = this.input;
const pos = this.pos;
if (pos === input.length) {

View File

@@ -1,3 +1,7 @@
// @flow
import ParseNode from "./ParseNode";
import {Token} from "./Token";
/**
* This is the ParseError class, which is the main error thrown by KaTeX
* functions when something has gone wrong. This is used to distinguish internal
@@ -5,17 +9,20 @@
*
* If possible, a caller should provide a Token or ParseNode with information
* about where in the source string the problem occurred.
*
* @param {string} message The error message
* @param {(Token|ParseNode)=} token An object providing position information
*/
class ParseError {
constructor(message, token) {
position: number|void; // Error position based on passed-in Token or ParseNode.
constructor(
message: string, // The error message
token?: Token|ParseNode, // An object providing position information
) {
let error = "KaTeX parse error: " + message;
let start;
let end;
if (token && token.lexer && token.start <= token.end) {
if (token && token.lexer &&
token.start != null && token.end != null &&
token.start <= token.end) {
// If we have the input and a position, make the error a bit fancier
// Get the input
@@ -23,7 +30,7 @@ class ParseError {
// Prepend some information
start = token.start;
end = token.end;
const end = token.end;
if (start === input.length) {
error += " at end of input: ";
} else {
@@ -47,20 +54,22 @@ class ParseError {
right = input.slice(end);
}
error += left + underlined + right;
}
// Some hackery to make ParseError a prototype of Error
// See http://stackoverflow.com/a/8460753
const self = new Error(error);
self.name = "ParseError";
// $FlowFixMe
self.__proto__ = ParseError.prototype;
// $FlowFixMe
self.position = start;
return self;
}
}
// More hackery
// $FlowFixMe More hackery
ParseError.prototype.__proto__ = Error.prototype;
module.exports = ParseError;

View File

@@ -1,22 +1,34 @@
// @flow
import {LexerInterface, Token} from "./Token";
/**
* The resulting parse tree nodes of the parse tree.
*
* It is possible to provide position information, so that a ParseNode can
* fulfil a role similar to a Token in error reporting.
* For details on the corresponding properties see Token constructor.
* It is possible to provide position information, so that a `ParseNode` can
* fulfill a role similar to a `Token` in error reporting.
* For details on the corresponding properties see `Token` constructor.
* Providing such information can lead to better error reporting.
*
* @param {string} type type of node, like e.g. "ordgroup"
* @param {?object} value type-specific representation of the node
* @param {string} mode parse mode in action for this node,
* "math" or "text"
* @param {Token=} firstToken first token of the input for this node,
* will omit position information if unset
* @param {Token=} lastToken last token of the input for this node,
* will default to firstToken if unset
*/
export default class ParseNode {
constructor(type, value, mode, firstToken, lastToken) {
type: *;
value: *;
mode: *;
// TODO: We should combine these to ({lexer, start, end}|void) as they
// should all exist together or not exist at all. That way, only a single
// void check needs to be done to see if we have metadata.
lexer: LexerInterface|void;
start: number|void;
end: number|void;
constructor(
type: string, // type of node, like e.g. "ordgroup"
value: mixed, // type-specific representation of the node
mode: string, // parse mode in action for this node, "math" or "text"
firstToken?: Token, // first token of the input for this node,
// will omit position information if unset
lastToken?: Token, // last token of the input for this node,
// will default to firstToken if unset
) {
this.type = type;
this.value = value;
this.mode = mode;

55
src/Token.js Normal file
View File

@@ -0,0 +1,55 @@
// @flow
/**
* Interface required to break circular dependency between Token, Lexer, and
* ParseError.
*/
export interface LexerInterface {input: string, pos: number}
/**
* The resulting token returned from `lex`.
*
* It consists of the token text plus some position information.
* The position information is essentially a range in an input string,
* but instead of referencing the bare input string, we refer to the lexer.
* That way it is possible to attach extra metadata to the input string,
* like for example a file name or similar.
*
* The position information (all three parameters) is optional,
* so it is OK to construct synthetic tokens if appropriate.
* Not providing available position information may lead to
* degraded error reporting, though.
*/
export class Token {
text: *;
start: *;
end: *;
lexer: *;
constructor(
text: string, // the text of this token
start?: number, // the start offset, zero-based inclusive
end?: number, // the end offset, zero-based exclusive
lexer?: LexerInterface, // the lexer holding the input string
) {
this.text = text;
this.start = start;
this.end = end;
this.lexer = lexer;
}
/**
* Given a pair of tokens (this and endToken), compute a `Token` encompassing
* the whole input range enclosed by these two.
*/
range(
endToken: Token, // last token of the range, inclusive
text: string, // the text of the newly constructed token
) {
if (endToken.lexer !== this.lexer) {
return new Token(text); // sorry, no position information available
}
return new Token(text, this.start, endToken.end, this.lexer);
}
}