mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-10 21:48:41 +00:00
lexer: Remove match-at dependency, use RegExp (#1447)
* lexer: Remove `match-at` dependency, use RegExp
* chore(package): update flow-bin to version 0.75.0
* Fix flow error
* Remove unused flow libs
* Minor fix
* Throw an error when `RegExp.exec` jumps
This commit is contained in:
25
src/Lexer.js
25
src/Lexer.js
@@ -12,7 +12,6 @@
|
||||
* kinds.
|
||||
*/
|
||||
|
||||
import matchAt from "match-at";
|
||||
import ParseError from "./ParseError";
|
||||
import SourceLocation from "./SourceLocation";
|
||||
import {LexerInterface, Token} from "./Token";
|
||||
@@ -44,8 +43,7 @@ const controlWordWhitespaceRegex = new RegExp(
|
||||
const combiningDiacriticalMarkString = "[\u0300-\u036f]";
|
||||
export const combiningDiacriticalMarksEndRegex =
|
||||
new RegExp(`${combiningDiacriticalMarkString}+$`);
|
||||
const tokenRegex = new RegExp(
|
||||
`(${spaceRegexString}+)|` + // whitespace
|
||||
const tokenRegexString = `(${spaceRegexString}+)|` + // whitespace
|
||||
`(${commentRegexString}` + // comments
|
||||
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
||||
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
||||
@@ -54,11 +52,8 @@ const tokenRegex = new RegExp(
|
||||
"|\\\\verb\\*([^]).*?\\3" + // \verb*
|
||||
"|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
|
||||
`|${controlWordWhitespaceRegexString}` + // \macroName + spaces
|
||||
`|${controlSymbolRegexString}` + // \\, \', etc.
|
||||
")"
|
||||
);
|
||||
`|${controlSymbolRegexString})`; // \\, \', etc.
|
||||
|
||||
// tokenRegex has no ^ marker, as required by matchAt.
|
||||
// These regexs are for matching results from tokenRegex,
|
||||
// so they do have ^ markers.
|
||||
export const controlWordRegex = new RegExp(`^${controlWordRegexString}`);
|
||||
@@ -67,12 +62,12 @@ const commentRegex = new RegExp(`^${commentRegexString}`);
|
||||
/** Main Lexer class */
|
||||
export default class Lexer implements LexerInterface {
|
||||
input: string;
|
||||
pos: number;
|
||||
tokenRegex: RegExp;
|
||||
|
||||
constructor(input: string) {
|
||||
// Separate accents from characters
|
||||
this.input = input;
|
||||
this.pos = 0;
|
||||
this.tokenRegex = new RegExp(tokenRegexString, 'g');
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -80,20 +75,17 @@ export default class Lexer implements LexerInterface {
|
||||
*/
|
||||
lex(): Token {
|
||||
const input = this.input;
|
||||
const pos = this.pos;
|
||||
const pos = this.tokenRegex.lastIndex;
|
||||
if (pos === input.length) {
|
||||
return new Token("EOF", new SourceLocation(this, pos, pos));
|
||||
}
|
||||
const match = matchAt(tokenRegex, input, pos);
|
||||
if (match === null) {
|
||||
const match = this.tokenRegex.exec(input);
|
||||
if (match === null || match.index !== pos) {
|
||||
throw new ParseError(
|
||||
`Unexpected character: '${input[pos]}'`,
|
||||
new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
|
||||
}
|
||||
let text = match[2] || " ";
|
||||
const start = this.pos;
|
||||
this.pos += match[0].length;
|
||||
const end = this.pos;
|
||||
|
||||
// Trim any trailing whitespace from control word match
|
||||
const controlMatch = text.match(controlWordWhitespaceRegex);
|
||||
@@ -104,7 +96,8 @@ export default class Lexer implements LexerInterface {
|
||||
if (commentRegex.test(text)) {
|
||||
return this.lex();
|
||||
} else {
|
||||
return new Token(text, new SourceLocation(this, start, end));
|
||||
return new Token(text, new SourceLocation(this, pos,
|
||||
this.tokenRegex.lastIndex));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ import SourceLocation from "./SourceLocation";
|
||||
* Interface required to break circular dependency between Token, Lexer, and
|
||||
* ParseError.
|
||||
*/
|
||||
export interface LexerInterface {input: string, pos: number}
|
||||
export interface LexerInterface {input: string, tokenRegex: RegExp}
|
||||
|
||||
/**
|
||||
* The resulting token returned from `lex`.
|
||||
@@ -43,4 +43,3 @@ export class Token {
|
||||
return new Token(text, SourceLocation.range(this, endToken));
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user