lexer: Remove match-at dependency, use RegExp (#1447)

* lexer: Remove `match-at` dependency, use RegExp

* chore(package): update flow-bin to version 0.75.0

* Fix flow error

* Remove unused flow libs

* Minor fix

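* Throw an error when `RegExp.exec` jumps, i.e. when the match it returns starts after the current position (`match.index !== pos`) instead of exactly at it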
This commit is contained in:
ylemkimon
2018-06-28 03:13:27 +09:00
committed by GitHub
parent 12dcb05209
commit 518379aed5
10 changed files with 37 additions and 46 deletions

View File

@@ -4,8 +4,6 @@
[include] [include]
[libs] [libs]
flow-typed
./node_modules/stylelint/decls
[lints] [lints]

View File

@@ -1,3 +0,0 @@
declare module 'match-at' {
declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null);
}

View File

@@ -1,6 +0,0 @@
declare module 'object-assign' {
declare module.exports:
<T>(target: {[string]: T}, ...sources: Array<{[string]: T}>)
=> {[string]: T};
}

11
package-lock.json generated
View File

@@ -4212,9 +4212,9 @@
"dev": true "dev": true
}, },
"flow-bin": { "flow-bin": {
"version": "0.74.0", "version": "0.75.0",
"resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.74.0.tgz", "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.75.0.tgz",
"integrity": "sha512-tIN9J5qg71S4UbofCu80tve8a+p7Hj7ytwUtu79cLg9KJVVTNnVVJXKgCghVzaZT1Rvl9SMHVPlDs9uYhPHEGQ==", "integrity": "sha1-uW0e6Z07RGoyJr5mtAEyJM6d8mA=",
"dev": true "dev": true
}, },
"flush-write-stream": { "flush-write-stream": {
@@ -7430,11 +7430,6 @@
"integrity": "sha512-NcWuJFHDA8V3wkDgR/j4+gZx+YQwstPgfQDV8ndUeWWzta3dnDTBxpVzqS9lkmJAuV5YX35lmyojl6HO5JXAgw==", "integrity": "sha512-NcWuJFHDA8V3wkDgR/j4+gZx+YQwstPgfQDV8ndUeWWzta3dnDTBxpVzqS9lkmJAuV5YX35lmyojl6HO5JXAgw==",
"dev": true "dev": true
}, },
"match-at": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/match-at/-/match-at-0.1.1.tgz",
"integrity": "sha512-h4Yd392z9mST+dzc+yjuybOGFNOZjmXIPKWjxBd1Bb23r4SmDOsk2NYCU2BMUBGbSpZqwVsZYNq26QS3xfaT3Q=="
},
"math-expression-evaluator": { "math-expression-evaluator": {
"version": "1.2.17", "version": "1.2.17",
"resolved": "https://registry.npmjs.org/math-expression-evaluator/-/math-expression-evaluator-1.2.17.tgz", "resolved": "https://registry.npmjs.org/math-expression-evaluator/-/math-expression-evaluator-1.2.17.tgz",

View File

@@ -30,7 +30,7 @@
"eslint": "^5.0.0", "eslint": "^5.0.0",
"eslint-plugin-flowtype": "^2.40.1", "eslint-plugin-flowtype": "^2.40.1",
"file-loader": "^1.1.11", "file-loader": "^1.1.11",
"flow-bin": "^0.74.0", "flow-bin": "^0.75.0",
"husky": "^1.0.0-rc.8", "husky": "^1.0.0-rc.8",
"jest": "^23.0.1", "jest": "^23.0.1",
"jest-serializer-html": "^5.0.0", "jest-serializer-html": "^5.0.0",
@@ -80,7 +80,6 @@
"dist:dist": "rimraf dist/ && cp -r build/katex/ dist/" "dist:dist": "rimraf dist/ && cp -r build/katex/ dist/"
}, },
"dependencies": { "dependencies": {
"match-at": "^0.1.1",
"nomnom": "^1.8.1" "nomnom": "^1.8.1"
}, },
"husky": { "husky": {

View File

@@ -12,7 +12,6 @@
* kinds. * kinds.
*/ */
import matchAt from "match-at";
import ParseError from "./ParseError"; import ParseError from "./ParseError";
import SourceLocation from "./SourceLocation"; import SourceLocation from "./SourceLocation";
import {LexerInterface, Token} from "./Token"; import {LexerInterface, Token} from "./Token";
@@ -44,8 +43,7 @@ const controlWordWhitespaceRegex = new RegExp(
const combiningDiacriticalMarkString = "[\u0300-\u036f]"; const combiningDiacriticalMarkString = "[\u0300-\u036f]";
export const combiningDiacriticalMarksEndRegex = export const combiningDiacriticalMarksEndRegex =
new RegExp(`${combiningDiacriticalMarkString}+$`); new RegExp(`${combiningDiacriticalMarkString}+$`);
const tokenRegex = new RegExp( const tokenRegexString = `(${spaceRegexString}+)|` + // whitespace
`(${spaceRegexString}+)|` + // whitespace
`(${commentRegexString}` + // comments `(${commentRegexString}` + // comments
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
`${combiningDiacriticalMarkString}*` + // ...plus accents `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -54,11 +52,8 @@ const tokenRegex = new RegExp(
"|\\\\verb\\*([^]).*?\\3" + // \verb* "|\\\\verb\\*([^]).*?\\3" + // \verb*
"|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred "|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
`|${controlWordWhitespaceRegexString}` + // \macroName + spaces `|${controlWordWhitespaceRegexString}` + // \macroName + spaces
`|${controlSymbolRegexString}` + // \\, \', etc. `|${controlSymbolRegexString})`; // \\, \', etc.
")"
);
// tokenRegex has no ^ marker, as required by matchAt.
// These regexs are for matching results from tokenRegex, // These regexs are for matching results from tokenRegex,
// so they do have ^ markers. // so they do have ^ markers.
export const controlWordRegex = new RegExp(`^${controlWordRegexString}`); export const controlWordRegex = new RegExp(`^${controlWordRegexString}`);
@@ -67,12 +62,12 @@ const commentRegex = new RegExp(`^${commentRegexString}`);
/** Main Lexer class */ /** Main Lexer class */
export default class Lexer implements LexerInterface { export default class Lexer implements LexerInterface {
input: string; input: string;
pos: number; tokenRegex: RegExp;
constructor(input: string) { constructor(input: string) {
// Separate accents from characters // Separate accents from characters
this.input = input; this.input = input;
this.pos = 0; this.tokenRegex = new RegExp(tokenRegexString, 'g');
} }
/** /**
@@ -80,20 +75,17 @@ export default class Lexer implements LexerInterface {
*/ */
lex(): Token { lex(): Token {
const input = this.input; const input = this.input;
const pos = this.pos; const pos = this.tokenRegex.lastIndex;
if (pos === input.length) { if (pos === input.length) {
return new Token("EOF", new SourceLocation(this, pos, pos)); return new Token("EOF", new SourceLocation(this, pos, pos));
} }
const match = matchAt(tokenRegex, input, pos); const match = this.tokenRegex.exec(input);
if (match === null) { if (match === null || match.index !== pos) {
throw new ParseError( throw new ParseError(
`Unexpected character: '${input[pos]}'`, `Unexpected character: '${input[pos]}'`,
new Token(input[pos], new SourceLocation(this, pos, pos + 1))); new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
} }
let text = match[2] || " "; let text = match[2] || " ";
const start = this.pos;
this.pos += match[0].length;
const end = this.pos;
// Trim any trailing whitespace from control word match // Trim any trailing whitespace from control word match
const controlMatch = text.match(controlWordWhitespaceRegex); const controlMatch = text.match(controlWordWhitespaceRegex);
@@ -104,7 +96,8 @@ export default class Lexer implements LexerInterface {
if (commentRegex.test(text)) { if (commentRegex.test(text)) {
return this.lex(); return this.lex();
} else { } else {
return new Token(text, new SourceLocation(this, start, end)); return new Token(text, new SourceLocation(this, pos,
this.tokenRegex.lastIndex));
} }
} }
} }

View File

@@ -5,7 +5,7 @@ import SourceLocation from "./SourceLocation";
* Interface required to break circular dependency between Token, Lexer, and * Interface required to break circular dependency between Token, Lexer, and
* ParseError. * ParseError.
*/ */
export interface LexerInterface {input: string, pos: number} export interface LexerInterface {input: string, tokenRegex: RegExp}
/** /**
* The resulting token returned from `lex`. * The resulting token returned from `lex`.
@@ -43,4 +43,3 @@ export class Token {
return new Token(text, SourceLocation.range(this, endToken)); return new Token(text, SourceLocation.range(this, endToken));
} }
} }

View File

@@ -27,7 +27,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
"end": 37, "end": 37,
"lexer": { "lexer": {
"input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
"pos": 56 "tokenRegex": {
"lastIndex": 56
}
}, },
"start": 36 "start": 36
}, },
@@ -56,7 +58,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
"end": 39, "end": 39,
"lexer": { "lexer": {
"input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
"pos": 56 "tokenRegex": {
"lastIndex": 56
}
}, },
"start": 38 "start": 38
}, },
@@ -87,7 +91,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
"end": 42, "end": 42,
"lexer": { "lexer": {
"input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
"pos": 56 "tokenRegex": {
"lastIndex": 56
}
}, },
"start": 41 "start": 41
}, },
@@ -116,7 +122,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
"end": 44, "end": 44,
"lexer": { "lexer": {
"input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
"pos": 56 "tokenRegex": {
"lastIndex": 56
}
}, },
"start": 43 "start": 43
}, },

View File

@@ -285,9 +285,9 @@ describe("Lexer:", function() {
describe("#_innerLex", function() { describe("#_innerLex", function() {
it("rejects lone surrogate char", function() { it("rejects lone surrogate char", function() {
expect("\udcba").toFailWithParseError( expect("\udcba ").toFailWithParseError(
"Unexpected character: '\udcba' at position 1:" + "Unexpected character: '\udcba' at position 1:" +
" \udcba\u0332"); " \udcba\u0332 ");
}); });
it("rejects lone backslash at end of input", function() { it("rejects lone backslash at end of input", function() {
expect("\\").toFailWithParseError( expect("\\").toFailWithParseError(

View File

@@ -23,9 +23,17 @@ const typeFirstCompare = (a, b) => {
} }
}; };
const regExpReplacer = (key, value) => {
return value instanceof RegExp ? {lastIndex: value.lastIndex} : value;
};
const serializer = { const serializer = {
print(val) { print(val) {
return stringify(val, {cmp: typeFirstCompare, space: ' '}); return stringify(val, {
cmp: typeFirstCompare,
space: ' ',
replacer: regExpReplacer,
});
}, },
test(val) { test(val) {
// Leave strings (e.g. XML) to other serializers // Leave strings (e.g. XML) to other serializers