Mirror of https://github.com/Smaug123/KaTeX, synced 2025-10-11 22:18:41 +00:00
lexer: Remove `match-at` dependency, use RegExp (#1447)
* lexer: Remove `match-at` dependency, use RegExp
* chore(package): update flow-bin to version 0.75.0
* Fix flow error
* Remove unused flow libs
* Minor fix
* Throw an error when `RegExp.exec` jumps
.flowconfig
@@ -4,8 +4,6 @@
 [include]
 
 [libs]
-flow-typed
-./node_modules/stylelint/decls
 
 [lints]
 
flow-typed/match-at.js (vendored)
@@ -1,3 +0,0 @@
-declare module 'match-at' {
-  declare module.exports: (re: RegExp, str: string, pos: number) => (Array<string>|null);
-}
flow-typed/object-assign.js (vendored)
@@ -1,6 +0,0 @@
-declare module 'object-assign' {
-  declare module.exports:
-    <T>(target: {[string]: T}, ...sources: Array<{[string]: T}>)
-      => {[string]: T};
-}
-
package-lock.json (generated)
@@ -4212,9 +4212,9 @@
       "dev": true
     },
     "flow-bin": {
-      "version": "0.74.0",
-      "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.74.0.tgz",
-      "integrity": "sha512-tIN9J5qg71S4UbofCu80tve8a+p7Hj7ytwUtu79cLg9KJVVTNnVVJXKgCghVzaZT1Rvl9SMHVPlDs9uYhPHEGQ==",
+      "version": "0.75.0",
+      "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.75.0.tgz",
+      "integrity": "sha1-uW0e6Z07RGoyJr5mtAEyJM6d8mA=",
       "dev": true
     },
     "flush-write-stream": {
@@ -7430,11 +7430,6 @@
       "integrity": "sha512-NcWuJFHDA8V3wkDgR/j4+gZx+YQwstPgfQDV8ndUeWWzta3dnDTBxpVzqS9lkmJAuV5YX35lmyojl6HO5JXAgw==",
       "dev": true
     },
-    "match-at": {
-      "version": "0.1.1",
-      "resolved": "https://registry.npmjs.org/match-at/-/match-at-0.1.1.tgz",
-      "integrity": "sha512-h4Yd392z9mST+dzc+yjuybOGFNOZjmXIPKWjxBd1Bb23r4SmDOsk2NYCU2BMUBGbSpZqwVsZYNq26QS3xfaT3Q=="
-    },
     "math-expression-evaluator": {
       "version": "1.2.17",
       "resolved": "https://registry.npmjs.org/math-expression-evaluator/-/math-expression-evaluator-1.2.17.tgz",
package.json
@@ -30,7 +30,7 @@
     "eslint": "^5.0.0",
     "eslint-plugin-flowtype": "^2.40.1",
     "file-loader": "^1.1.11",
-    "flow-bin": "^0.74.0",
+    "flow-bin": "^0.75.0",
     "husky": "^1.0.0-rc.8",
     "jest": "^23.0.1",
     "jest-serializer-html": "^5.0.0",
@@ -80,7 +80,6 @@
     "dist:dist": "rimraf dist/ && cp -r build/katex/ dist/"
   },
   "dependencies": {
-    "match-at": "^0.1.1",
     "nomnom": "^1.8.1"
   },
   "husky": {
src/Lexer.js
@@ -12,7 +12,6 @@
  * kinds.
  */
 
-import matchAt from "match-at";
 import ParseError from "./ParseError";
 import SourceLocation from "./SourceLocation";
 import {LexerInterface, Token} from "./Token";
@@ -44,8 +43,7 @@ const controlWordWhitespaceRegex = new RegExp(
 const combiningDiacriticalMarkString = "[\u0300-\u036f]";
 export const combiningDiacriticalMarksEndRegex =
     new RegExp(`${combiningDiacriticalMarkString}+$`);
-const tokenRegex = new RegExp(
-    `(${spaceRegexString}+)|` + // whitespace
+const tokenRegexString = `(${spaceRegexString}+)|` + // whitespace
     `(${commentRegexString}` + // comments
     "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
     `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -54,11 +52,8 @@ const tokenRegex = new RegExp(
     "|\\\\verb\\*([^]).*?\\3" + // \verb*
     "|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
     `|${controlWordWhitespaceRegexString}` + // \macroName + spaces
-    `|${controlSymbolRegexString}` + // \\, \', etc.
-    ")"
-);
+    `|${controlSymbolRegexString})`; // \\, \', etc.
 
-// tokenRegex has no ^ marker, as required by matchAt.
 // These regexs are for matching results from tokenRegex,
 // so they do have ^ markers.
 export const controlWordRegex = new RegExp(`^${controlWordRegexString}`);
@@ -67,12 +62,12 @@ const commentRegex = new RegExp(`^${commentRegexString}`);
 /** Main Lexer class */
 export default class Lexer implements LexerInterface {
     input: string;
-    pos: number;
+    tokenRegex: RegExp;
 
     constructor(input: string) {
         // Separate accents from characters
         this.input = input;
-        this.pos = 0;
+        this.tokenRegex = new RegExp(tokenRegexString, 'g');
     }
 
     /**
@@ -80,20 +75,17 @@ export default class Lexer implements LexerInterface {
      */
     lex(): Token {
         const input = this.input;
-        const pos = this.pos;
+        const pos = this.tokenRegex.lastIndex;
         if (pos === input.length) {
             return new Token("EOF", new SourceLocation(this, pos, pos));
         }
-        const match = matchAt(tokenRegex, input, pos);
-        if (match === null) {
+        const match = this.tokenRegex.exec(input);
+        if (match === null || match.index !== pos) {
             throw new ParseError(
                 `Unexpected character: '${input[pos]}'`,
                 new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
         }
         let text = match[2] || " ";
-        const start = this.pos;
-        this.pos += match[0].length;
-        const end = this.pos;
 
         // Trim any trailing whitespace from control word match
         const controlMatch = text.match(controlWordWhitespaceRegex);
@@ -104,7 +96,8 @@ export default class Lexer implements LexerInterface {
         if (commentRegex.test(text)) {
             return this.lex();
         } else {
-            return new Token(text, new SourceLocation(this, pos,
+            return new Token(text, new SourceLocation(this, pos,
+                this.tokenRegex.lastIndex));
         }
     }
 }
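
The core of the change is visible in `lex()`: the per-instance `pos` counter is gone, and the scan position now lives in the `lastIndex` of a `g`-flagged `RegExp` owned by each `Lexer`. Below is a minimal standalone sketch of the technique; the class name and toy token set are illustrative, not the KaTeX source.

// A 'g'-flagged RegExp is stateful: exec() resumes at regex.lastIndex
// and advances it past each match, so lastIndex doubles as the cursor.
class MiniLexer {
    constructor(input) {
        this.input = input;
        this.tokenRegex = /([ \t]+)|([a-z]+|[0-9]+)/g; // toy token set
    }

    lex() {
        const pos = this.tokenRegex.lastIndex;
        if (pos === this.input.length) {
            return "EOF";
        }
        const match = this.tokenRegex.exec(this.input);
        // A global regex never fails *at* the cursor; it searches forward.
        // A jump (match.index !== pos) means input[pos] was unlexable.
        if (match === null || match.index !== pos) {
            throw new Error(`Unexpected character: '${this.input[pos]}'`);
        }
        return match[0]; // exec() has already advanced lastIndex
    }
}

const lexer = new MiniLexer("ab 12");
lexer.lex(); // "ab"
lexer.lex(); // " "
lexer.lex(); // "12"
lexer.lex(); // "EOF"

A sticky (`y`) regex would fail in place instead of jumping, which would make the `match.index !== pos` guard unnecessary, but `y` was not yet supported by all engines KaTeX targeted at the time, which is presumably why the `g` flag plus the index check was chosen.
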
src/Token.js
@@ -5,7 +5,7 @@ import SourceLocation from "./SourceLocation";
 * Interface required to break circular dependency between Token, Lexer, and
 * ParseError.
 */
-export interface LexerInterface {input: string, pos: number}
+export interface LexerInterface {input: string, tokenRegex: RegExp}
 
 /**
 * The resulting token returned from `lex`.
@@ -43,4 +43,3 @@ export class Token {
         return new Token(text, SourceLocation.range(this, endToken));
     }
 }
-
@@ -27,7 +27,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
   "end": 37,
   "lexer": {
    "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
-   "pos": 56
+   "tokenRegex": {
+    "lastIndex": 56
+   }
   },
   "start": 36
  },
@@ -56,7 +58,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
   "end": 39,
   "lexer": {
    "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
-   "pos": 56
+   "tokenRegex": {
+    "lastIndex": 56
+   }
   },
   "start": 38
  },
@@ -87,7 +91,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
   "end": 42,
   "lexer": {
    "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
-   "pos": 56
+   "tokenRegex": {
+    "lastIndex": 56
+   }
   },
   "start": 41
  },
@@ -116,7 +122,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = `
   "end": 44,
   "lexer": {
    "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}",
-   "pos": 56
+   "tokenRegex": {
+    "lastIndex": 56
+   }
   },
   "start": 43
  },
@@ -285,9 +285,9 @@ describe("Lexer:", function() {
 
     describe("#_innerLex", function() {
         it("rejects lone surrogate char", function() {
-            expect("\udcba").toFailWithParseError(
+            expect("\udcba ").toFailWithParseError(
                 "Unexpected character: '\udcba' at position 1:" +
-                " \udcba\u0332");
+                " \udcba\u0332 ");
        });
        it("rejects lone backslash at end of input", function() {
            expect("\\").toFailWithParseError(
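
The test input gains a trailing space so that there is lexable text after the lone surrogate. With the old `matchAt` approach, `"\udcba"` simply failed to match at position 0; a `g`-flagged regex instead searches forward and would silently skip the bad character if anything later matched, so the padded input exercises the new `match.index !== pos` guard. A small illustration, using a toy regex rather than the real `tokenRegex`:

// exec() on a 'g' regex does not fail at the cursor; it searches forward.
const re = /[ a-z]+/g;    // toy stand-in for tokenRegex
const m = re.exec("\udcba ");
console.log(m.index);     // 1, not 0; the lexer's guard turns this jump into a ParseError
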
@@ -23,9 +23,17 @@ const typeFirstCompare = (a, b) => {
     }
 };
 
+const regExpReplacer = (key, value) => {
+    return value instanceof RegExp ? {lastIndex: value.lastIndex} : value;
+};
+
 const serializer = {
     print(val) {
-        return stringify(val, {cmp: typeFirstCompare, space: ' '});
+        return stringify(val, {
+            cmp: typeFirstCompare,
+            space: ' ',
+            replacer: regExpReplacer,
+        });
     },
     test(val) {
         // Leave strings (e.g. XML) to other serializers
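
Because each serialized lexer now carries a live `RegExp`, the snapshot serializer needs a replacer: a bare `RegExp` JSON-serializes as an empty object, which would drop the scan position from the snapshots. `regExpReplacer` collapses any `RegExp` to just its `lastIndex`, the only mutable, test-relevant part (the pattern itself is a constant). A quick sketch of the effect, using plain `JSON.stringify` to stand in for the `stringify` call in the diff above:

const regExpReplacer = (key, value) =>
    value instanceof RegExp ? {lastIndex: value.lastIndex} : value;

const lexer = {input: "a&b", tokenRegex: /x/g};
lexer.tokenRegex.lastIndex = 3;

JSON.stringify(lexer, regExpReplacer);
// => '{"input":"a&b","tokenRegex":{"lastIndex":3}}'

This is exactly the shape that appears in the updated snapshots above: `"tokenRegex": { "lastIndex": 56 }` in place of the old `"pos": 56`.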