diff --git a/.flowconfig b/.flowconfig index ba88cc40..357cbcc4 100644 --- a/.flowconfig +++ b/.flowconfig @@ -4,8 +4,6 @@ [include] [libs] -flow-typed -./node_modules/stylelint/decls [lints] diff --git a/flow-typed/match-at.js b/flow-typed/match-at.js deleted file mode 100644 index 86444b04..00000000 --- a/flow-typed/match-at.js +++ /dev/null @@ -1,3 +0,0 @@ -declare module 'match-at' { - declare module.exports: (re: RegExp, str: string, pos: number) => (Array|null); -} diff --git a/flow-typed/object-assign.js b/flow-typed/object-assign.js deleted file mode 100644 index bc78d3d0..00000000 --- a/flow-typed/object-assign.js +++ /dev/null @@ -1,6 +0,0 @@ -declare module 'object-assign' { - declare module.exports: - (target: {[string]: T}, ...sources: Array<{[string]: T}>) - => {[string]: T}; -} - diff --git a/package-lock.json b/package-lock.json index d9fcb197..e558ead4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4212,9 +4212,9 @@ "dev": true }, "flow-bin": { - "version": "0.74.0", - "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.74.0.tgz", - "integrity": "sha512-tIN9J5qg71S4UbofCu80tve8a+p7Hj7ytwUtu79cLg9KJVVTNnVVJXKgCghVzaZT1Rvl9SMHVPlDs9uYhPHEGQ==", + "version": "0.75.0", + "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.75.0.tgz", + "integrity": "sha1-uW0e6Z07RGoyJr5mtAEyJM6d8mA=", "dev": true }, "flush-write-stream": { @@ -7430,11 +7430,6 @@ "integrity": "sha512-NcWuJFHDA8V3wkDgR/j4+gZx+YQwstPgfQDV8ndUeWWzta3dnDTBxpVzqS9lkmJAuV5YX35lmyojl6HO5JXAgw==", "dev": true }, - "match-at": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/match-at/-/match-at-0.1.1.tgz", - "integrity": "sha512-h4Yd392z9mST+dzc+yjuybOGFNOZjmXIPKWjxBd1Bb23r4SmDOsk2NYCU2BMUBGbSpZqwVsZYNq26QS3xfaT3Q==" - }, "math-expression-evaluator": { "version": "1.2.17", "resolved": "https://registry.npmjs.org/math-expression-evaluator/-/math-expression-evaluator-1.2.17.tgz", diff --git a/package.json b/package.json index 26d2b6e9..d3bf2a4a 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,7 @@ "eslint": "^5.0.0", "eslint-plugin-flowtype": "^2.40.1", "file-loader": "^1.1.11", - "flow-bin": "^0.74.0", + "flow-bin": "^0.75.0", "husky": "^1.0.0-rc.8", "jest": "^23.0.1", "jest-serializer-html": "^5.0.0", @@ -80,7 +80,6 @@ "dist:dist": "rimraf dist/ && cp -r build/katex/ dist/" }, "dependencies": { - "match-at": "^0.1.1", "nomnom": "^1.8.1" }, "husky": { diff --git a/src/Lexer.js b/src/Lexer.js index c6772c75..87e1504d 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -12,7 +12,6 @@ * kinds. */ -import matchAt from "match-at"; import ParseError from "./ParseError"; import SourceLocation from "./SourceLocation"; import {LexerInterface, Token} from "./Token"; @@ -44,8 +43,7 @@ const controlWordWhitespaceRegex = new RegExp( const combiningDiacriticalMarkString = "[\u0300-\u036f]"; export const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMarkString}+$`); -const tokenRegex = new RegExp( - `(${spaceRegexString}+)|` + // whitespace +const tokenRegexString = `(${spaceRegexString}+)|` + // whitespace `(${commentRegexString}` + // comments "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint `${combiningDiacriticalMarkString}*` + // ...plus accents @@ -54,11 +52,8 @@ const tokenRegex = new RegExp( "|\\\\verb\\*([^]).*?\\3" + // \verb* "|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred `|${controlWordWhitespaceRegexString}` + // \macroName + spaces - `|${controlSymbolRegexString}` + // \\, \', etc. - ")" -); + `|${controlSymbolRegexString})`; // \\, \', etc. -// tokenRegex has no ^ marker, as required by matchAt. // These regexs are for matching results from tokenRegex, // so they do have ^ markers. export const controlWordRegex = new RegExp(`^${controlWordRegexString}`); @@ -67,12 +62,12 @@ const commentRegex = new RegExp(`^${commentRegexString}`); /** Main Lexer class */ export default class Lexer implements LexerInterface { input: string; - pos: number; + tokenRegex: RegExp; constructor(input: string) { // Separate accents from characters this.input = input; - this.pos = 0; + this.tokenRegex = new RegExp(tokenRegexString, 'g'); } /** @@ -80,20 +75,17 @@ export default class Lexer implements LexerInterface { */ lex(): Token { const input = this.input; - const pos = this.pos; + const pos = this.tokenRegex.lastIndex; if (pos === input.length) { return new Token("EOF", new SourceLocation(this, pos, pos)); } - const match = matchAt(tokenRegex, input, pos); - if (match === null) { + const match = this.tokenRegex.exec(input); + if (match === null || match.index !== pos) { throw new ParseError( `Unexpected character: '${input[pos]}'`, new Token(input[pos], new SourceLocation(this, pos, pos + 1))); } let text = match[2] || " "; - const start = this.pos; - this.pos += match[0].length; - const end = this.pos; // Trim any trailing whitespace from control word match const controlMatch = text.match(controlWordWhitespaceRegex); @@ -104,7 +96,8 @@ export default class Lexer implements LexerInterface { if (commentRegex.test(text)) { return this.lex(); } else { - return new Token(text, new SourceLocation(this, start, end)); + return new Token(text, new SourceLocation(this, pos, + this.tokenRegex.lastIndex)); } } } diff --git a/src/Token.js b/src/Token.js index 30c90dd8..2a26023f 100644 --- a/src/Token.js +++ b/src/Token.js @@ -5,7 +5,7 @@ import SourceLocation from "./SourceLocation"; * Interface required to break circular dependency between Token, Lexer, and * ParseError. */ -export interface LexerInterface {input: string, pos: number} +export interface LexerInterface {input: string, tokenRegex: RegExp} /** * The resulting token returned from `lex`. @@ -43,4 +43,3 @@ export class Token { return new Token(text, SourceLocation.range(this, endToken)); } } - diff --git a/test/__snapshots__/katex-spec.js.snap b/test/__snapshots__/katex-spec.js.snap index 41708d26..0c74ea57 100755 --- a/test/__snapshots__/katex-spec.js.snap +++ b/test/__snapshots__/katex-spec.js.snap @@ -27,7 +27,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = ` "end": 37, "lexer": { "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", - "pos": 56 + "tokenRegex": { + "lastIndex": 56 + } }, "start": 36 }, @@ -56,7 +58,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = ` "end": 39, "lexer": { "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", - "pos": 56 + "tokenRegex": { + "lastIndex": 56 + } }, "start": 38 }, @@ -87,7 +91,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = ` "end": 42, "lexer": { "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", - "pos": 56 + "tokenRegex": { + "lastIndex": 56 + } }, "start": 41 }, @@ -116,7 +122,9 @@ exports[`A begin/end parser should grab \\arraystretch 1`] = ` "end": 44, "lexer": { "input": "\\\\def\\\\arraystretch{1.5}\\\\begin{matrix}a&b\\\\\\\\c&d\\\\end{matrix}", - "pos": 56 + "tokenRegex": { + "lastIndex": 56 + } }, "start": 43 }, diff --git a/test/errors-spec.js b/test/errors-spec.js index 23c75b48..4e26fe08 100644 --- a/test/errors-spec.js +++ b/test/errors-spec.js @@ -285,9 +285,9 @@ describe("Lexer:", function() { describe("#_innerLex", function() { it("rejects lone surrogate char", function() { - expect("\udcba").toFailWithParseError( + expect("\udcba ").toFailWithParseError( "Unexpected character: '\udcba' at position 1:" + - " \udcba\u0332"); + " \udcba\u0332 "); }); it("rejects lone backslash at end of input", function() { expect("\\").toFailWithParseError( diff --git a/test/setup.js b/test/setup.js index b79e429f..0e7bdedc 100644 --- a/test/setup.js +++ b/test/setup.js @@ -23,9 +23,17 @@ const typeFirstCompare = (a, b) => { } }; +const regExpReplacer = (key, value) => { + return value instanceof RegExp ? {lastIndex: value.lastIndex} : value; +}; + const serializer = { print(val) { - return stringify(val, {cmp: typeFirstCompare, space: ' '}); + return stringify(val, { + cmp: typeFirstCompare, + space: ' ', + replacer: regExpReplacer, + }); }, test(val) { // Leave strings (e.g. XML) to other serializers