From 0bc31f1822695aee5b6e67154a84eff0a0084839 Mon Sep 17 00:00:00 2001 From: ylemkimon Date: Sun, 14 Oct 2018 00:58:13 +0900 Subject: [PATCH] Refactor Parser (#1723) * Move unsupported command (undefined control sequence) error to parseSymbol * Change parseGivenFunction and parseFunction to parse only function * Move \begin handling to environment.js * Remove ParsedFunc/Arg, move logics into parseGroup * Fix flow error * Remove parseFunction, rename parseGivenFunction to parseFunction * Minor fixes * Remove previously resolved TODO * Minor fixes * Update flow typing --- src/Parser.js | 382 ++++++++++++----------------------- src/environments/array.js | 7 +- src/functions/delimsizing.js | 7 +- src/functions/environment.js | 39 +++- test/errors-spec.js | 28 +-- 5 files changed, 174 insertions(+), 289 deletions(-) diff --git a/src/Parser.js b/src/Parser.js index 7499e56c..c0c05732 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -1,21 +1,20 @@ // @flow /* eslint no-constant-condition:0 */ import functions from "./functions"; -import environments from "./environments"; -import MacroExpander from "./MacroExpander"; +import MacroExpander, {implicitCommands} from "./MacroExpander"; import symbols, {ATOMS, extraLatin} from "./symbols"; import {validUnit} from "./units"; import {supportedCodepoint} from "./unicodeScripts"; import unicodeAccents from "./unicodeAccents"; import unicodeSymbols from "./unicodeSymbols"; import utils from "./utils"; -import {assertNodeType, checkNodeType} from "./parseNode"; +import {checkNodeType} from "./parseNode"; import ParseError from "./ParseError"; import {combiningDiacriticalMarksEndRegex} from "./Lexer"; import Settings from "./Settings"; import SourceLocation from "./SourceLocation"; import {Token} from "./Token"; -import type {AnyParseNode, SymbolParseNode} from "./parseNode"; +import type {ParseNode, AnyParseNode, SymbolParseNode} from "./parseNode"; import type {Atom, Group} from "./symbols"; import type {Mode, ArgType, BreakToken} from "./types"; import type {FunctionContext, FunctionSpec} from "./defineFunction"; @@ -48,33 +47,9 @@ import type {EnvSpec} from "./defineEnvironment"; * There are also extra `.handle...` functions, which pull out some reused * functionality into self-contained functions. * - * The earlier functions return ParseNodes. - * The later functions (which are called deeper in the parse) sometimes return - * ParsedFuncOrArg, which contain a ParseNode as well as some data about - * whether the parsed object is a function which is missing some arguments, or a - * standalone object which can be used as an argument to another function. + * The functions return ParseNodes. */ -type ParsedFunc = {| - type: "fn", - result: string, // Function name defined via defineFunction (e.g. "\\frac"). - token: Token, -|}; -type ParsedArg = {| - type: "arg", - result: AnyParseNode, - token: Token, -|}; -type ParsedFuncOrArg = ParsedFunc | ParsedArg; - -function newArgument(result: AnyParseNode, token: Token): ParsedArg { - return {type: "arg", result, token}; -} - -function newFunction(token: Token): ParsedFunc { - return {type: "fn", result: token.text, token}; -} - export default class Parser { mode: Mode; gullet: MacroExpander; @@ -190,12 +165,6 @@ export default class Parser { } const atom = this.parseAtom(breakOnTokenText); if (!atom) { - if (!this.settings.throwOnError && lex.text[0] === "\\") { - const errorNode = this.handleUnsupportedCmd(); - body.push(errorNode); - continue; - } - break; } body.push(atom); @@ -275,33 +244,16 @@ export default class Parser { const symbol = symbolToken.text; this.consume(); this.consumeSpaces(); // ignore spaces before sup/subscript argument - const group = this.parseGroup(); + const group = this.parseGroup(name, false, Parser.SUPSUB_GREEDINESS); if (!group) { - if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") { - return this.handleUnsupportedCmd(); - } else { - throw new ParseError( - "Expected group after '" + symbol + "'", - symbolToken - ); - } + throw new ParseError( + "Expected group after '" + symbol + "'", + symbolToken + ); } - if (group.type === "fn") { - // ^ and _ have a greediness, so handle interactions with functions' - // greediness - const funcGreediness = functions[group.result].greediness; - if (funcGreediness > Parser.SUPSUB_GREEDINESS) { - return this.parseGivenFunction(group); - } else { - throw new ParseError( - "Got function '" + group.result + "' with no arguments " + - "as " + name, symbolToken); - } - } else { - return group.result; - } + return group; } /** @@ -339,7 +291,7 @@ export default class Parser { parseAtom(breakOnTokenText?: BreakToken): ?AnyParseNode { // The body of an atom is an implicit group, so that things like // \left(x\right)^2 work correctly. - const base = this.parseImplicitGroup(breakOnTokenText); + const base = this.parseGroup("atom", false, null, breakOnTokenText); // In text mode, we don't have superscripts or subscripts if (this.mode === "text") { @@ -430,116 +382,45 @@ export default class Parser { } } - /** - * Parses an implicit group, which is a group that starts at the end of a - * specified, and ends right before a higher explicit group ends, or at EOL. It - * is used for functions that appear to affect the current style, like \Large or - * \textrm, where instead of keeping a style we just pretend that there is an - * implicit grouping after it until the end of the group. E.g. - * small text {\Large large text} small text again - */ - parseImplicitGroup(breakOnTokenText?: BreakToken): ?AnyParseNode { - const start = this.parseSymbol(); - - if (start == null) { - // If we didn't get anything we handle, fall back to parseFunction - return this.parseFunction(); - } else if (start.type === "arg") { - // Defer to parseGivenFunction if it's not a function we handle - return this.parseGivenFunction(start); - } - - const func = start.result; - - if (func === "\\begin") { - // begin...end is similar to left...right - const begin = - assertNodeType(this.parseGivenFunction(start), "environment"); - - const envName = begin.name; - if (!environments.hasOwnProperty(envName)) { - throw new ParseError( - "No such environment: " + envName, begin.nameGroup); - } - // Build the environment object. Arguments and other information will - // be made available to the begin and end methods using properties. - const env = environments[envName]; - const {args, optArgs} = - this.parseArguments("\\begin{" + envName + "}", env); - const context = { - mode: this.mode, - envName: envName, - parser: this, - }; - const result = env.handler(context, args, optArgs); - this.expect("\\end", false); - const endNameToken = this.nextToken; - let end = this.parseFunction(); - if (!end) { - throw new ParseError("failed to parse function after \\end"); - } - end = assertNodeType(end, "environment"); - if (end.name !== envName) { - throw new ParseError( - `Mismatch: \\begin{${envName}} matched by \\end{${end.name}}`, - endNameToken); - } - return result; - } else { - // Defer to parseGivenFunction if it's not a function we handle - return this.parseGivenFunction(start, breakOnTokenText); - } - } - /** * Parses an entire function, including its base and all of its arguments. - * It also handles the case where the parsed node is not a function. */ - parseFunction(): ?AnyParseNode { - const baseGroup = this.parseGroup(); - return baseGroup ? this.parseGivenFunction(baseGroup) : null; - } - - /** - * Same as parseFunction(), except that the base is provided, guaranteeing a - * non-nullable result. - */ - parseGivenFunction( - baseGroup: ParsedFuncOrArg, + parseFunction( breakOnTokenText?: BreakToken, - ): AnyParseNode { - if (baseGroup.type === "fn") { - const func = baseGroup.result; - const funcData = functions[func]; - if (this.mode === "text" && !funcData.allowedInText) { - throw new ParseError( - "Can't use function '" + func + "' in text mode", - baseGroup.token); - } else if (this.mode === "math" && - funcData.allowedInMath === false) { - throw new ParseError( - "Can't use function '" + func + "' in math mode", - baseGroup.token); - } - - // Consume the command token after possibly switching to the - // mode specified by the function (for instant mode switching), - // and then immediately switch back. - if (funcData.consumeMode) { - const oldMode = this.mode; - this.switchMode(funcData.consumeMode); - this.consume(); - this.switchMode(oldMode); - } else { - this.consume(); - } - const {args, optArgs} = this.parseArguments(func, funcData); - const token = baseGroup.token; - return this.callFunction( - func, args, optArgs, token, breakOnTokenText); - } else { - return baseGroup.result; + name?: string, // For error reporting. + greediness?: ?number, + ): ?AnyParseNode { + const token = this.nextToken; + const func = token.text; + const funcData = functions[func]; + if (!funcData) { + return null; } + if (greediness != null && funcData.greediness <= greediness) { + throw new ParseError( + "Got function '" + func + "' with no arguments" + + (name ? " as " + name : ""), token); + } else if (this.mode === "text" && !funcData.allowedInText) { + throw new ParseError( + "Can't use function '" + func + "' in text mode", token); + } else if (this.mode === "math" && funcData.allowedInMath === false) { + throw new ParseError( + "Can't use function '" + func + "' in math mode", token); + } + + // Consume the command token after possibly switching to the + // mode specified by the function (for instant mode switching), + // and then immediately switch back. + if (funcData.consumeMode) { + const oldMode = this.mode; + this.switchMode(funcData.consumeMode); + this.consume(); + this.switchMode(oldMode); + } else { + this.consume(); + } + const {args, optArgs} = this.parseArguments(func, funcData); + return this.callFunction(func, args, optArgs, token, breakOnTokenText); } /** @@ -605,37 +486,17 @@ export default class Parser { this.consumeSpaces(); } const nextToken = this.nextToken; - let arg = argType ? - this.parseGroupOfType(argType, isOptional) : - this.parseGroup(isOptional); + const arg = this.parseGroupOfType("argument to '" + func + "'", + argType, isOptional, baseGreediness); if (!arg) { if (isOptional) { optArgs.push(null); continue; } - if (!this.settings.throwOnError && - this.nextToken.text[0] === "\\") { - arg = newArgument(this.handleUnsupportedCmd(), nextToken); - } else { - throw new ParseError( - "Expected group after '" + func + "'", nextToken); - } + throw new ParseError( + "Expected group after '" + func + "'", nextToken); } - let argNode: AnyParseNode; - if (arg.type === "fn") { - const argGreediness = - functions[arg.result].greediness; - if (argGreediness > baseGreediness) { - argNode = this.parseGivenFunction(arg); - } else { - throw new ParseError( - "Got function '" + arg.result + "' as " + - "argument to '" + func + "'", nextToken); - } - } else { - argNode = arg.result; - } - (isOptional ? optArgs : args).push(argNode); + (isOptional ? optArgs : args).push(arg); } return {args, optArgs}; @@ -645,35 +506,29 @@ export default class Parser { * Parses a group when the mode is changing. */ parseGroupOfType( - type: ArgType, // Used to describe the mode in error messages. + name: string, + type: ?ArgType, optional: boolean, - ): ?ParsedFuncOrArg { - // Handle `original` argTypes - if (type === "original") { - type = this.mode; + greediness: ?number, + ): ?AnyParseNode { + switch (type) { + case "color": + return this.parseColorGroup(optional); + case "size": + return this.parseSizeGroup(optional); + case "url": + return this.parseUrlGroup(optional); + case "math": + case "text": + return this.parseGroup(name, optional, greediness, undefined, type); + case "original": + case null: + case undefined: + return this.parseGroup(name, optional, greediness); + default: + throw new ParseError( + "Unknown group type as " + name, this.nextToken); } - - if (type === "color") { - return this.parseColorGroup(optional); - } - if (type === "size") { - return this.parseSizeGroup(optional); - } - if (type === "url") { - return this.parseUrlGroup(optional); - } - if (type === "raw") { - const token = this.parseStringGroup("raw", optional, true); - return token ? newArgument({ - type: "raw", - mode: this.mode, - string: token.text, - }, token) : null; - } - - // By the time we get here, type is one of "text" or "math". - // Specify this as mode to parseGroup. - return this.parseGroup(optional, type); } consumeSpaces() { @@ -796,7 +651,7 @@ export default class Parser { /** * Parses a color description. */ - parseColorGroup(optional: boolean): ?ParsedArg { + parseColorGroup(optional: boolean): ?ParseNode<"color-token"> { const res = this.parseStringGroup("color", optional); if (!res) { return null; @@ -812,17 +667,17 @@ export default class Parser { // Predefined color names are all missed by this RegEx pattern. color = "#" + color; } - return newArgument({ + return { type: "color-token", mode: this.mode, color, - }, res); + }; } /** * Parses a size specification, consisting of magnitude and unit. */ - parseSizeGroup(optional: boolean): ?ParsedArg { + parseSizeGroup(optional: boolean): ?ParseNode<"size"> { let res; let isBlank = false; if (!optional && this.nextToken.text !== "{") { @@ -852,18 +707,18 @@ export default class Parser { if (!validUnit(data)) { throw new ParseError("Invalid unit: '" + data.unit + "'", res); } - return newArgument({ + return { type: "size", mode: this.mode, value: data, isBlank, - }, res); + }; } /** * Parses an URL, checking escaped letters and allowed protocols. */ - parseUrlGroup(optional: boolean): ?ParsedArg { + parseUrlGroup(optional: boolean): ?ParseNode<"url"> { const res = this.parseStringGroup("url", optional, true); // get raw string if (!res) { return null; @@ -881,32 +736,43 @@ export default class Parser { throw new ParseError( `Forbidden protocol '${protocol}'`, res); } - return newArgument({ + return { type: "url", mode: this.mode, url, - }, res); + }; } /** * If `optional` is false or absent, this parses an ordinary group, * which is either a single nucleus (like "x") or an expression - * in braces (like "{x+y}"). + * in braces (like "{x+y}") or an implicit group, a group that starts + * at the current position, and ends right before a higher explicit + * group ends, or at EOF. * If `optional` is true, it parses either a bracket-delimited expression * (like "[x+y]") or returns null to indicate the absence of a * bracket-enclosed group. * If `mode` is present, switches to that mode while parsing the group, * and switches back after. */ - parseGroup(optional?: boolean, mode?: Mode): ?ParsedFuncOrArg { + parseGroup( + name: string, // For error reporting. + optional?: boolean, + greediness?: ?number, + breakOnTokenText?: BreakToken, + mode?: Mode, + ): ?AnyParseNode { const outerMode = this.mode; const firstToken = this.nextToken; + const text = firstToken.text; + // Switch to specified mode + if (mode) { + this.switchMode(mode); + } + + let result; // Try to parse an open brace - if (this.nextToken.text === (optional ? "[" : "{")) { - // Switch to specified mode before we expand symbol after brace - if (mode) { - this.switchMode(mode); - } + if (text === (optional ? "[" : "{")) { // Start a new group namespace this.gullet.beginGroup(); // If we get a brace, parse an expression @@ -921,23 +787,35 @@ export default class Parser { this.gullet.endGroup(); // Make sure we get a close brace this.expect(optional ? "]" : "}"); - return newArgument({ + return { type: "ordgroup", mode: this.mode, loc: SourceLocation.range(firstToken, lastToken), body: expression, - }, firstToken.range(lastToken, firstToken.text)); + }; + } else if (optional) { + // Return nothing for an optional group + result = null; } else { - // Otherwise, just return a nucleus, or nothing for an optional group - if (mode) { - this.switchMode(mode); + // If there exists a function with this name, parse the function. + // Otherwise, just return a nucleus + result = this.parseFunction(breakOnTokenText, name, greediness) || + this.parseSymbol(); + if (result == null && text[0] === "\\" && + !implicitCommands.hasOwnProperty(text)) { + if (this.settings.throwOnError) { + throw new ParseError( + "Undefined control sequence: " + text, firstToken); + } + result = this.handleUnsupportedCmd(); } - const result = optional ? null : this.parseSymbol(); - if (mode) { - this.switchMode(outerMode); - } - return result; } + + // Switch mode back + if (mode) { + this.switchMode(outerMode); + } + return result; } /** @@ -986,20 +864,14 @@ export default class Parser { } /** - * Parse a single symbol out of the string. Here, we handle both the functions - * we have defined, as well as the single character symbols + * Parse a single symbol out of the string. Here, we handle single character + * symbols and special functions like verbatim */ - parseSymbol(): ?ParsedFuncOrArg { + parseSymbol(): ?AnyParseNode { const nucleus = this.nextToken; let text = nucleus.text; - if (functions[text]) { - // If there exists a function with this name, we return the - // function and say that it is a function. - // The token will be consumed later in parseGivenFunction - // (after possibly switching modes). - return newFunction(nucleus); - } else if (/^\\verb[^a-zA-Z]/.test(text)) { + if (/^\\verb[^a-zA-Z]/.test(text)) { this.consume(); let arg = text.slice(5); const star = (arg.charAt(0) === "*"); @@ -1013,12 +885,12 @@ export default class Parser { please report what input caused this bug`); } arg = arg.slice(1, -1); // remove first and last char - return newArgument({ + return { type: "verb", mode: "text", body: arg, star, - }, nucleus); + }; } else if (text === "%") { this.consumeComment(); return this.parseSymbol(); @@ -1123,6 +995,6 @@ export default class Parser { }; } } - return newArgument(symbol, nucleus); + return symbol; } } diff --git a/src/environments/array.js b/src/environments/array.js index 43ff075c..8537472f 100644 --- a/src/environments/array.js +++ b/src/environments/array.js @@ -114,11 +114,8 @@ function parseArray( } break; } else if (next === "\\cr") { - const cr = parser.parseFunction(); - if (!cr) { - throw new ParseError(`Failed to parse function after ${next}`); - } - rowGaps.push(assertNodeType(cr, "cr").size); + const cr = assertNodeType(parser.parseFunction(), "cr"); + rowGaps.push(cr.size); // check for \hline(s) following the row separator hLinesBeforeRow.push(getHLines(parser)); diff --git a/src/functions/delimsizing.js b/src/functions/delimsizing.js index f864818f..d81a510e 100644 --- a/src/functions/delimsizing.js +++ b/src/functions/delimsizing.js @@ -170,16 +170,13 @@ defineFunction({ --parser.leftrightDepth; // Check the next token parser.expect("\\right", false); - const right = parser.parseFunction(); - if (!right) { - throw new ParseError('failed to parse function after \\right'); - } + const right = assertNodeType(parser.parseFunction(), "leftright-right"); return { type: "leftright", mode: parser.mode, body, left: delim.text, - right: assertNodeType(right, "leftright-right").delim, + right: right.delim, }; }, htmlBuilder: (group, options) => { diff --git a/src/functions/environment.js b/src/functions/environment.js index fdfbd3d9..d65744eb 100644 --- a/src/functions/environment.js +++ b/src/functions/environment.js @@ -2,9 +2,11 @@ import defineFunction from "../defineFunction"; import ParseError from "../ParseError"; import {assertNodeType} from "../parseNode"; +import environments from "../environments"; // Environment delimiters. HTML/MathML rendering is defined in the corresponding // defineEnvironment definitions. +// $FlowFixMe, "environment" handler returns an environment ParseNode defineFunction({ type: "environment", names: ["\\begin", "\\end"], @@ -12,19 +14,48 @@ defineFunction({ numArgs: 1, argTypes: ["text"], }, - handler({parser}, args) { + handler({parser, funcName}, args) { const nameGroup = args[0]; if (nameGroup.type !== "ordgroup") { throw new ParseError("Invalid environment name", nameGroup); } - let name = ""; + let envName = ""; for (let i = 0; i < nameGroup.body.length; ++i) { - name += assertNodeType(nameGroup.body[i], "textord").text; + envName += assertNodeType(nameGroup.body[i], "textord").text; } + + if (funcName === "\\begin") { + // begin...end is similar to left...right + if (!environments.hasOwnProperty(envName)) { + throw new ParseError( + "No such environment: " + envName, nameGroup); + } + // Build the environment object. Arguments and other information will + // be made available to the begin and end methods using properties. + const env = environments[envName]; + const {args, optArgs} = + parser.parseArguments("\\begin{" + envName + "}", env); + const context = { + mode: parser.mode, + envName, + parser, + }; + const result = env.handler(context, args, optArgs); + parser.expect("\\end", false); + const endNameToken = parser.nextToken; + const end = assertNodeType(parser.parseFunction(), "environment"); + if (end.name !== envName) { + throw new ParseError( + `Mismatch: \\begin{${envName}} matched by \\end{${end.name}}`, + endNameToken); + } + return result; + } + return { type: "environment", mode: parser.mode, - name, + name: envName, nameGroup, }; }, diff --git a/test/errors-spec.js b/test/errors-spec.js index e41091fc..70aea787 100644 --- a/test/errors-spec.js +++ b/test/errors-spec.js @@ -29,7 +29,7 @@ describe("Parser:", function() { it("rejects \\sqrt as argument to ^", function() { expect`1^\sqrt{2}`.toFailWithParseError( "Got function '\\sqrt' with no arguments as superscript" + - " at position 2: 1^̲\\sqrt{2}"); + " at position 3: 1^\\̲s̲q̲r̲t̲{2}"); }); }); @@ -106,28 +106,17 @@ describe("Parser:", function() { " at position 10: 1^{2\\sqrt}̲"); }); it("complains about functions as arguments to others", function() { - // TODO: The position looks pretty wrong here expect`\sqrt\over2`.toFailWithParseError( - "Got function '\\over' as argument to '\\sqrt'" + - " at position 6: \\sqrt\\̲o̲v̲e̲r̲2"); + "Got function '\\over' with no arguments as argument to" + + " '\\sqrt' at position 6: \\sqrt\\̲o̲v̲e̲r̲2"); }); }); - describe("#parseArguments", function() { - it("complains about missing argument at end of input", function() { - expect`2\sqrt`.toFailWithParseError( - "Expected group after '\\sqrt' at end of input: 2\\sqrt"); - }); - it("complains about missing argument at end of group", function() { - expect`1^{2\sqrt}`.toFailWithParseError( - "Expected group after '\\sqrt'" + - " at position 10: 1^{2\\sqrt}̲"); - }); - it("complains about functions as arguments to others", function() { - // TODO: The position looks pretty wrong here - expect`\sqrt\over2`.toFailWithParseError( - "Got function '\\over' as argument to '\\sqrt'" + - " at position 6: \\sqrt\\̲o̲v̲e̲r̲2"); + describe("#parseGroup", function() { + it("complains about undefined control sequence", function() { + expect`\xyz`.toFailWithParseError( + "Undefined control sequence: \\xyz" + + " at position 1: \\̲x̲y̲z̲"); }); }); @@ -248,7 +237,6 @@ describe("environments.js:", function() { describe("array environment", function() { it("rejects unknown column types", function() { - // TODO: The error position here looks strange expect`\begin{array}{cba}\end{array}`.toFailWithParseError( "Unknown column alignment: b at position 16:" + " \\begin{array}{cb̲a}\\end{array}");