From cb7f166a7ed9a9d533c8a74ec6b0aa8ebc30a759 Mon Sep 17 00:00:00 2001 From: Ashish Myles Date: Wed, 6 Sep 2017 21:39:50 -0400 Subject: [PATCH] To @flow: MacroExpander. (#845) * To @flow: MacroExpander. * Explicitly pass context into defineMacro called with a function. Instead of binding `this` when `defineMacro` is invoked with a function, we now pass an explicit context as a parameter to the function. This is a bit more obvious and is currently more type-safe due to a bug in `@flow`: https://github.com/facebook/flow/issues/4809 * Per feedback, rename some types, fields, and variables. --- flow-typed/object-assign.js | 6 ++ src/MacroExpander.js | 112 +++++++++++++++++++++--------------- src/macros.js | 51 ++++++++++++---- 3 files changed, 113 insertions(+), 56 deletions(-) create mode 100644 flow-typed/object-assign.js diff --git a/flow-typed/object-assign.js b/flow-typed/object-assign.js new file mode 100644 index 00000000..bc78d3d0 --- /dev/null +++ b/flow-typed/object-assign.js @@ -0,0 +1,6 @@ +declare module 'object-assign' { + declare module.exports: + <T>(target: {[string]: T}, ...sources: Array<{[string]: T}>) + => {[string]: T}; +} + diff --git a/src/MacroExpander.js b/src/MacroExpander.js index 9b5ee4c1..608c4eff 100644 --- a/src/MacroExpander.js +++ b/src/MacroExpander.js @@ -1,3 +1,4 @@ +// @flow /** * This file contains the “gullet” where macros are expanded * until only non-macro tokens remain. 
@@ -9,8 +10,15 @@ import builtinMacros from "./macros"; import ParseError from "./ParseError"; import objectAssign from "object-assign"; -class MacroExpander { - constructor(input, macros) { +import type {MacroContextInterface, MacroMap, MacroExpansion} from "./macros"; + +export default class MacroExpander implements MacroContextInterface { + lexer: Lexer; + macros: MacroMap; + stack: Token[]; + discardedWhiteSpace: Token[]; + + constructor(input: string, macros: MacroMap) { this.lexer = new Lexer(input); this.macros = objectAssign({}, builtinMacros, macros); this.stack = []; // contains tokens in REVERSE order @@ -21,7 +29,7 @@ class MacroExpander { * Returns the topmost token on the stack, without expanding it. * Similar in behavior to TeX's `\futurelet`. */ - future() { + future(): Token { if (this.stack.length === 0) { this.stack.push(this.lexer.lex()); } @@ -31,7 +39,7 @@ class MacroExpander { /** * Remove and return the next unexpanded token. */ - popToken() { + popToken(): Token { this.future(); // ensure non-empty stack return this.stack.pop(); } @@ -70,7 +78,7 @@ class MacroExpander { * i.e. things like those defined by \def\foo#1\end{…}. * See the TeX book page 202ff. for details on how those should behave. 
*/ - expandOnce() { + expandOnce(): Token | Token[] { const topToken = this.popToken(); const name = topToken.text; const isMacro = (name.charAt(0) === "\\"); @@ -83,38 +91,16 @@ class MacroExpander { this.stack.push(topToken); return topToken; } - let expansion = this.macros[name]; - if (typeof expansion === "function") { - expansion = expansion.call(this); - } - if (typeof expansion === "string") { - let numArgs = 0; - if (expansion.indexOf("#") !== -1) { - const stripped = expansion.replace(/##/g, ""); - while (stripped.indexOf("#" + (numArgs + 1)) !== -1) { - ++numArgs; - } - } - const bodyLexer = new Lexer(expansion); - expansion = []; - let tok = bodyLexer.lex(); - while (tok.text !== "EOF") { - expansion.push(tok); - tok = bodyLexer.lex(); - } - expansion.reverse(); // to fit in with stack using push and pop - expansion.numArgs = numArgs; - // TODO: Could cache macro expansions if it originally came as a - // String (but not those that come in as a Function). - } - if (expansion.numArgs) { - const args = []; + const {tokens, numArgs} = this._getExpansion(name); + let expansion = tokens; + if (numArgs) { + const args: Token[][] = []; // obtain arguments, either single token or balanced {…} group - for (let i = 0; i < expansion.numArgs; ++i) { + for (let i = 0; i < numArgs; ++i) { this.consumeSpaces(); // ignore spaces before each argument const startOfArg = this.popToken(); if (startOfArg.text === "{") { - const arg = []; + const arg: Token[] = []; let depth = 1; while (depth !== 0) { const tok = this.popToken(); @@ -153,12 +139,8 @@ class MacroExpander { if (tok.text === "#") { // ## → # expansion.splice(i + 1, 1); // drop first # } else if (/^[1-9]$/.test(tok.text)) { - // expansion.splice(i, 2, arg[0], arg[1], …) - // to replace placeholder with the indicated argument. 
- // TODO: use spread once we move to ES2015 - expansion.splice.apply( - expansion, - [i, 2].concat(args[tok.text - 1])); + // replace the placeholder with the indicated argument + expansion.splice(i, 2, ...args[tok.text - 1]); } else { throw new ParseError( "Not a valid argument number", @@ -168,7 +150,7 @@ class MacroExpander { } } // Concatenate expansion onto top of stack. - this.stack.push.apply(this.stack, expansion); + this.stack.push(...expansion); return expansion; } @@ -178,7 +160,7 @@ class MacroExpander { * Similar in behavior to TeX's `\expandafter\futurelet`. * Equivalent to expandOnce() followed by future(). */ - expandAfterFuture() { + expandAfterFuture(): Token { this.expandOnce(); return this.future(); } @@ -186,7 +168,7 @@ class MacroExpander { /** * Recursively expand first token, then return first non-expandable token. */ - expandNextToken() { + expandNextToken(): Token { for (;;) { const expanded = this.expandOnce(); // expandOnce returns Token if and only if it's fully expanded. @@ -200,6 +182,47 @@ class MacroExpander { } } } + + // Flow unable to figure out that this pathway is impossible. + // https://github.com/facebook/flow/issues/4808 + throw new Error(); // eslint-disable-line no-unreachable + } + + /** + * Returns the expanded macro as a reversed array of tokens and a macro + * argument count. + * Caches macro expansions for those that were defined as simple TeX strings. + */ + _getExpansion(name: string): MacroExpansion { + const definition = this.macros[name]; + const expansion = + typeof definition === "function" ? 
definition(this) : definition; + if (typeof expansion === "string") { + let numArgs = 0; + if (expansion.indexOf("#") !== -1) { + const stripped = expansion.replace(/##/g, ""); + while (stripped.indexOf("#" + (numArgs + 1)) !== -1) { + ++numArgs; + } + } + const bodyLexer = new Lexer(expansion); + const tokens = []; + let tok = bodyLexer.lex(); + while (tok.text !== "EOF") { + tokens.push(tok); + tok = bodyLexer.lex(); + } + tokens.reverse(); // to fit in with stack using push and pop + const expanded = {tokens, numArgs}; + // Cannot cache a macro defined using a function since it relies on + // parser context. + if (typeof definition !== "function") { + this.macros[name] = expanded; + } + return expanded; + } + + return expansion; } /** @@ -210,7 +233,7 @@ class MacroExpander { * Any skipped whitespace is stored in `this.discardedWhiteSpace` * so that `unget` can correctly undo the effects of `get`. */ - get(ignoreSpace) { + get(ignoreSpace: boolean): Token { this.discardedWhiteSpace = []; let token = this.expandNextToken(); if (ignoreSpace) { @@ -229,7 +252,7 @@ class MacroExpander { * was got in the old mode but should get got again in a new mode * with possibly different whitespace handling. */ - unget(token) { + unget(token: Token) { this.stack.push(token); while (this.discardedWhiteSpace.length !== 0) { this.stack.push(this.discardedWhiteSpace.pop()); @@ -237,4 +260,3 @@ class MacroExpander { } } -module.exports = MacroExpander; diff --git a/src/macros.js b/src/macros.js index f290d40e..e04ea40c 100644 --- a/src/macros.js +++ b/src/macros.js @@ -6,10 +6,39 @@ import symbols from "./symbols"; import utils from "./utils"; +import {Token} from "./Token"; -// This function might one day accept additional argument and do more things. -function defineMacro(name: string, body: string | () => string) { - module.exports[name] = body; +/** + * Provides context to macros defined by functions. Implemented by + * MacroExpander. 
+ */ +export interface MacroContextInterface { + /** + * Returns the topmost token on the stack, without expanding it. + * Similar in behavior to TeX's `\futurelet`. + */ + future(): Token; + + /** + * Expand the next token only once (if possible), and return the resulting + * top token on the stack (without removing anything from the stack). + * Similar in behavior to TeX's `\expandafter\futurelet`. + */ + expandAfterFuture(): Token; +} + +/** Macro tokens (in reverse order). */ +export type MacroExpansion = {tokens: Token[], numArgs: number}; + +type MacroDefinition = string | (MacroContextInterface => string) | MacroExpansion; +export type MacroMap = {[string]: MacroDefinition}; + +const builtinMacros: MacroMap = {}; +export default builtinMacros; + +// This function might one day accept an additional argument and do more things. +function defineMacro(name: string, body: string | MacroContextInterface => string) { + builtinMacros[name] = body; } ////////////////////////////////////////////////////////////////////// @@ -107,14 +136,14 @@ const dotsByToken = { '\\DOTSX': '\\dotsx', }; -defineMacro("\\dots", function() { +defineMacro("\\dots", function(context) { // TODO: If used in text mode, should expand to \textellipsis. // However, in KaTeX, \textellipsis and \ldots behave the same // (in text mode), and it's unlikely we'd see any of the math commands // that affect the behavior of \dots when in text mode. So fine for now // (until we support \ifmmode ... \else ... \fi). 
let thedots = '\\dotso'; - const next = this.expandAfterFuture().text; + const next = context.expandAfterFuture().text; if (next in dotsByToken) { thedots = dotsByToken[next]; } else if (next.substr(0, 4) === '\\not') { @@ -152,8 +181,8 @@ const spaceAfterDots = { ',': true, }; -defineMacro("\\dotso", function() { - const next = this.future().text; +defineMacro("\\dotso", function(context) { + const next = context.future().text; if (next in spaceAfterDots) { return "\\ldots\\,"; } else { @@ -161,8 +190,8 @@ defineMacro("\\dotso", function() { } }); -defineMacro("\\dotsc", function() { - const next = this.future().text; +defineMacro("\\dotsc", function(context) { + const next = context.future().text; // \dotsc uses \extra@ but not \extrap@, instead specially checking for // ';' and '.', but doesn't check for ','. if (next in spaceAfterDots && next !== ',') { @@ -172,8 +201,8 @@ defineMacro("\\dotsc", function() { } }); -defineMacro("\\cdots", function() { - const next = this.future().text; +defineMacro("\\cdots", function(context) { + const next = context.future().text; if (next in spaceAfterDots) { return "\\@cdots\\,"; } else {