To @flow: MacroExpander. (#845)

* To @flow: MacroExpander.

* Explicitly pass context into defineMacro called with a function.

Instead of binding `this` when `defineMacro` is invoked
with a function, we now pass an explicit context as a
parameter to the function. This is a bit more obvious
and is currently more type-safe due to a bug in `@flow`:
https://github.com/facebook/flow/issues/4809

* Per feedback, rename some types, fields, and variables.
This commit is contained in:
Ashish Myles
2017-09-06 21:39:50 -04:00
committed by Kevin Barabash
parent feef4107df
commit cb7f166a7e
3 changed files with 113 additions and 56 deletions

6
flow-typed/object-assign.js vendored Normal file
View File

@@ -0,0 +1,6 @@
// Flow library definition for the 'object-assign' package.
declare module 'object-assign' {
// The module's export is typed as a single generic function: it accepts a
// target map with string keys and values of type T, followed by any number
// of source maps of the same shape, and returns a map of that same shape.
declare module.exports:
<T>(target: {[string]: T}, ...sources: Array<{[string]: T}>)
=> {[string]: T};
}

View File

@@ -1,3 +1,4 @@
// @flow
/**
* This file contains the “gullet” where macros are expanded
* until only non-macro tokens remain.
@@ -9,8 +10,15 @@ import builtinMacros from "./macros";
import ParseError from "./ParseError";
import objectAssign from "object-assign";
class MacroExpander {
constructor(input, macros) {
import type {MacroContextInterface, MacroMap, MacroExpansion} from "./macros";
export default class MacroExpander implements MacroContextInterface {
lexer: Lexer;
macros: MacroMap;
stack: Token[];
discardedWhiteSpace: Token[];
constructor(input: string, macros: MacroMap) {
this.lexer = new Lexer(input);
this.macros = objectAssign({}, builtinMacros, macros);
this.stack = []; // contains tokens in REVERSE order
@@ -21,7 +29,7 @@ class MacroExpander {
* Returns the topmost token on the stack, without expanding it.
* Similar in behavior to TeX's `\futurelet`.
*/
future() {
future(): Token {
if (this.stack.length === 0) {
this.stack.push(this.lexer.lex());
}
@@ -31,7 +39,7 @@ class MacroExpander {
/**
* Remove and return the next unexpanded token.
*/
popToken() {
popToken(): Token {
this.future(); // ensure non-empty stack
return this.stack.pop();
}
@@ -70,7 +78,7 @@ class MacroExpander {
* i.e. things like those defined by \def\foo#1\end{…}.
* See the TeX book page 202ff. for details on how those should behave.
*/
expandOnce() {
expandOnce(): Token | Token[] {
const topToken = this.popToken();
const name = topToken.text;
const isMacro = (name.charAt(0) === "\\");
@@ -83,38 +91,16 @@ class MacroExpander {
this.stack.push(topToken);
return topToken;
}
let expansion = this.macros[name];
if (typeof expansion === "function") {
expansion = expansion.call(this);
}
if (typeof expansion === "string") {
let numArgs = 0;
if (expansion.indexOf("#") !== -1) {
const stripped = expansion.replace(/##/g, "");
while (stripped.indexOf("#" + (numArgs + 1)) !== -1) {
++numArgs;
}
}
const bodyLexer = new Lexer(expansion);
expansion = [];
let tok = bodyLexer.lex();
while (tok.text !== "EOF") {
expansion.push(tok);
tok = bodyLexer.lex();
}
expansion.reverse(); // to fit in with stack using push and pop
expansion.numArgs = numArgs;
// TODO: Could cache macro expansions if it originally came as a
// String (but not those that come in as a Function).
}
if (expansion.numArgs) {
const args = [];
const {tokens, numArgs} = this._getExpansion(name);
let expansion = tokens;
if (numArgs) {
const args: Token[][] = [];
// obtain arguments, either single token or balanced {…} group
for (let i = 0; i < expansion.numArgs; ++i) {
for (let i = 0; i < numArgs; ++i) {
this.consumeSpaces(); // ignore spaces before each argument
const startOfArg = this.popToken();
if (startOfArg.text === "{") {
const arg = [];
const arg: Token[] = [];
let depth = 1;
while (depth !== 0) {
const tok = this.popToken();
@@ -153,12 +139,8 @@ class MacroExpander {
if (tok.text === "#") { // ## → #
expansion.splice(i + 1, 1); // drop first #
} else if (/^[1-9]$/.test(tok.text)) {
// expansion.splice(i, 2, arg[0], arg[1], …)
// to replace placeholder with the indicated argument.
// TODO: use spread once we move to ES2015
expansion.splice.apply(
expansion,
[i, 2].concat(args[tok.text - 1]));
// replace the placeholder with the indicated argument
expansion.splice(i, 2, ...args[tok.text - 1]);
} else {
throw new ParseError(
"Not a valid argument number",
@@ -168,7 +150,7 @@ class MacroExpander {
}
}
// Concatenate expansion onto top of stack.
this.stack.push.apply(this.stack, expansion);
this.stack.push(...expansion);
return expansion;
}
@@ -178,7 +160,7 @@ class MacroExpander {
* Similar in behavior to TeX's `\expandafter\futurelet`.
* Equivalent to expandOnce() followed by future().
*/
expandAfterFuture() {
expandAfterFuture(): Token {
this.expandOnce();
return this.future();
}
@@ -186,7 +168,7 @@ class MacroExpander {
/**
* Recursively expand first token, then return first non-expandable token.
*/
expandNextToken() {
expandNextToken(): Token {
for (;;) {
const expanded = this.expandOnce();
// expandOnce returns Token if and only if it's fully expanded.
@@ -200,6 +182,47 @@ class MacroExpander {
}
}
}
// Flow is unable to figure out that this code path is unreachable.
// https://github.com/facebook/flow/issues/4808
throw new Error(); // eslint-disable-line no-unreachable
}
/**
* Returns the expanded macro as a reversed array of tokens and a macro
* argument count.
* Caches macro expansions for those that were defined simple TeX strings.
*/
_getExpansion(name: string): MacroExpansion {
const definition = this.macros[name];
const expansion =
typeof definition === "function" ? definition(this) : definition;
if (typeof expansion === "string") {
let numArgs = 0;
if (expansion.indexOf("#") !== -1) {
const stripped = expansion.replace(/##/g, "");
while (stripped.indexOf("#" + (numArgs + 1)) !== -1) {
++numArgs;
}
}
const bodyLexer = new Lexer(expansion);
const tokens = [];
let tok = bodyLexer.lex();
while (tok.text !== "EOF") {
tokens.push(tok);
tok = bodyLexer.lex();
}
tokens.reverse(); // to fit in with stack using push and pop
const expanded = {tokens, numArgs};
// Cannot cache a macro defined using a function since it relies on
// parser context.
if (typeof definition !== "function") {
this.macros[name] = expanded;
}
return expanded;
}
return expansion;
}
/**
@@ -210,7 +233,7 @@ class MacroExpander {
* Any skipped whitespace is stored in `this.discardedWhiteSpace`
* so that `unget` can correctly undo the effects of `get`.
*/
get(ignoreSpace) {
get(ignoreSpace: boolean): Token {
this.discardedWhiteSpace = [];
let token = this.expandNextToken();
if (ignoreSpace) {
@@ -229,7 +252,7 @@ class MacroExpander {
* was got in the old mode but should get got again in a new mode
* with possibly different whitespace handling.
*/
unget(token) {
unget(token: Token) {
this.stack.push(token);
while (this.discardedWhiteSpace.length !== 0) {
this.stack.push(this.discardedWhiteSpace.pop());
@@ -237,4 +260,3 @@ class MacroExpander {
}
}
module.exports = MacroExpander;

View File

@@ -6,10 +6,39 @@
import symbols from "./symbols";
import utils from "./utils";
import {Token} from "./Token";
// This function might one day accept additional argument and do more things.
function defineMacro(name: string, body: string | () => string) {
module.exports[name] = body;
/**
* Provides context to macros defined by functions: an object implementing
* this interface is passed as the sole argument when such a macro function
* is called. Implemented by MacroExpander.
*/
export interface MacroContextInterface {
/**
* Returns the topmost token on the stack, without expanding it.
* Similar in behavior to TeX's `\futurelet`.
*/
future(): Token;
/**
* Expands the next token only once (if possible), and returns the resulting
* top token on the stack (without removing anything from the stack).
* Similar in behavior to TeX's `\expandafter\futurelet`.
*/
expandAfterFuture(): Token;
}
/**
* A tokenized macro body: `tokens` holds the macro's tokens in reverse order
* and `numArgs` is the number of arguments the macro takes.
*/
export type MacroExpansion = {tokens: Token[], numArgs: number};
/**
* The forms a macro definition may take: a TeX source string, a function that
* receives a MacroContextInterface and returns a TeX source string, or an
* already tokenized MacroExpansion.
*/
type MacroDefinition = string | (MacroContextInterface => string) | MacroExpansion;
/** Maps a macro name to its definition. */
export type MacroMap = {[string]: MacroDefinition};
// Registry of the built-in macros; it starts empty and is filled in by the
// defineMacro() calls in this file. It is this module's default export.
const builtinMacros: MacroMap = {};
export default builtinMacros;
// Registers a built-in macro by storing `body` in `builtinMacros` under
// `name`, replacing any entry already stored under that name. `body` is
// either a TeX source string or a function that receives a
// MacroContextInterface and returns a TeX source string.
// This function might one day accept an additional argument and do more things.
function defineMacro(name: string, body: string | MacroContextInterface => string) {
builtinMacros[name] = body;
}
//////////////////////////////////////////////////////////////////////
@@ -107,14 +136,14 @@ const dotsByToken = {
'\\DOTSX': '\\dotsx',
};
defineMacro("\\dots", function() {
defineMacro("\\dots", function(context) {
// TODO: If used in text mode, should expand to \textellipsis.
// However, in KaTeX, \textellipsis and \ldots behave the same
// (in text mode), and it's unlikely we'd see any of the math commands
// that affect the behavior of \dots when in text mode. So fine for now
// (until we support \ifmmode ... \else ... \fi).
let thedots = '\\dotso';
const next = this.expandAfterFuture().text;
const next = context.expandAfterFuture().text;
if (next in dotsByToken) {
thedots = dotsByToken[next];
} else if (next.substr(0, 4) === '\\not') {
@@ -152,8 +181,8 @@ const spaceAfterDots = {
',': true,
};
defineMacro("\\dotso", function() {
const next = this.future().text;
defineMacro("\\dotso", function(context) {
const next = context.future().text;
if (next in spaceAfterDots) {
return "\\ldots\\,";
} else {
@@ -161,8 +190,8 @@ defineMacro("\\dotso", function() {
}
});
defineMacro("\\dotsc", function() {
const next = this.future().text;
defineMacro("\\dotsc", function(context) {
const next = context.future().text;
// \dotsc uses \extra@ but not \extrap@, instead specially checking for
// ';' and '.', but doesn't check for ','.
if (next in spaceAfterDots && next !== ',') {
@@ -172,8 +201,8 @@ defineMacro("\\dotsc", function() {
}
});
defineMacro("\\cdots", function() {
const next = this.future().text;
defineMacro("\\cdots", function(context) {
const next = context.future().text;
if (next in spaceAfterDots) {
return "\\@cdots\\,";
} else {