Add support for \expandafter, \noexpand, \edef, \let, and \long (#2122)

* Add support for \expandafter

* Add support for \noexpand

* Add support for \edef

* Update comments

* Allow \long before macro definition

* Update documentation

* Update comments

* Fix defPrefix

* Add support for \let

* Update documentation

* Print error token

* Update documentation

* Check whether command is expandable

* Add tests

* Fix token order

* Make noexpand a Token property

* Throw error if control sequence is undefined when expanding

* Rename expandableOnly to expandOnly

* Make unexpandable macro property

* Move \expandafter to macros.js

* Add TODO

* Fix merge conflict

* Update a test case

* Remove unused functions in MacroContextInterface

* Update comments

* Refactor code

* Move \noexpand to macros

* Update MacroExpander.js

* Add a test case

* Separate control sequence check to a function

* Add support for \futurelet

* Separate RHS getter to a function

* Update documentation

* Move expandOnly logic to expandOnce

* Refactor code and update comments

Co-authored-by: Kevin Barabash <kevinb@khanacademy.org>
This commit is contained in:
ylemkimon
2020-03-11 12:14:34 +09:00
committed by GitHub
parent d6a4379b49
commit 9917d1ce84
9 changed files with 319 additions and 27 deletions

View File

@@ -180,13 +180,19 @@ export default class MacroExpander implements MacroContextInterface {
* At the moment, macro expansion doesn't handle delimited macros,
* i.e. things like those defined by \def\foo#1\end{…}.
* See the TeX book page 202ff. for details on how those should behave.
*
* If expandableOnly, only expandable tokens are expanded and
* an undefined control sequence results in an error.
*/
expandOnce(): Token | Token[] {
expandOnce(expandableOnly?: boolean): Token | Token[] {
const topToken = this.popToken();
const name = topToken.text;
const expansion = this._getExpansion(name);
if (expansion == null) { // mainly checking for undefined here
// Fully expanded
const expansion = !topToken.noexpand ? this._getExpansion(name) : null;
if (expansion == null || (expandableOnly && expansion.unexpandable)) {
if (expandableOnly && expansion == null &&
name[0] === "\\" && !this.isDefined(name)) {
throw new ParseError("Undefined control sequence: " + name);
}
this.pushToken(topToken);
return topToken;
}
@@ -248,7 +254,9 @@ export default class MacroExpander implements MacroContextInterface {
if (expanded instanceof Token) {
// \relax stops the expansion, but shouldn't get returned (a
// null return value couldn't get implemented as a function).
if (expanded.text === "\\relax") {
// the token after \noexpand is interpreted as if its meaning
// were \relax
if (expanded.text === "\\relax" || expanded.treatAsRelax) {
this.stack.pop();
} else {
return this.stack.pop(); // === expanded
@@ -266,16 +274,26 @@ export default class MacroExpander implements MacroContextInterface {
* tokens, or return `undefined` if no such macro is defined.
*/
expandMacro(name: string): Token[] | void {
if (!this.macros.get(name)) {
return undefined;
}
return this.macros.has(name)
? this.expandTokens([new Token(name)]) : undefined;
}
/**
* Fully expand the given token stream and return the resulting list of tokens
*/
expandTokens(tokens: Token[]): Token[] {
const output = [];
const oldStackLength = this.stack.length;
this.pushToken(new Token(name));
this.pushTokens(tokens);
while (this.stack.length > oldStackLength) {
const expanded = this.expandOnce();
const expanded = this.expandOnce(true); // expand only expandable tokens
// expandOnce returns Token if and only if it's fully expanded.
if (expanded instanceof Token) {
if (expanded.treatAsRelax) {
// the expansion of \noexpand is the token itself
expanded.noexpand = false;
expanded.treatAsRelax = false;
}
output.push(this.stack.pop());
}
}
@@ -342,4 +360,15 @@ export default class MacroExpander implements MacroContextInterface {
symbols.text.hasOwnProperty(name) ||
implicitCommands.hasOwnProperty(name);
}
/**
* Determine whether a command is expandable.
*/
isExpandable(name: string): boolean {
const macro = this.macros.get(name);
return macro != null ? typeof macro === "string"
|| typeof macro === "function" || !macro.unexpandable
// TODO(ylem): #2085
: functions.hasOwnProperty(name)/* && !functions[name].primitive*/;
}
}

View File

@@ -23,6 +23,8 @@ export interface LexerInterface {input: string, tokenRegex: RegExp}
export class Token {
text: string;
loc: ?SourceLocation;
noexpand: ?boolean; // don't expand the token
treatAsRelax: ?boolean; // used in \noexpand
constructor(
text: string, // the text of this token

View File

@@ -4,37 +4,87 @@ import ParseError from "../ParseError";
import {assertNodeType} from "../parseNode";
// Lookup table used by the macro-prefix handler (\global, \long) and by the
// \def-family handler.
//
// Key:   text of a token that may legally follow a macro prefix.
// Value: name of the command that performs the same assignment globally.
//
// The prefix handler accepts the next token only if it appears here as a key,
// and (for the global prefixes) rewrites the token's text to the mapped value
// before parsing it. The \def-family handler treats a definition as global
// when its own name maps to itself (e.g. \gdef, \xdef).
//
// Names written with a doubled backslash ("\\\\global...") are internal
// command names that cannot be entered directly.
const globalMap = {
// a repeated \global stays \global
"\\global": "\\global",
// \long after \global becomes the internal "global + long" prefix
"\\long": "\\\\globallong",
"\\\\globallong": "\\\\globallong",
// \def / \gdef both become \gdef
"\\def": "\\gdef",
"\\gdef": "\\gdef",
// \edef / \xdef both become \xdef
"\\edef": "\\xdef",
"\\xdef": "\\xdef",
// \let and \futurelet map to their internal global variants
"\\let": "\\\\globallet",
"\\futurelet": "\\\\globalfuture",
};
// Basic support for macro definitions:
// \def\macro{expansion}
// \def\macro#1{expansion}
// \def\macro#1#2{expansion}
// \def\macro#1#2#3#4#5#6#7#8#9{expansion}
// Also the \gdef and \global\def equivalents
const checkControlSequence = (tok) => {
const name = tok.text;
if (/^(?:[\\{}$&#^_]|EOF)$/.test(name)) {
throw new ParseError("Expected a control sequence", tok);
}
return name;
};
const getRHS = (parser) => {
let tok = parser.gullet.popToken();
if (tok.text === "=") { // consume optional equals
tok = parser.gullet.popToken();
if (tok.text === " ") { // consume one optional space
tok = parser.gullet.popToken();
}
}
return tok;
};
const letCommand = (parser, name, tok, global) => {
let macro = parser.gullet.macros.get(tok.text);
if (macro == null) {
// don't expand it later even if a macro with the same name is defined
// e.g., \let\foo=\frac \def\frac{\relax} \frac12
tok.noexpand = true;
macro = {
tokens: [tok],
numArgs: 0,
// reproduce the same behavior in expansion
unexpandable: !parser.gullet.isExpandable(tok.text),
};
}
parser.gullet.macros.set(name, macro, global);
};
// <assignment> -> <non-macro assignment>|<macro assignment>
// <non-macro assignment> -> <simple assignment>|\global<non-macro assignment>
// <macro assignment> -> <definition>|<prefix><macro assignment>
// <prefix> -> \global|\long|\outer
// Macro prefixes: \global and \long (plus the internal combination of both).
//
// The handler validates the token that follows the prefix against
// `globalMap`. For the global prefixes it rewrites that token's text to its
// global counterpart, then parses the rewritten command as an "internal"
// node. Any token that is not a key of `globalMap` is rejected.
defineFunction({
    type: "internal",
    names: [
        "\\global", "\\long",
        "\\\\globallong", // can't be entered directly
    ],
    props: {
        numArgs: 0,
        allowedInText: true,
    },
    handler({parser, funcName}) {
        parser.consumeSpaces();
        const token = parser.fetch();
        if (globalMap[token.text]) {
            // KaTeX doesn't have \par, so ignore \long: only the global
            // prefixes rewrite the following command. A plain \long merely
            // checks that a definition command follows.
            if (funcName === "\\global" || funcName === "\\\\globallong") {
                token.text = globalMap[token.text];
            }
            return assertNodeType(parser.parseFunction(), "internal");
        }
        throw new ParseError(`Invalid token after macro prefix`, token);
    },
});
// Basic support for macro definitions: \def, \gdef, \edef, \xdef
// <definition> -> <def><control sequence><definition text>
// <def> -> \def|\gdef|\edef|\xdef
// <definition text> -> <parameter text><left brace><balanced text><right brace>
defineFunction({
type: "internal",
names: ["\\def", "\\gdef"],
names: ["\\def", "\\gdef", "\\edef", "\\xdef"],
props: {
numArgs: 0,
allowedInText: true,
@@ -65,11 +115,15 @@ defineFunction({
}
arg = parser.gullet.consumeArgs(1)[0];
}
if (funcName === "\\edef" || funcName === "\\xdef") {
arg = parser.gullet.expandTokens(arg);
arg.reverse(); // to fit in with stack order
}
// Final arg is the expansion of the macro
parser.gullet.macros.set(name, {
tokens: arg,
numArgs,
}, funcName === "\\gdef");
}, funcName === globalMap[funcName]);
return {
type: "internal",
@@ -77,3 +131,54 @@ defineFunction({
};
},
});
// <simple assignment> -> <let assignment>
// <let assignment> -> \futurelet<control sequence><token><token>
// | \let<control sequence><equals><one optional space><token>
// <equals> -> <optional spaces>|<optional spaces>=
// \let<control sequence><equals><one optional space><token>
//
// Pops the control sequence being defined, skips optional spaces, reads the
// right-hand side (optional "=" and one optional space handled by getRHS),
// and binds the name via letCommand. The binding is global only when the
// handler was reached under its internal global name.
defineFunction({
    type: "internal",
    names: [
        "\\let",
        "\\\\globallet", // can't be entered directly
    ],
    props: {
        numArgs: 0,
        allowedInText: true,
    },
    handler({parser, funcName}) {
        const isGlobal = funcName === "\\\\globallet";
        const target = checkControlSequence(parser.gullet.popToken());
        parser.gullet.consumeSpaces();
        const source = getRHS(parser);
        letCommand(parser, target, source, isGlobal);
        const {mode} = parser;
        return {type: "internal", mode};
    },
});
// ref: https://www.tug.org/TUGboat/tb09-3/tb22bechtolsheim.pdf
// \futurelet<control sequence><token><token>
//
// Pops the control sequence being defined and the two tokens after it,
// binds the name to the second of those tokens via letCommand, then pushes
// both tokens back so that the first one is on top of the stack again.
defineFunction({
    type: "internal",
    names: [
        "\\futurelet",
        "\\\\globalfuture", // can't be entered directly
    ],
    props: {
        numArgs: 0,
        allowedInText: true,
    },
    handler({parser, funcName}) {
        const {gullet} = parser;
        const target = checkControlSequence(gullet.popToken());
        const skipped = gullet.popToken();
        const peeked = gullet.popToken();
        letCommand(parser, target, peeked, funcName === "\\\\globalfuture");
        // Restore the input: push in reverse so `skipped` is read first.
        gullet.pushToken(peeked);
        gullet.pushToken(skipped);
        return {type: "internal", mode: parser.mode};
    },
});

View File

@@ -42,6 +42,11 @@ export interface MacroContextInterface {
*/
consumeSpaces(): void;
/**
* Expand the next token only once if possible.
*/
expandOnce(expandableOnly?: boolean): Token | Token[];
/**
* Expand the next token only once (if possible), and return the resulting
* top token on the stack (without removing anything from the stack).
@@ -79,10 +84,19 @@ export interface MacroContextInterface {
* `implicitCommands`.
*/
isDefined(name: string): boolean;
/**
* Determine whether a command is expandable.
*/
isExpandable(name: string): boolean;
}
/** Macro tokens (in reverse order). */
export type MacroExpansion = {tokens: Token[], numArgs: number};
export type MacroExpansion = {
tokens: Token[],
numArgs: number,
unexpandable?: boolean, // used in \let
};
export type MacroDefinition = string | MacroExpansion |
(MacroContextInterface => (string | MacroExpansion));
@@ -99,6 +113,29 @@ export function defineMacro(name: string, body: MacroDefinition) {
//////////////////////////////////////////////////////////////////////
// macro tools
// \noexpand<token>
// The expansion is the token itself. If that token would ordinarily be
// expanded (per context.isExpandable), it is flagged so that it is not
// expanded and is interpreted as if its meaning were \relax.
defineMacro("\\noexpand", (context) => {
    const next = context.popToken();
    const wouldExpand = context.isExpandable(next.text);
    if (wouldExpand) {
        next.noexpand = true;
        next.treatAsRelax = true;
    }
    return {tokens: [next], numArgs: 0};
});
// \expandafter<token t><token ...>
// Reads t without expanding it, expands whatever follows t exactly once
// (expandable tokens only), and then yields t as its own expansion so that
// t ends up back in front of the expanded material.
defineMacro("\\expandafter", (context) => {
    const held = context.popToken();
    context.expandOnce(true); // expand only an expandable token
    return {tokens: [held], numArgs: 0};
});
// LaTeX's \@firstoftwo{#1}{#2} expands to #1, skipping #2
// TeX source: \long\def\@firstoftwo#1#2{#1}
defineMacro("\\@firstoftwo", function(context) {