Add support for \expandafter, \noexpand, \edef, \let, and \long (#2122)

* Add support for \expandafter

* Add support for \noexpand

* Add support for \edef

* Update comments

* Allow \long before macro definition

* Update documentation

* Update comments

* Fix defPrefix

* Add support for \let

* Update documentation

* Print error token

* Update documentation

* Check whether command is expandable

* Add tests

* Fix token order

* Make noexpand a Token property

* Throw error if control sequence is undefined when expanding

* Rename expandableOnly to expandOnly

* Make unexpandable macro property

* Move \expandafter to macros.js

* Add TODO

* Fix merge conflict

* Update a test case

* Remove unused functions in MacroContextInterface

* Update comments

* Refactor code

* Move \noexpand to macros

* Update MacroExpander.js

* Add a test case

* Separate control sequence check to a function

* Add support for \futurelet

* Separate RHS getter to a function

* Update documentation

* Move expandOnly logic to expandOnce

* Refactor code and update comments

Co-authored-by: Kevin Barabash <kevinb@khanacademy.org>
This commit is contained in:
ylemkimon
2020-03-11 12:14:34 +09:00
committed by GitHub
parent d6a4379b49
commit 9917d1ce84
9 changed files with 319 additions and 27 deletions

View File

@@ -24,6 +24,9 @@ title: Common Issues
behavior matches MathJax with its `color.js` extension enabled.
- Equivalents of MathJax `\class`, `\cssId`, and `\style` are `\htmlClass`,
`\htmlId`, and `\htmlStyle`, respectively, to avoid ambiguity.
- Some symbols are defined as macros rather than with `\DeclareMathSymbol` or
similar, as they are in LaTeX. This may change how they behave under expansion:
they may expand into multiple tokens and be affected by `\expandafter` and `\noexpand`.
## Troubleshooting

View File

@@ -347,6 +347,7 @@ use `\ce` instead|
|Symbol/Function | Rendered | Source or Comment|
|:---------------|:------------|:-----------------|
|\edef|$\def\foo{a}\edef\bar{\foo}\def\foo{}\bar$|`\def\foo{a}\edef\bar{\foo}\def\foo{}\bar`|
|\ell|$\ell$||
|\else|<span style="color:firebrick;">Not supported</span>|[Issue #1003](https://github.com/KaTeX/KaTeX/issues/1003)|
|\em|<span style="color:firebrick;">Not supported</span>||
@@ -380,6 +381,7 @@ use `\ce` instead|
|\exist|$\exist$||
|\exists|$\exists$||
|\exp|$\exp$||
|\expandafter|||
## F
@@ -396,6 +398,7 @@ use `\ce` instead|
|\frac|$\frac a b$|`\frac a b`|
|\frak|$\frak{AaBb}$|`\frak{AaBb}`|
|\frown|$\frown$||
|\futurelet|||
## G
@@ -571,7 +574,7 @@ use `\ce` instead|
|\lesseqqgtr|$\lesseqqgtr$||
|\lessgtr|$\lessgtr$||
|\lesssim|$\lesssim$||
|\let|<span style="color:firebrick;">Not supported</span>|[Issue #1413](https://github.com/KaTeX/KaTeX/issues/1413)|
|\let|||
|\lfloor|$\lfloor$||
|\lg|$\lg$||
|\lgroup|$\lgroup$||
@@ -595,6 +598,7 @@ use `\ce` instead|
|\lnot|$\lnot$||
|\lnsim|$\lnsim$||
|\log|$\log$||
|\long|||
|\Longleftarrow|$\Longleftarrow$||
|\longleftarrow|$\longleftarrow$||
|\Longleftrightarrow|$\Longleftrightarrow$||
@@ -718,6 +722,7 @@ use `\ce` instead|
|\nmid|$\nmid$||
|\nobreak|||
|\nobreakspace|$a\nobreakspace b$|`a\nobreakspace b`|
|\noexpand|||
|\nolimits|$\lim\nolimits_x$|`\lim\nolimits_x`|
|\normalfont|<span style="color:firebrick;">Not supported</span>||
|\normalsize|$\normalsize normalsize$|`\normalsize normalsize`|
@@ -1197,6 +1202,7 @@ use `\ce` instead|
|Symbol/Function | Rendered | Source or Comment|
|:---------------|:------------|:-----------------|
|\xcancel|$\xcancel{ABC}$|`\xcancel{ABC}`|
|\xdef|$\def\foo{a}\xdef\bar{\foo}\def\foo{}\bar$|`\def\foo{a}\xdef\bar{\foo}\def\foo{}\bar`|
|\Xi|$\Xi$||
|\xi|$\xi$||
|\xhookleftarrow|$\xhookleftarrow{abc}$|`\xhookleftarrow{abc}`|

View File

@@ -297,6 +297,10 @@ Direct Input: $∀ ∴ ∁ ∵ ∃ ∈ ∉ ∋ ⊂ ⊃ ∧ ↦ → ←
|:-------------------------------------|:------
|$\def\foo{x^2} \foo + \foo$ | `\def\foo{x^2} \foo + \foo`
|$\gdef\bar#1{#1^2} \bar{y} + \bar{y}$ | `\gdef\bar#1{#1^2} \bar{y} + \bar{y}`
| | `\edef\macroname#1#2…{definition to be expanded}`
| | `\xdef\macroname#1#2…{definition to be expanded}`
| | `\let\foo=\bar`
| | `\futurelet\foo\bar x`
| | `\global\def\macroname#1#2…{definition}`
| | `\newcommand\macroname[numargs]{definition}`
| | `\renewcommand\macroname[numargs]{definition}`
@@ -306,11 +310,13 @@ Macros can also be defined in the KaTeX [rendering options](options.md).
Macros accept up to nine arguments: #1, #2, etc.
`\gdef` and `\global\def` macros will persist between math expressions.
`\gdef`, `\xdef`, `\global\def`, `\global\edef`, `\global\let`, and `\global\futurelet` will persist between math expressions.
KaTeX has no `\par`, so all macros are long by default and `\long` will be ignored.
Available functions include:
`\char` `\mathchoice` `\TextOrMath` `\@ifstar` `\@ifnextchar` `\@firstoftwo` `\@secondoftwo` `\relax`
`\char` `\mathchoice` `\TextOrMath` `\@ifstar` `\@ifnextchar` `\@firstoftwo` `\@secondoftwo` `\relax` `\expandafter` `\noexpand`
@ is a valid character for commands, as if `\makeatletter` were in effect.

View File

@@ -180,13 +180,19 @@ export default class MacroExpander implements MacroContextInterface {
* At the moment, macro expansion doesn't handle delimited macros,
* i.e. things like those defined by \def\foo#1\end{…}.
* See the TeX book page 202ff. for details on how those should behave.
*
* If expandableOnly, only expandable tokens are expanded and
* an undefined control sequence results in an error.
*/
expandOnce(): Token | Token[] {
expandOnce(expandableOnly?: boolean): Token | Token[] {
const topToken = this.popToken();
const name = topToken.text;
const expansion = this._getExpansion(name);
if (expansion == null) { // mainly checking for undefined here
// Fully expanded
const expansion = !topToken.noexpand ? this._getExpansion(name) : null;
if (expansion == null || (expandableOnly && expansion.unexpandable)) {
if (expandableOnly && expansion == null &&
name[0] === "\\" && !this.isDefined(name)) {
throw new ParseError("Undefined control sequence: " + name);
}
this.pushToken(topToken);
return topToken;
}
@@ -248,7 +254,9 @@ export default class MacroExpander implements MacroContextInterface {
if (expanded instanceof Token) {
// \relax stops the expansion, but shouldn't get returned (a
// null return value couldn't get implemented as a function).
if (expanded.text === "\\relax") {
// the token after \noexpand is interpreted as if its meaning
// were \relax
if (expanded.text === "\\relax" || expanded.treatAsRelax) {
this.stack.pop();
} else {
return this.stack.pop(); // === expanded
@@ -266,16 +274,26 @@ export default class MacroExpander implements MacroContextInterface {
* tokens, or return `undefined` if no such macro is defined.
*/
expandMacro(name: string): Token[] | void {
if (!this.macros.get(name)) {
return undefined;
return this.macros.has(name)
? this.expandTokens([new Token(name)]) : undefined;
}
/**
* Fully expand the given token stream and return the resulting list of tokens
*/
expandTokens(tokens: Token[]): Token[] {
const output = [];
const oldStackLength = this.stack.length;
this.pushToken(new Token(name));
this.pushTokens(tokens);
while (this.stack.length > oldStackLength) {
const expanded = this.expandOnce();
const expanded = this.expandOnce(true); // expand only expandable tokens
// expandOnce returns Token if and only if it's fully expanded.
if (expanded instanceof Token) {
if (expanded.treatAsRelax) {
// the expansion of \noexpand is the token itself
expanded.noexpand = false;
expanded.treatAsRelax = false;
}
output.push(this.stack.pop());
}
}
@@ -342,4 +360,15 @@ export default class MacroExpander implements MacroContextInterface {
symbols.text.hasOwnProperty(name) ||
implicitCommands.hasOwnProperty(name);
}
/**
* Determine whether a command is expandable.
*/
isExpandable(name: string): boolean {
const macro = this.macros.get(name);
return macro != null ? typeof macro === "string"
|| typeof macro === "function" || !macro.unexpandable
// TODO(ylem): #2085
: functions.hasOwnProperty(name)/* && !functions[name].primitive*/;
}
}

View File

@@ -23,6 +23,8 @@ export interface LexerInterface {input: string, tokenRegex: RegExp}
export class Token {
text: string;
loc: ?SourceLocation;
noexpand: ?boolean; // don't expand the token
treatAsRelax: ?boolean; // used in \noexpand
constructor(
text: string, // the text of this token

View File

@@ -4,37 +4,87 @@ import ParseError from "../ParseError";
import {assertNodeType} from "../parseNode";
const globalMap = {
"\\global": "\\global",
"\\long": "\\\\globallong",
"\\\\globallong": "\\\\globallong",
"\\def": "\\gdef",
"\\gdef": "\\gdef",
"\\edef": "\\xdef",
"\\xdef": "\\xdef",
"\\let": "\\\\globallet",
"\\futurelet": "\\\\globalfuture",
};
// Basic support for macro definitions:
// \def\macro{expansion}
// \def\macro#1{expansion}
// \def\macro#1#2{expansion}
// \def\macro#1#2#3#4#5#6#7#8#9{expansion}
// Also the \gdef and \global\def equivalents
/**
 * Return the text of `tok` if it may be used as the name being defined.
 *
 * Rejected texts are a lone `\`, `{`, `}`, `$`, `&`, `#`, `^`, `_`, and the
 * end-of-input marker `EOF`.
 *
 * @throws {ParseError} "Expected a control sequence" for a rejected token.
 */
const checkControlSequence = (tok) => {
    const {text} = tok;
    const isRejected = /^(?:[\\{}$&#^_]|EOF)$/.test(text);
    if (!isRejected) {
        return text;
    }
    throw new ParseError("Expected a control sequence", tok);
};
/**
 * Read the right-hand side token of a \let assignment from the gullet.
 *
 * If the first token popped is `=`, it is dropped; if the token after that
 * is a single space, that is dropped too. The next token is then returned.
 * Without a leading `=`, the first token popped is returned unchanged.
 */
const getRHS = (parser) => {
    const {gullet} = parser;
    const first = gullet.popToken();
    if (first.text !== "=") {
        return first;
    }
    // The optional equals sign was consumed; look at what follows it.
    const afterEquals = gullet.popToken();
    if (afterEquals.text !== " ") {
        return afterEquals;
    }
    // Exactly one optional space is skipped after the equals sign.
    return gullet.popToken();
};
/**
 * Bind `name` to the current meaning of `tok` in the gullet's macro table.
 *
 * If `tok.text` already has a macro entry, that entry is stored under
 * `name` as-is. Otherwise `tok` is flagged `noexpand` and wrapped in a
 * zero-argument expansion whose `unexpandable` flag is the negation of
 * `gullet.isExpandable(tok.text)`.
 *
 * `global` is forwarded unchanged as the third argument of `macros.set`.
 */
const letCommand = (parser, name, tok, global) => {
    const {gullet} = parser;
    const existing = gullet.macros.get(tok.text);
    if (existing != null) {
        gullet.macros.set(name, existing, global);
        return;
    }
    // don't expand it later even if a macro with the same name is defined
    // e.g., \let\foo=\frac \def\frac{\relax} \frac12
    tok.noexpand = true;
    const alias = {
        tokens: [tok],
        numArgs: 0,
        // reproduce the same behavior in expansion
        unexpandable: !gullet.isExpandable(tok.text),
    };
    gullet.macros.set(name, alias, global);
};
// <assignment> -> <non-macro assignment>|<macro assignment>
// <non-macro assignment> -> <simple assignment>|\global<non-macro assignment>
// <macro assignment> -> <definition>|<prefix><macro assignment>
// <prefix> -> \global|\long|\outer
defineFunction({
type: "internal",
names: ["\\global"],
names: [
"\\global", "\\long",
"\\\\globallong", // can't be entered directly
],
props: {
numArgs: 0,
allowedInText: true,
},
handler({parser}) {
handler({parser, funcName}) {
parser.consumeSpaces();
const token = parser.fetch();
if (globalMap[token.text]) {
// KaTeX doesn't have \par, so ignore \long
if (funcName === "\\global" || funcName === "\\\\globallong") {
token.text = globalMap[token.text];
}
return assertNodeType(parser.parseFunction(), "internal");
}
throw new ParseError(`Invalid token after \\global`, token);
throw new ParseError(`Invalid token after macro prefix`, token);
},
});
// Basic support for macro definitions: \def, \gdef, \edef, \xdef
// <definition> -> <def><control sequence><definition text>
// <def> -> \def|\gdef|\edef|\xdef
// <definition text> -> <parameter text><left brace><balanced text><right brace>
defineFunction({
type: "internal",
names: ["\\def", "\\gdef"],
names: ["\\def", "\\gdef", "\\edef", "\\xdef"],
props: {
numArgs: 0,
allowedInText: true,
@@ -65,11 +115,15 @@ defineFunction({
}
arg = parser.gullet.consumeArgs(1)[0];
}
if (funcName === "\\edef" || funcName === "\\xdef") {
arg = parser.gullet.expandTokens(arg);
arg.reverse(); // to fit in with stack order
}
// Final arg is the expansion of the macro
parser.gullet.macros.set(name, {
tokens: arg,
numArgs,
}, funcName === "\\gdef");
}, funcName === globalMap[funcName]);
return {
type: "internal",
@@ -77,3 +131,54 @@ defineFunction({
};
},
});
// <simple assignment> -> <let assignment>
// <let assignment> -> \futurelet<control sequence><token><token>
// | \let<control sequence><equals><one optional space><token>
// <equals> -> <optional spaces>|<optional spaces>=
defineFunction({
    type: "internal",
    names: [
        "\\let",
        "\\\\globallet", // can't be entered directly
    ],
    props: {
        numArgs: 0,
        allowedInText: true,
    },
    handler({parser, funcName}) {
        const {gullet} = parser;
        // The control sequence receiving the new meaning.
        const target = checkControlSequence(gullet.popToken());
        // <equals>: optional spaces, then an optional `=` followed by
        // one optional space (the latter two are handled by getRHS).
        gullet.consumeSpaces();
        const source = getRHS(parser);
        // The assignment is global only for the internal name.
        const isGlobal = funcName === "\\\\globallet";
        letCommand(parser, target, source, isGlobal);
        return {type: "internal", mode: parser.mode};
    },
});
// \futurelet<control sequence><token><token>: the control sequence is let
// to the second of the two following tokens, and both tokens are then put
// back onto the gullet in their original order.
// ref: https://www.tug.org/TUGboat/tb09-3/tb22bechtolsheim.pdf
defineFunction({
    type: "internal",
    names: [
        "\\futurelet",
        "\\\\globalfuture", // can't be entered directly
    ],
    props: {
        numArgs: 0,
        allowedInText: true,
    },
    handler({parser, funcName}) {
        const {gullet} = parser;
        const target = checkControlSequence(gullet.popToken());
        const skipped = gullet.popToken();
        const upcoming = gullet.popToken();
        const isGlobal = funcName === "\\\\globalfuture";
        letCommand(parser, target, upcoming, isGlobal);
        // Push in reverse so that `skipped` is read first again.
        gullet.pushToken(upcoming);
        gullet.pushToken(skipped);
        return {type: "internal", mode: parser.mode};
    },
});

View File

@@ -42,6 +42,11 @@ export interface MacroContextInterface {
*/
consumeSpaces(): void;
/**
* Expand the next token only once if possible.
*/
expandOnce(expandableOnly?: boolean): Token | Token[];
/**
* Expand the next token only once (if possible), and return the resulting
* top token on the stack (without removing anything from the stack).
@@ -79,10 +84,19 @@ export interface MacroContextInterface {
* `implicitCommands`.
*/
isDefined(name: string): boolean;
/**
* Determine whether a command is expandable.
*/
isExpandable(name: string): boolean;
}
/** Macro tokens (in reverse order). */
export type MacroExpansion = {tokens: Token[], numArgs: number};
export type MacroExpansion = {
tokens: Token[],
numArgs: number,
unexpandable?: boolean, // used in \let
};
export type MacroDefinition = string | MacroExpansion |
(MacroContextInterface => (string | MacroExpansion));
@@ -99,6 +113,29 @@ export function defineMacro(name: string, body: MacroDefinition) {
//////////////////////////////////////////////////////////////////////
// macro tools
defineMacro("\\noexpand", (context) => {
    // \noexpand expands to the following token itself. When that token
    // would ordinarily be expanded by TeX's expansion rules, it is marked
    // so that it is not expanded and is interpreted as if its meaning
    // were \relax.
    const next = context.popToken();
    const wouldExpand = context.isExpandable(next.text);
    if (wouldExpand) {
        next.noexpand = true;
        next.treatAsRelax = true;
    }
    return {tokens: [next], numArgs: 0};
});
defineMacro("\\expandafter", (context) => {
    // TeX first reads the token that comes immediately after \expandafter
    // without expanding it (`held` below). It then replaces the token after
    // `held` (plus any arguments that token takes) by its expansion, and
    // finally puts `held` back in front of that expansion.
    const held = context.popToken();
    // Expand a single step, and only if the token is expandable.
    context.expandOnce(true);
    return {tokens: [held], numArgs: 0};
});
// LaTeX's \@firstoftwo{#1}{#2} expands to #1, skipping #2
// TeX source: \long\def\@firstoftwo#1#2{#1}
defineMacro("\\@firstoftwo", function(context) {

View File

@@ -562,6 +562,42 @@ exports[`A font parser \\boldsymbol should inherit mbin/mrel from argument 1`] =
]
`;
exports[`A macro expander \\let should consume one optional space after equals sign 1`] = `
[
{
"type": "ordgroup",
"body": [
{
"type": "font",
"body": {
"type": "ordgroup",
"body": [
{
"type": "mathord",
"loc": {
"end": 39,
"lexer": {
"input": "\\\\def\\\\bold{\\\\bgroup\\\\bf\\\\let\\\\next= }\\\\bold{a}",
"lastIndex": 40
},
"start": 38
},
"mode": "math",
"text": "a"
}
],
"mode": "math"
},
"font": "mathbf",
"mode": "math"
}
],
"loc": null,
"mode": "math"
}
]
`;
exports[`A parse tree generator generates a tree 1`] = `
[
{

View File

@@ -2975,6 +2975,30 @@ describe("A macro expander", function() {
}}));
});
it("should delay expansion if preceded by \\expandafter", function() {
// \bar is expanded to "xy" before \foo runs, so \foo picks up x and y
// as its two arguments
expect`\expandafter\foo\bar`.toParseLike("x+y", new Settings({macros: {
"\\foo": "#1+#2",
"\\bar": "xy",
}}));
// \foo is expanded to x before \bar gets the chance to redefine it
expect`\def\foo{x}\def\bar{\def\foo{y}}\expandafter\bar\foo`.toParseLike`x`;
// \def is not expandable, i.e., \expandafter doesn't define the macro
expect`\expandafter\foo\def\foo{x}`.not.toParse();
});
it("should not expand if preceded by \\noexpand", function() {
// Exercises \noexpand on a user-defined macro, in combination with
// \expandafter, and on the built-in \frac.
// \foo is not expanded and interpreted as if its meaning were \relax
expect`\noexpand\foo y`.toParseLike("y",
new Settings({macros: {"\\foo": "x"}}));
// \noexpand is expandable, so the second \foo is not expanded
expect`\expandafter\foo\noexpand\foo`.toParseLike("x",
new Settings({macros: {"\\foo": "x"}}));
// \frac is a macro and therefore expandable
expect`\noexpand\frac xy`.toParseLike`xy`;
// TODO(ylem): #2085
// \def is not expandable, so is not affected by \noexpand
// expect`\noexpand\def\foo{xy}\foo`.toParseLike`xy`;
});
it("should allow for space macro argument (text version)", function() {
expect`\text{\foo\bar}`.toParseLike(r`\text{( )}`, new Settings({macros: {
"\\foo": "(#1)",
@@ -3156,6 +3180,16 @@ describe("A macro expander", function() {
//expect`\gdef{\foo}{}`.not.toParse();
});
it("\\xdef should expand definition", function() {
// The replacement text is expanded when \bar is defined, so emptying
// \foo afterwards does not change what \bar produces
expect`\def\foo{a}\xdef\bar{\foo}\def\foo{}\bar`.toParseLike`a`;
// \def\noexpand\foo{} expands into \def\foo{}
expect`\def\foo{a}\xdef\bar{\def\noexpand\foo{}}\foo\bar\foo`.toParseLike`a`;
// \foo\noexpand\foo expands into a\foo
expect`\def\foo{a}\xdef\bar{\foo\noexpand\foo}\def\foo{b}\bar`.toParseLike`ab`;
// \foo is not defined
expect`\xdef\bar{\foo}`.not.toParse();
});
it("\\def should be handled in Parser", () => {
expect`\gdef\foo{1}`.toParse(new Settings({maxExpand: 0}));
expect`2^\def\foo{1}2`.not.toParse();
@@ -3177,13 +3211,23 @@ describe("A macro expander", function() {
"\\x\\def\\x{5}\\x}\\x").toParseLike`1{2{34}35}3`;
});
it("\\global needs to followed by \\def", () => {
it("\\global needs to followed by macro prefixes, \\def or \\edef", () => {
expect`\global\def\foo{}\foo`.toParseLike``;
expect`\global\edef\foo{}\foo`.toParseLike``;
expect`\def\DEF{\def}\global\DEF\foo{}\foo`.toParseLike``;
expect`\global\global\def\foo{}\foo`.toParseLike``;
expect`\global\long\def\foo{}\foo`.toParseLike``;
expect`\global\foo`.not.toParse();
expect`\global\bar x`.not.toParse();
});
it("\\long needs to followed by macro prefixes, \\def or \\edef", () => {
// \long is accepted directly before a definition...
expect`\long\def\foo{}\foo`.toParseLike``;
expect`\long\edef\foo{}\foo`.toParseLike``;
// ...and before another prefix such as \global
expect`\long\global\def\foo{}\foo`.toParseLike``;
// \foo is neither a prefix nor a definition command, so this must fail
expect`\long\foo`.not.toParse();
});
it("Macro arguments do not generate groups", () => {
expect("\\def\\x{1}\\x\\def\\foo#1{#1}\\foo{\\x\\def\\x{2}\\x}\\x")
.toParseLike`1122`;
@@ -3225,6 +3269,30 @@ describe("A macro expander", function() {
expect(macros["\\foo"]).toBeTruthy();
});
it("\\let copies the definition", () => {
// \foo keeps the meaning \frac had at the time of \let, even after
// \frac itself is redefined
expect`\let\foo=\frac\def\frac{}\foo12`.toParseLike`\frac12`;
// \bar keeps the definition \foo had at the time of \let
expect`\def\foo{1}\let\bar\foo\def\foo{2}\bar`.toParseLike`1`;
// \bar, built by \edef from \foo, still behaves like the original \kern
// after \kern is re-\let to \relax
expect`\let\foo=\kern\edef\bar{\foo1em}\let\kern=\relax\bar`.toParseLike`\kern1em`;
// \foo = { (left brace)
expect`\let\foo{\frac\foo1}{2}`.toParseLike`\frac{1}{2}`;
// \equals = = (equal sign)
expect`\let\equals==a\equals b`.toParseLike`a=b`;
// \foo should not be expandable and not affected by \noexpand or \edef
expect`\let\foo=x\noexpand\foo`.toParseLike`x`;
expect`\let\foo=x\edef\bar{\foo}\def\foo{y}\bar`.toParseLike`y`;
});
it("\\let should consume one optional space after equals sign", () => {
// https://tex.stackexchange.com/questions/141166/let-foo-bar-vs-let-foo-bar-let-with-equals-sign
// The space inside the macro body is taken as the one optional space, so
// \space is presumably let to the next token, the space that follows `\:`
expect`\def\:{\let\space= }\: \text{\space}`.toParseLike`\text{ }`;
// Same idea with a brace: after `= ` the next token is the `{` of `{a}`,
// which \let takes as the right-hand side; the recorded snapshot shows a
// single group containing a bold `a`
const tree = getParsed`\def\bold{\bgroup\bf\let\next= }\bold{a}`;
expect(tree).toMatchSnapshot();
});
it("\\futurelet should parse correctly", () => {
// \foo is let to the token after \frac (the `1`); \frac and `1` are then
// put back, so the input reads as \frac1{2+\foo} with \foo meaning 1
expect`\futurelet\foo\frac1{2+\foo}`.toParseLike`\frac1{2+1}`;
});
it("\\newcommand doesn't change settings.macros", () => {
const macros = {};
expect`\newcommand\foo{x^2}\foo+\foo`.toParse(new Settings({macros}));