From 9917d1ce843666c547a993f768092dc239e95c3f Mon Sep 17 00:00:00 2001 From: ylemkimon Date: Wed, 11 Mar 2020 12:14:34 +0900 Subject: [PATCH] Add support for \expandafter, \noexpand, \edef, \let, and \long (#2122) * Add support for \expandafter * Add support for \noexpand * Add support for \edef * Update comments * Allow \long before macro definition * Update documentation * Update comments * Fix defPrefix * Add support for \let * Update documentation * Print error token * Update documentation * Check whether command is expandable * Add tests * Fix token order * Make noexpand a Token property * Throw error if control sequence is undefined when expanding * Rename expandableOnly to expandOnly * Make unexpandable macro property * Move \expandafter to macros.js * Add TODO * Fix merge conflict * Update a test case * Remove unused functions in MacroContextInterface * Update comments * Refactor code * Move \noexpand to macros * Update MacroExpander.js * Add a test case * Separate control sequence check to a function * Add support for \futurelet * Separate RHS getter to a function * Update documentation * Move expandOnly logic to expandOnce * Refactor code and update comments Co-authored-by: Kevin Barabash --- docs/issues.md | 3 + docs/support_table.md | 8 +- docs/supported.md | 10 +- src/MacroExpander.js | 49 ++++++++-- src/Token.js | 2 + src/functions/def.js | 129 +++++++++++++++++++++++--- src/macros.js | 39 +++++++- test/__snapshots__/katex-spec.js.snap | 36 +++++++ test/katex-spec.js | 70 +++++++++++++- 9 files changed, 319 insertions(+), 27 deletions(-) diff --git a/docs/issues.md b/docs/issues.md index b3ca0ee0..546f28a3 100644 --- a/docs/issues.md +++ b/docs/issues.md @@ -24,6 +24,9 @@ title: Common Issues behavior matches MathJax with its `color.js` extension enabled. - Equivalents of MathJax `\class`, `\cssId`, and `\style` are `\htmlClass`, `\htmlId`, and `\htmlStyle`, respectively, to avoid ambiguity. 
+- Some symbols are defined using macros instead of `\DeclareMathSymbol` or similar + as in LaTeX. This may cause different behavior in expansion. They may expand + into multiple tokens and be affected by `\expandafter` and `\noexpand`. ## Troubleshooting diff --git a/docs/support_table.md b/docs/support_table.md index e03f5e23..79a270ff 100644 --- a/docs/support_table.md +++ b/docs/support_table.md @@ -347,6 +347,7 @@ use `\ce` instead| |Symbol/Function | Rendered | Source or Comment| |:---------------|:------------|:-----------------| +|\edef|$\def\foo{a}\edef\bar{\foo}\def\foo{}\bar$|`\def\foo{a}\edef\bar{\foo}\def\foo{}\bar`| |\ell|$\ell$|| |\else|Not supported|[Issue #1003](https://github.com/KaTeX/KaTeX/issues/1003)| |\em|Not supported|| @@ -380,6 +381,7 @@ use `\ce` instead| |\exist|$\exist$|| |\exists|$\exists$|| |\exp|$\exp$|| +|\expandafter||| ## F @@ -396,6 +398,7 @@ use `\ce` instead| |\frac|$\frac a b$|`\frac a b`| |\frak|$\frak{AaBb}$|`\frak{AaBb}`| |\frown|$\frown$|| +|\futurelet||| ## G @@ -571,7 +574,7 @@ use `\ce` instead| |\lesseqqgtr|$\lesseqqgtr$|| |\lessgtr|$\lessgtr$|| |\lesssim|$\lesssim$|| -|\let|Not supported|[Issue #1413](https://github.com/KaTeX/KaTeX/issues/1413)| +|\let||| |\lfloor|$\lfloor$|| |\lg|$\lg$|| |\lgroup|$\lgroup$|| @@ -595,6 +598,7 @@ use `\ce` instead| |\lnot|$\lnot$|| |\lnsim|$\lnsim$|| |\log|$\log$|| +|\long||| |\Longleftarrow|$\Longleftarrow$|| |\longleftarrow|$\longleftarrow$|| |\Longleftrightarrow|$\Longleftrightarrow$|| @@ -718,6 +722,7 @@ use `\ce` instead| |\nmid|$\nmid$|| |\nobreak||| |\nobreakspace|$a\nobreakspace b$|`a\nobreakspace b`| +|\noexpand||| |\nolimits|$\lim\nolimits_x$|`\lim\nolimits_x`| |\normalfont|Not supported|| |\normalsize|$\normalsize normalsize$|`\normalsize normalsize`| @@ -1197,6 +1202,7 @@ use `\ce` instead| |Symbol/Function | Rendered | Source or Comment| |:---------------|:------------|:-----------------| |\xcancel|$\xcancel{ABC}$|`\xcancel{ABC}`| 
+|\xdef|$\def\foo{a}\xdef\bar{\foo}\def\foo{}\bar$|`\def\foo{a}\xdef\bar{\foo}\def\foo{}\bar`| |\Xi|$\Xi$|| |\xi|$\xi$|| |\xhookleftarrow|$\xhookleftarrow{abc}$|`\xhookleftarrow{abc}`| diff --git a/docs/supported.md b/docs/supported.md index 9c8f1395..1e01777f 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -297,6 +297,10 @@ Direct Input: $∀ ∴ ∁ ∵ ∃ ∣ ∈ ∉ ∋ ⊂ ⊃ ∧ ∨ ↦ → ← |:-------------------------------------|:------ |$\def\foo{x^2} \foo + \foo$ | `\def\foo{x^2} \foo + \foo` |$\gdef\bar#1{#1^2} \bar{y} + \bar{y}$ | `\gdef\bar#1{#1^2} \bar{y} + \bar{y}` +| | `\edef\macroname#1#2…{definition to be expanded}` +| | `\xdef\macroname#1#2…{definition to be expanded}` +| | `\let\foo=\bar` +| | `\futurelet\foo\bar x` | | `\global\def\macroname#1#2…{definition}` | | `\newcommand\macroname[numargs]{definition}` | | `\renewcommand\macroname[numargs]{definition}` @@ -306,11 +310,13 @@ Macros can also be defined in the KaTeX [rendering options](options.md). Macros accept up to nine arguments: #1, #2, etc. -`\gdef` and `\global\def` macros will persist between math expressions. +`\gdef`, `\xdef`, `\global\def`, `\global\edef`, `\global\let`, and `\global\futurelet` will persist between math expressions. + +KaTeX has no `\par`, so all macros are long by default and `\long` will be ignored. Available functions include: -`\char` `\mathchoice` `\TextOrMath` `\@ifstar` `\@ifnextchar` `\@firstoftwo` `\@secondoftwo` `\relax` +`\char` `\mathchoice` `\TextOrMath` `\@ifstar` `\@ifnextchar` `\@firstoftwo` `\@secondoftwo` `\relax` `\expandafter` `\noexpand` @ is a valid character for commands, as if `\makeatletter` were in effect. diff --git a/src/MacroExpander.js b/src/MacroExpander.js index 2b8b23f5..786a19e1 100644 --- a/src/MacroExpander.js +++ b/src/MacroExpander.js @@ -180,13 +180,19 @@ export default class MacroExpander implements MacroContextInterface { * At the moment, macro expansion doesn't handle delimited macros, * i.e. things like those defined by \def\foo#1\end{…}. 
* See the TeX book page 202ff. for details on how those should behave. + * + * If expandableOnly, only expandable tokens are expanded and + * an undefined control sequence results in an error. */ - expandOnce(): Token | Token[] { + expandOnce(expandableOnly?: boolean): Token | Token[] { const topToken = this.popToken(); const name = topToken.text; - const expansion = this._getExpansion(name); - if (expansion == null) { // mainly checking for undefined here - // Fully expanded + const expansion = !topToken.noexpand ? this._getExpansion(name) : null; + if (expansion == null || (expandableOnly && expansion.unexpandable)) { + if (expandableOnly && expansion == null && + name[0] === "\\" && !this.isDefined(name)) { + throw new ParseError("Undefined control sequence: " + name); + } this.pushToken(topToken); return topToken; } @@ -248,7 +254,9 @@ export default class MacroExpander implements MacroContextInterface { if (expanded instanceof Token) { // \relax stops the expansion, but shouldn't get returned (a // null return value couldn't get implemented as a function). - if (expanded.text === "\\relax") { + // the token after \noexpand is interpreted as if its meaning + // were ‘\relax’ + if (expanded.text === "\\relax" || expanded.treatAsRelax) { this.stack.pop(); } else { return this.stack.pop(); // === expanded @@ -266,16 +274,26 @@ export default class MacroExpander implements MacroContextInterface { * tokens, or return `undefined` if no such macro is defined. */ expandMacro(name: string): Token[] | void { - if (!this.macros.get(name)) { - return undefined; - } + return this.macros.has(name) + ? 
this.expandTokens([new Token(name)]) : undefined; + } + + /** + * Fully expand the given token stream and return the resulting list of tokens + */ + expandTokens(tokens: Token[]): Token[] { const output = []; const oldStackLength = this.stack.length; - this.pushToken(new Token(name)); + this.pushTokens(tokens); while (this.stack.length > oldStackLength) { - const expanded = this.expandOnce(); + const expanded = this.expandOnce(true); // expand only expandable tokens // expandOnce returns Token if and only if it's fully expanded. if (expanded instanceof Token) { + if (expanded.treatAsRelax) { + // the expansion of \noexpand is the token itself + expanded.noexpand = false; + expanded.treatAsRelax = false; + } output.push(this.stack.pop()); } } @@ -342,4 +360,15 @@ export default class MacroExpander implements MacroContextInterface { symbols.text.hasOwnProperty(name) || implicitCommands.hasOwnProperty(name); } + + /** + * Determine whether a command is expandable. + */ + isExpandable(name: string): boolean { + const macro = this.macros.get(name); + return macro != null ? 
typeof macro === "string" + || typeof macro === "function" || !macro.unexpandable + // TODO(ylem): #2085 + : functions.hasOwnProperty(name)/* && !functions[name].primitive*/; + } } diff --git a/src/Token.js b/src/Token.js index 2a26023f..5ceaf168 100644 --- a/src/Token.js +++ b/src/Token.js @@ -23,6 +23,8 @@ export interface LexerInterface {input: string, tokenRegex: RegExp} export class Token { text: string; loc: ?SourceLocation; + noexpand: ?boolean; // don't expand the token + treatAsRelax: ?boolean; // used in \noexpand constructor( text: string, // the text of this token diff --git a/src/functions/def.js b/src/functions/def.js index c970b844..8fb5073a 100644 --- a/src/functions/def.js +++ b/src/functions/def.js @@ -4,37 +4,87 @@ import ParseError from "../ParseError"; import {assertNodeType} from "../parseNode"; const globalMap = { + "\\global": "\\global", + "\\long": "\\\\globallong", + "\\\\globallong": "\\\\globallong", "\\def": "\\gdef", "\\gdef": "\\gdef", + "\\edef": "\\xdef", + "\\xdef": "\\xdef", + "\\let": "\\\\globallet", + "\\futurelet": "\\\\globalfuture", }; -// Basic support for macro definitions: -// \def\macro{expansion} -// \def\macro#1{expansion} -// \def\macro#1#2{expansion} -// \def\macro#1#2#3#4#5#6#7#8#9{expansion} -// Also the \gdef and \global\def equivalents +const checkControlSequence = (tok) => { + const name = tok.text; + if (/^(?:[\\{}$&#^_]|EOF)$/.test(name)) { + throw new ParseError("Expected a control sequence", tok); + } + return name; +}; + +const getRHS = (parser) => { + let tok = parser.gullet.popToken(); + if (tok.text === "=") { // consume optional equals + tok = parser.gullet.popToken(); + if (tok.text === " ") { // consume one optional space + tok = parser.gullet.popToken(); + } + } + return tok; +}; + +const letCommand = (parser, name, tok, global) => { + let macro = parser.gullet.macros.get(tok.text); + if (macro == null) { + // don't expand it later even if a macro with the same name is defined + // e.g., 
\let\foo=\frac \def\frac{\relax} \frac12 + tok.noexpand = true; + macro = { + tokens: [tok], + numArgs: 0, + // reproduce the same behavior in expansion + unexpandable: !parser.gullet.isExpandable(tok.text), + }; + } + parser.gullet.macros.set(name, macro, global); +}; + +// -> | +// -> |\global +// -> | +// -> \global|\long|\outer defineFunction({ type: "internal", - names: ["\\global"], + names: [ + "\\global", "\\long", + "\\\\globallong", // can’t be entered directly + ], props: { numArgs: 0, allowedInText: true, }, - handler({parser}) { + handler({parser, funcName}) { parser.consumeSpaces(); const token = parser.fetch(); if (globalMap[token.text]) { - token.text = globalMap[token.text]; + // KaTeX doesn't have \par, so ignore \long + if (funcName === "\\global" || funcName === "\\\\globallong") { + token.text = globalMap[token.text]; + } return assertNodeType(parser.parseFunction(), "internal"); } - throw new ParseError(`Invalid token after \\global`, token); + throw new ParseError(`Invalid token after macro prefix`, token); }, }); +// Basic support for macro definitions: \def, \gdef, \edef, \xdef +// -> +// -> \def|\gdef|\edef|\xdef +// -> defineFunction({ type: "internal", - names: ["\\def", "\\gdef"], + names: ["\\def", "\\gdef", "\\edef", "\\xdef"], props: { numArgs: 0, allowedInText: true, @@ -65,11 +115,15 @@ defineFunction({ } arg = parser.gullet.consumeArgs(1)[0]; } + if (funcName === "\\edef" || funcName === "\\xdef") { + arg = parser.gullet.expandTokens(arg); + arg.reverse(); // to fit in with stack order + } // Final arg is the expansion of the macro parser.gullet.macros.set(name, { tokens: arg, numArgs, - }, funcName === "\\gdef"); + }, funcName === globalMap[funcName]); return { type: "internal", @@ -77,3 +131,54 @@ defineFunction({ }; }, }); + +// -> +// -> \futurelet +// | \let +// -> |= +defineFunction({ + type: "internal", + names: [ + "\\let", + "\\\\globallet", // can’t be entered directly + ], + props: { + numArgs: 0, + allowedInText: true, 
+ }, + handler({parser, funcName}) { + const name = checkControlSequence(parser.gullet.popToken()); + parser.gullet.consumeSpaces(); + const tok = getRHS(parser); + letCommand(parser, name, tok, funcName === "\\\\globallet"); + return { + type: "internal", + mode: parser.mode, + }; + }, +}); + +// ref: https://www.tug.org/TUGboat/tb09-3/tb22bechtolsheim.pdf +defineFunction({ + type: "internal", + names: [ + "\\futurelet", + "\\\\globalfuture", // can’t be entered directly + ], + props: { + numArgs: 0, + allowedInText: true, + }, + handler({parser, funcName}) { + const name = checkControlSequence(parser.gullet.popToken()); + const middle = parser.gullet.popToken(); + const tok = parser.gullet.popToken(); + letCommand(parser, name, tok, funcName === "\\\\globalfuture"); + parser.gullet.pushToken(tok); + parser.gullet.pushToken(middle); + return { + type: "internal", + mode: parser.mode, + }; + }, +}); diff --git a/src/macros.js b/src/macros.js index a59b8033..b66c7cc2 100644 --- a/src/macros.js +++ b/src/macros.js @@ -42,6 +42,11 @@ export interface MacroContextInterface { */ consumeSpaces(): void; + /** + * Expand the next token only once if possible. + */ + expandOnce(expandableOnly?: boolean): Token | Token[]; + /** * Expand the next token only once (if possible), and return the resulting * top token on the stack (without removing anything from the stack). @@ -79,10 +84,19 @@ export interface MacroContextInterface { * `implicitCommands`. */ isDefined(name: string): boolean; + + /** + * Determine whether a command is expandable. + */ + isExpandable(name: string): boolean; } /** Macro tokens (in reverse order). 
*/ -export type MacroExpansion = {tokens: Token[], numArgs: number}; +export type MacroExpansion = { + tokens: Token[], + numArgs: number, + unexpandable?: boolean, // used in \let +}; export type MacroDefinition = string | MacroExpansion | (MacroContextInterface => (string | MacroExpansion)); @@ -99,6 +113,29 @@ export function defineMacro(name: string, body: MacroDefinition) { ////////////////////////////////////////////////////////////////////// // macro tools +defineMacro("\\noexpand", function(context) { + // The expansion is the token itself; but that token is interpreted + // as if its meaning were ‘\relax’ if it is a control sequence that + // would ordinarily be expanded by TeX’s expansion rules. + const t = context.popToken(); + if (context.isExpandable(t.text)) { + t.noexpand = true; + t.treatAsRelax = true; + } + return {tokens: [t], numArgs: 0}; +}); + +defineMacro("\\expandafter", function(context) { + // TeX first reads the token that comes immediately after \expandafter, + // without expanding it; let’s call this token t. Then TeX reads the + // token that comes after t (and possibly more tokens, if that token + // has an argument), replacing it by its expansion. Finally TeX puts + // t back in front of that expansion. 
+ const t = context.popToken(); + context.expandOnce(true); // expand only an expandable token + return {tokens: [t], numArgs: 0}; +}); + // LaTeX's \@firstoftwo{#1}{#2} expands to #1, skipping #2 // TeX source: \long\def\@firstoftwo#1#2{#1} defineMacro("\\@firstoftwo", function(context) { diff --git a/test/__snapshots__/katex-spec.js.snap b/test/__snapshots__/katex-spec.js.snap index d8b2cd71..99dec740 100755 --- a/test/__snapshots__/katex-spec.js.snap +++ b/test/__snapshots__/katex-spec.js.snap @@ -562,6 +562,42 @@ exports[`A font parser \\boldsymbol should inherit mbin/mrel from argument 1`] = ] `; +exports[`A macro expander \\let should consume one optional space after equals sign 1`] = ` +[ + { + "type": "ordgroup", + "body": [ + { + "type": "font", + "body": { + "type": "ordgroup", + "body": [ + { + "type": "mathord", + "loc": { + "end": 39, + "lexer": { + "input": "\\\\def\\\\bold{\\\\bgroup\\\\bf\\\\let\\\\next= }\\\\bold{a}", + "lastIndex": 40 + }, + "start": 38 + }, + "mode": "math", + "text": "a" + } + ], + "mode": "math" + }, + "font": "mathbf", + "mode": "math" + } + ], + "loc": null, + "mode": "math" + } +] +`; + exports[`A parse tree generator generates a tree 1`] = ` [ { diff --git a/test/katex-spec.js b/test/katex-spec.js index 3db574f0..ad45baa2 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -2975,6 +2975,30 @@ describe("A macro expander", function() { }})); }); + it("should delay expansion if preceded by \\expandafter", function() { + expect`\expandafter\foo\bar`.toParseLike("x+y", new Settings({macros: { + "\\foo": "#1+#2", + "\\bar": "xy", + }})); + expect`\def\foo{x}\def\bar{\def\foo{y}}\expandafter\bar\foo`.toParseLike`x`; + // \def is not expandable, i.e., \expandafter doesn't define the macro + expect`\expandafter\foo\def\foo{x}`.not.toParse(); + }); + + it("should not expand if preceded by \\noexpand", function() { + // \foo is not expanded and interpreted as if its meaning were \relax + expect`\noexpand\foo 
y`.toParseLike("y", + new Settings({macros: {"\\foo": "x"}})); + // \noexpand is expandable, so the second \foo is not expanded + expect`\expandafter\foo\noexpand\foo`.toParseLike("x", + new Settings({macros: {"\\foo": "x"}})); + // \frac is a macro and therefore expandable + expect`\noexpand\frac xy`.toParseLike`xy`; + // TODO(ylem): #2085 + // \def is not expandable, so is not affected by \noexpand + // expect`\noexpand\def\foo{xy}\foo`.toParseLike`xy`; + }); + it("should allow for space macro argument (text version)", function() { expect`\text{\foo\bar}`.toParseLike(r`\text{( )}`, new Settings({macros: { "\\foo": "(#1)", @@ -3156,6 +3180,16 @@ describe("A macro expander", function() { //expect`\gdef{\foo}{}`.not.toParse(); }); + it("\\xdef should expand definition", function() { + expect`\def\foo{a}\xdef\bar{\foo}\def\foo{}\bar`.toParseLike`a`; + // \def\noexpand\foo{} expands into \def\foo{} + expect`\def\foo{a}\xdef\bar{\def\noexpand\foo{}}\foo\bar\foo`.toParseLike`a`; + // \foo\noexpand\foo expands into a\foo + expect`\def\foo{a}\xdef\bar{\foo\noexpand\foo}\def\foo{b}\bar`.toParseLike`ab`; + // \foo is not defined + expect`\xdef\bar{\foo}`.not.toParse(); + }); + it("\\def should be handled in Parser", () => { expect`\gdef\foo{1}`.toParse(new Settings({maxExpand: 0})); expect`2^\def\foo{1}2`.not.toParse(); @@ -3177,13 +3211,23 @@ describe("A macro expander", function() { "\\x\\def\\x{5}\\x}\\x").toParseLike`1{2{34}35}3`; }); - it("\\global needs to followed by \\def", () => { + it("\\global needs to followed by macro prefixes, \\def or \\edef", () => { expect`\global\def\foo{}\foo`.toParseLike``; + expect`\global\edef\foo{}\foo`.toParseLike``; expect`\def\DEF{\def}\global\DEF\foo{}\foo`.toParseLike``; + expect`\global\global\def\foo{}\foo`.toParseLike``; + expect`\global\long\def\foo{}\foo`.toParseLike``; expect`\global\foo`.not.toParse(); expect`\global\bar x`.not.toParse(); }); + it("\\long needs to followed by macro prefixes, \\def or \\edef", () => { + 
expect`\long\def\foo{}\foo`.toParseLike``; + expect`\long\edef\foo{}\foo`.toParseLike``; + expect`\long\global\def\foo{}\foo`.toParseLike``; + expect`\long\foo`.not.toParse(); + }); + it("Macro arguments do not generate groups", () => { expect("\\def\\x{1}\\x\\def\\foo#1{#1}\\foo{\\x\\def\\x{2}\\x}\\x") .toParseLike`1122`; @@ -3225,6 +3269,30 @@ describe("A macro expander", function() { expect(macros["\\foo"]).toBeTruthy(); }); + it("\\let copies the definition", () => { + expect`\let\foo=\frac\def\frac{}\foo12`.toParseLike`\frac12`; + expect`\def\foo{1}\let\bar\foo\def\foo{2}\bar`.toParseLike`1`; + expect`\let\foo=\kern\edef\bar{\foo1em}\let\kern=\relax\bar`.toParseLike`\kern1em`; + // \foo = { (left brace) + expect`\let\foo{\frac\foo1}{2}`.toParseLike`\frac{1}{2}`; + // \equals = = (equal sign) + expect`\let\equals==a\equals b`.toParseLike`a=b`; + // \foo should not be expandable and not affected by \noexpand or \edef + expect`\let\foo=x\noexpand\foo`.toParseLike`x`; + expect`\let\foo=x\edef\bar{\foo}\def\foo{y}\bar`.toParseLike`y`; + }); + + it("\\let should consume one optional space after equals sign", () => { + // https://tex.stackexchange.com/questions/141166/let-foo-bar-vs-let-foo-bar-let-with-equals-sign + expect`\def\:{\let\space= }\: \text{\space}`.toParseLike`\text{ }`; + const tree = getParsed`\def\bold{\bgroup\bf\let\next= }\bold{a}`; + expect(tree).toMatchSnapshot(); + }); + + it("\\futurelet should parse correctly", () => { + expect`\futurelet\foo\frac1{2+\foo}`.toParseLike`\frac1{2+1}`; + }); + it("\\newcommand doesn't change settings.macros", () => { const macros = {}; expect`\newcommand\foo{x^2}\foo+\foo`.toParse(new Settings({macros}));