Advanced macro support and magic \dots (#794)

* Advanced macro support and magic \dots

* Fix \relax behavior

* Use \DOTSB in \iff, \implies, \impliedby

* Add multiple expansion test

* Implement some of @kevinbarabash's comments

* More @kevinbarabash comments

* Token moved from merge

* Add type to defineMacro

* @flow
This commit is contained in:
Erik Demaine
2017-09-05 09:27:04 +09:00
committed by Kevin Barabash
parent 0c2dd845f3
commit 6857689946
8 changed files with 406 additions and 97 deletions

View File

@@ -35,12 +35,12 @@ const tokenRegex = new RegExp(
"([ \r\n\t]+)|" + // whitespace
"([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
"|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" + // function name
"|\\\\(?:[a-zA-Z@]+|[^\uD800-\uDFFF])" + // function name
")"
);
/** Main Lexer class */
class Lexer implements LexerInterface {
export default class Lexer implements LexerInterface {
input: string;
pos: number;
@@ -71,5 +71,3 @@ class Lexer implements LexerInterface {
return new Token(text, start, end, this);
}
}
module.exports = Lexer;

View File

@@ -4,6 +4,7 @@
*/
import Lexer from "./Lexer";
import {Token} from "./Token";
import builtinMacros from "./macros";
import ParseError from "./ParseError";
import objectAssign from "object-assign";
@@ -17,24 +18,75 @@ class MacroExpander {
}
/**
* Recursively expand first token, then return first non-expandable token.
* Returns the topmost token on the stack, without expanding it.
* Similar in behavior to TeX's `\futurelet`.
*/
future() {
if (this.stack.length === 0) {
this.stack.push(this.lexer.lex());
}
return this.stack[this.stack.length - 1];
}
/**
* Remove and return the next unexpanded token.
*/
popToken() {
this.future(); // ensure non-empty stack
return this.stack.pop();
}
/**
* Consume all following space tokens, without expansion.
*/
consumeSpaces() {
for (;;) {
const token = this.future();
if (token.text === " ") {
this.stack.pop();
} else {
break;
}
}
}
/**
* Expand the next token only once if possible.
*
* If the token is expanded, the resulting tokens will be pushed onto
* the stack in reverse order and will be returned as an array,
* also in reverse order.
*
* If not, the next token will be returned without removing it
* from the stack. This case can be detected by a `Token` return value
* instead of an `Array` return value.
*
* In either case, the next token will be on the top of the stack,
* or the stack will be empty.
*
* Used to implement `expandAfterFuture` and `expandNextToken`.
*
* At the moment, macro expansion doesn't handle delimited macros,
* i.e. things like those defined by \def\foo#1\end{…}.
* See the TeX book page 202ff. for details on how those should behave.
*/
nextToken() {
for (;;) {
if (this.stack.length === 0) {
this.stack.push(this.lexer.lex());
}
const topToken = this.stack.pop();
expandOnce() {
const topToken = this.popToken();
const name = topToken.text;
if (!(name.charAt(0) === "\\" && this.macros.hasOwnProperty(name))) {
const isMacro = (name.charAt(0) === "\\");
if (isMacro) {
// Consume all spaces after \macro
this.consumeSpaces();
}
if (!(isMacro && this.macros.hasOwnProperty(name))) {
// Fully expanded
this.stack.push(topToken);
return topToken;
}
let tok;
let expansion = this.macros[name];
if (typeof expansion === "function") {
expansion = expansion.call(this);
}
if (typeof expansion === "string") {
let numArgs = 0;
if (expansion.indexOf("#") !== -1) {
@@ -45,26 +97,27 @@ class MacroExpander {
}
const bodyLexer = new Lexer(expansion);
expansion = [];
tok = bodyLexer.lex();
let tok = bodyLexer.lex();
while (tok.text !== "EOF") {
expansion.push(tok);
tok = bodyLexer.lex();
}
expansion.reverse(); // to fit in with stack using push and pop
expansion.numArgs = numArgs;
this.macros[name] = expansion;
// TODO: Could cache macro expansions if it originally came as a
// String (but not those that come in as a Function).
}
if (expansion.numArgs) {
const args = [];
let i;
// obtain arguments, either single token or balanced {…} group
for (i = 0; i < expansion.numArgs; ++i) {
const startOfArg = this.get(true);
for (let i = 0; i < expansion.numArgs; ++i) {
this.consumeSpaces(); // ignore spaces before each argument
const startOfArg = this.popToken();
if (startOfArg.text === "{") {
const arg = [];
let depth = 1;
while (depth !== 0) {
tok = this.get(false);
const tok = this.popToken();
arg.push(tok);
if (tok.text === "{") {
++depth;
@@ -88,8 +141,8 @@ class MacroExpander {
}
// paste arguments in place of the placeholders
expansion = expansion.slice(); // make a shallow copy
for (i = expansion.length - 1; i >= 0; --i) {
tok = expansion[i];
for (let i = expansion.length - 1; i >= 0; --i) {
let tok = expansion[i];
if (tok.text === "#") {
if (i === 0) {
throw new ParseError(
@@ -114,17 +167,56 @@ class MacroExpander {
}
}
}
this.stack = this.stack.concat(expansion);
// Concatenate expansion onto top of stack.
this.stack.push.apply(this.stack, expansion);
return expansion;
}
/**
* Expand the next token only once (if possible), and return the resulting
* top token on the stack (without removing anything from the stack).
* Similar in behavior to TeX's `\expandafter\futurelet`.
* Equivalent to expandOnce() followed by future().
*/
expandAfterFuture() {
this.expandOnce();
return this.future();
}
/**
* Recursively expand first token, then return first non-expandable token.
*/
expandNextToken() {
for (;;) {
const expanded = this.expandOnce();
// expandOnce returns Token if and only if it's fully expanded.
if (expanded instanceof Token) {
// \relax stops the expansion, but shouldn't get returned (a
// null return value couldn't get implemented as a function).
if (expanded.text === "\\relax") {
this.stack.pop();
} else {
return this.stack.pop(); // === expanded
}
}
}
}
/**
* Recursively expand first token, then return first non-expandable token.
* If given a `true` argument, skips over any leading whitespace in
* expansion, instead returning the first non-whitespace token
* (like TeX's \ignorespaces).
* Any skipped whitespace is stored in `this.discardedWhiteSpace`
* so that `unget` can correctly undo the effects of `get`.
*/
get(ignoreSpace) {
this.discardedWhiteSpace = [];
let token = this.nextToken();
let token = this.expandNextToken();
if (ignoreSpace) {
while (token.text === " ") {
this.discardedWhiteSpace.push(token);
token = this.nextToken();
token = this.expandNextToken();
}
}
return token;

View File

@@ -1,10 +1,14 @@
// @flow
/**
* Predefined macros for KaTeX.
* This can be used to define some commands in terms of others.
*/
import symbols from "./symbols";
import utils from "./utils";
// This function might one day accept additional arguments and do more things.
function defineMacro(name, body) {
function defineMacro(name: string, body: string | () => string) {
module.exports[name] = body;
}
@@ -26,6 +30,7 @@ defineMacro("\\clap", "\\mathclap{\\textrm{#1}}");
//////////////////////////////////////////////////////////////////////
// amsmath.sty
// http://mirrors.concertpass.com/tex-archive/macros/latex/required/amsmath/amsmath.pdf
// \def\overset#1#2{\binrel@{#2}\binrel@@{\mathop{\kern\z@#2}\limits^{#1}}}
defineMacro("\\overset", "\\mathop{#2}\\limits^{#1}");
@@ -34,14 +39,162 @@ defineMacro("\\underset", "\\mathop{#2}\\limits_{#1}");
// \newcommand{\boxed}[1]{\fbox{\m@th$\displaystyle#1$}}
defineMacro("\\boxed", "\\fbox{\\displaystyle{#1}}");
//TODO: When implementing \dots, should ideally add the \DOTSB indicator
// into the macro, to indicate these are binary operators.
// \def\iff{\DOTSB\;\Longleftrightarrow\;}
// \def\implies{\DOTSB\;\Longrightarrow\;}
// \def\impliedby{\DOTSB\;\Longleftarrow\;}
defineMacro("\\iff", "\\;\\Longleftrightarrow\\;");
defineMacro("\\implies", "\\;\\Longrightarrow\\;");
defineMacro("\\impliedby", "\\;\\Longleftarrow\\;");
defineMacro("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;");
defineMacro("\\implies", "\\DOTSB\\;\\Longrightarrow\\;");
defineMacro("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;");
// AMSMath's automatic \dots, based on \mdots@@ macro.
const dotsByToken = {
',': '\\dotsc',
'\\not': '\\dotsb',
// \keybin@ checks for the following:
'+': '\\dotsb',
'=': '\\dotsb',
'<': '\\dotsb',
'>': '\\dotsb',
'-': '\\dotsb',
'*': '\\dotsb',
':': '\\dotsb',
// Symbols whose definition starts with \DOTSB:
'\\DOTSB': '\\dotsb',
'\\coprod': '\\dotsb',
'\\bigvee': '\\dotsb',
'\\bigwedge': '\\dotsb',
'\\biguplus': '\\dotsb',
'\\bigcap': '\\dotsb',
'\\bigcup': '\\dotsb',
'\\prod': '\\dotsb',
'\\sum': '\\dotsb',
'\\bigotimes': '\\dotsb',
'\\bigoplus': '\\dotsb',
'\\bigodot': '\\dotsb',
'\\bigsqcup': '\\dotsb',
'\\implies': '\\dotsb',
'\\impliedby': '\\dotsb',
'\\And': '\\dotsb',
'\\longrightarrow': '\\dotsb',
'\\Longrightarrow': '\\dotsb',
'\\longleftarrow': '\\dotsb',
'\\Longleftarrow': '\\dotsb',
'\\longleftrightarrow': '\\dotsb',
'\\Longleftrightarrow': '\\dotsb',
'\\mapsto': '\\dotsb',
'\\longmapsto': '\\dotsb',
'\\hookrightarrow': '\\dotsb',
'\\iff': '\\dotsb',
'\\doteq': '\\dotsb',
// Symbols whose definition starts with \mathbin:
'\\mathbin': '\\dotsb',
'\\bmod': '\\dotsb',
// Symbols whose definition starts with \mathrel:
'\\mathrel': '\\dotsb',
'\\relbar': '\\dotsb',
'\\Relbar': '\\dotsb',
'\\xrightarrow': '\\dotsb',
'\\xleftarrow': '\\dotsb',
// Symbols whose definition starts with \DOTSI:
'\\DOTSI': '\\dotsi',
'\\int': '\\dotsi',
'\\oint': '\\dotsi',
'\\iint': '\\dotsi',
'\\iiint': '\\dotsi',
'\\iiiint': '\\dotsi',
'\\idotsint': '\\dotsi',
// Symbols whose definition starts with \DOTSX:
'\\DOTSX': '\\dotsx',
};
defineMacro("\\dots", function() {
    // TODO: In text mode this should expand to \textellipsis. In KaTeX,
    // however, \textellipsis and \ldots behave the same in text mode, and
    // the math commands that influence \dots are unlikely to show up
    // there, so this is fine for now (until \ifmmode ... \else ... \fi
    // is supported).

    // Expand the following token once and look at what ends up next.
    const next = this.expandAfterFuture().text;

    // Explicitly listed tokens take priority.
    if (next in dotsByToken) {
        return dotsByToken[next];
    }
    // Any command whose name starts with \not is treated like \not.
    if (next.substr(0, 4) === '\\not') {
        return '\\dotsb';
    }
    // Math symbols in the 'bin' or 'rel' group get \dotsb.
    if (next in symbols.math &&
        utils.contains(['bin', 'rel'], symbols.math[next].group)) {
        return '\\dotsb';
    }
    // Everything else falls back to \dotso.
    return '\\dotso';
});
const spaceAfterDots = {
// \rightdelim@ checks for the following:
')': true,
']': true,
'\\rbrack': true,
'\\}': true,
'\\rbrace': true,
'\\rangle': true,
'\\rceil': true,
'\\rfloor': true,
'\\rgroup': true,
'\\rmoustache': true,
'\\right': true,
'\\bigr': true,
'\\biggr': true,
'\\Bigr': true,
'\\Biggr': true,
// \extra@ also tests for the following:
'$': true,
// \extrap@ checks for the following:
';': true,
'.': true,
',': true,
};
defineMacro("\\dotso", function() {
    // Peek at the upcoming token (no expansion) and add a thin space
    // when it is one of the tokens listed in spaceAfterDots.
    const upcoming = this.future().text;
    return (upcoming in spaceAfterDots) ? "\\ldots\\," : "\\ldots";
});
defineMacro("\\dotsc", function() {
    const upcoming = this.future().text;
    // \dotsc uses \extra@ but not \extrap@: ';' and '.' are checked
    // specially, while ',' is not, so a following comma never adds space.
    const wantsThinSpace = upcoming !== ',' && upcoming in spaceAfterDots;
    return wantsThinSpace ? "\\ldots\\," : "\\ldots";
});
defineMacro("\\cdots", function() {
    // Same spacing rule as \dotso, but built on the \@cdots symbol.
    const upcoming = this.future().text;
    return (upcoming in spaceAfterDots) ? "\\@cdots\\," : "\\@cdots";
});
// \dotsb and \dotsm are plain aliases for \cdots; \dotsi puts a negative
// thin space (\!) in front of it.
defineMacro("\\dotsb", "\\cdots");
defineMacro("\\dotsm", "\\cdots");
defineMacro("\\dotsi", "\\!\\cdots");
// amsmath doesn't actually define \dotsx, but \dots followed by a macro
// starting with \DOTSX implies \dotso, and then \extra@ detects this case
// and forces the added `\,`.
// The backslash before the comma must be escaped in the JS string literal:
// "\," is not a recognised escape and evaluates to a bare ",", which would
// make \dotsx expand to `\ldots,` (a literal comma) instead of `\ldots\,`.
defineMacro("\\dotsx", "\\ldots\\,");
// \let\DOTSI\relax
// \let\DOTSB\relax
// \let\DOTSX\relax
// The \DOTS* markers only exist so \dots can inspect them; on their own
// they expand to \relax.
defineMacro("\\DOTSI", "\\relax");
defineMacro("\\DOTSB", "\\relax");
defineMacro("\\DOTSX", "\\relax");
// http://texdoc.net/texmf-dist/doc/latex/amsmath/amsmath.pdf
defineMacro("\\thinspace", "\\,"); // \let\thinspace\,

View File

@@ -602,7 +602,7 @@ defineSymbol(text, main, inner, "\u2026", "\\textellipsis");
defineSymbol(math, main, inner, "\u2026", "\\mathellipsis");
defineSymbol(text, main, inner, "\u2026", "\\ldots", true);
defineSymbol(math, main, inner, "\u2026", "\\ldots", true);
defineSymbol(math, main, inner, "\u22ef", "\\cdots", true);
defineSymbol(math, main, inner, "\u22ef", "\\@cdots", true);
defineSymbol(math, main, inner, "\u22f1", "\\ddots", true);
defineSymbol(math, main, textord, "\u22ee", "\\vdots", true);
defineSymbol(math, main, accent, "\u00b4", "\\acute");

View File

@@ -783,6 +783,11 @@ describe("A text parser", function() {
it("should parse math within text group", function() {
expect(textWithEmbeddedMath).toParse();
});
it("should omit spaces after commands", function() {
expect("\\text{\\textellipsis !}")
.toParseLike("\\text{\\textellipsis!}");
});
});
describe("A color parser", function() {
@@ -2314,6 +2319,27 @@ describe("A macro expander", function() {
compareParseTree("e^\\foo", "e^1 23", {"\\foo": "123"});
});
it("should preserve leading spaces inside macro definition", function() {
compareParseTree("\\text{\\foo}", "\\text{ x}", {"\\foo": " x"});
});
it("should preserve leading spaces inside macro argument", function() {
compareParseTree("\\text{\\foo{ x}}", "\\text{ x}", {"\\foo": "#1"});
});
it("should ignore expanded spaces in math mode", function() {
compareParseTree("\\foo", "x", {"\\foo": " x"});
});
it("should consume spaces after macro", function() {
compareParseTree("\\text{\\foo }", "\\text{x}", {"\\foo": "x"});
});
it("should consume spaces between arguments", function() {
compareParseTree("\\text{\\foo 1 2}", "\\text{12end}", {"\\foo": "#1#2end"});
compareParseTree("\\text{\\foo {1} {2}}", "\\text{12end}", {"\\foo": "#1#2end"});
});
it("should allow for multiple expansion", function() {
compareParseTree("1\\foo2", "1aa2", {
"\\foo": "\\bar\\bar",
@@ -2321,6 +2347,41 @@ describe("A macro expander", function() {
});
});
it("should allow for multiple expansion with argument", function() {
compareParseTree("1\\foo2", "12222", {
"\\foo": "\\bar{#1}\\bar{#1}",
"\\bar": "#1#1",
});
});
it("should allow for macro argument", function() {
compareParseTree("\\foo\\bar", "(x)", {
"\\foo": "(#1)",
"\\bar": "x",
});
});
it("should allow for space macro argument (text version)", function() {
compareParseTree("\\text{\\foo\\bar}", "\\text{( )}", {
"\\foo": "(#1)",
"\\bar": " ",
});
});
it("should allow for space macro argument (math version)", function() {
compareParseTree("\\foo\\bar", "()", {
"\\foo": "(#1)",
"\\bar": " ",
});
});
it("should allow for empty macro argument", function() {
compareParseTree("\\foo\\bar", "()", {
"\\foo": "(#1)",
"\\bar": "",
});
});
it("should expand the \\overset macro as expected", function() {
expect("\\overset?=").toParseLike("\\mathop{=}\\limits^{?}");
expect("\\overset{x=y}{\sqrt{ab}}")

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

View File

@@ -77,6 +77,11 @@ DisplayMode:
DisplayStyle: |
{\displaystyle\sqrt{x}}{\sqrt{x}}
{\displaystyle \frac12}{\frac12}{\displaystyle x^1_2}{x^1_2}
Dots: |
\begin{array}{l}
\cdots;\dots+\dots\int\dots,\dots \\
\cdots{};\ldots+\ldots\int\ldots,\ldots
\end{array}
Exponents: a^{a^a_a}_{a^a_a}
ExtensibleArrows: |
\begin{array}{l}