mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 19:28:39 +00:00
Advanced macro support and magic \dots (#794)
* Advanced macro support and magic \dots * Fix \relax behavior * Use \DOTSB in \iff, \implies, \impliedby * Add multiple expansion test * Implement some of @kevinbarabash's comments * More @kevinbarabash comments * Token moved from merge * Add type to defineMacro * @flow
This commit is contained in:
committed by
Kevin Barabash
parent
0c2dd845f3
commit
6857689946
@@ -35,12 +35,12 @@ const tokenRegex = new RegExp(
|
||||
"([ \r\n\t]+)|" + // whitespace
|
||||
"([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
||||
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
|
||||
"|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" + // function name
|
||||
"|\\\\(?:[a-zA-Z@]+|[^\uD800-\uDFFF])" + // function name
|
||||
")"
|
||||
);
|
||||
|
||||
/** Main Lexer class */
|
||||
class Lexer implements LexerInterface {
|
||||
export default class Lexer implements LexerInterface {
|
||||
input: string;
|
||||
pos: number;
|
||||
|
||||
@@ -71,5 +71,3 @@ class Lexer implements LexerInterface {
|
||||
return new Token(text, start, end, this);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = Lexer;
|
||||
|
@@ -4,6 +4,7 @@
|
||||
*/
|
||||
|
||||
import Lexer from "./Lexer";
|
||||
import {Token} from "./Token";
|
||||
import builtinMacros from "./macros";
|
||||
import ParseError from "./ParseError";
|
||||
import objectAssign from "object-assign";
|
||||
@@ -17,24 +18,75 @@ class MacroExpander {
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively expand first token, then return first non-expandable token.
|
||||
* Returns the topmost token on the stack, without expanding it.
|
||||
* Similar in behavior to TeX's `\futurelet`.
|
||||
*/
|
||||
future() {
|
||||
if (this.stack.length === 0) {
|
||||
this.stack.push(this.lexer.lex());
|
||||
}
|
||||
return this.stack[this.stack.length - 1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove and return the next unexpanded token.
|
||||
*/
|
||||
popToken() {
|
||||
this.future(); // ensure non-empty stack
|
||||
return this.stack.pop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume all following space tokens, without expansion.
|
||||
*/
|
||||
consumeSpaces() {
|
||||
for (;;) {
|
||||
const token = this.future();
|
||||
if (token.text === " ") {
|
||||
this.stack.pop();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand the next token only once if possible.
|
||||
*
|
||||
* If the token is expanded, the resulting tokens will be pushed onto
|
||||
* the stack in reverse order and will be returned as an array,
|
||||
* also in reverse order.
|
||||
*
|
||||
* If not, the next token will be returned without removing it
|
||||
* from the stack. This case can be detected by a `Token` return value
|
||||
* instead of an `Array` return value.
|
||||
*
|
||||
* In either case, the next token will be on the top of the stack,
|
||||
* or the stack will be empty.
|
||||
*
|
||||
* Used to implement `expandAfterFuture` and `expandNextToken`.
|
||||
*
|
||||
* At the moment, macro expansion doesn't handle delimited macros,
|
||||
* i.e. things like those defined by \def\foo#1\end{…}.
|
||||
* See the TeX book page 202ff. for details on how those should behave.
|
||||
*/
|
||||
nextToken() {
|
||||
for (;;) {
|
||||
if (this.stack.length === 0) {
|
||||
this.stack.push(this.lexer.lex());
|
||||
}
|
||||
const topToken = this.stack.pop();
|
||||
expandOnce() {
|
||||
const topToken = this.popToken();
|
||||
const name = topToken.text;
|
||||
if (!(name.charAt(0) === "\\" && this.macros.hasOwnProperty(name))) {
|
||||
const isMacro = (name.charAt(0) === "\\");
|
||||
if (isMacro) {
|
||||
// Consume all spaces after \macro
|
||||
this.consumeSpaces();
|
||||
}
|
||||
if (!(isMacro && this.macros.hasOwnProperty(name))) {
|
||||
// Fully expanded
|
||||
this.stack.push(topToken);
|
||||
return topToken;
|
||||
}
|
||||
let tok;
|
||||
let expansion = this.macros[name];
|
||||
if (typeof expansion === "function") {
|
||||
expansion = expansion.call(this);
|
||||
}
|
||||
if (typeof expansion === "string") {
|
||||
let numArgs = 0;
|
||||
if (expansion.indexOf("#") !== -1) {
|
||||
@@ -45,26 +97,27 @@ class MacroExpander {
|
||||
}
|
||||
const bodyLexer = new Lexer(expansion);
|
||||
expansion = [];
|
||||
tok = bodyLexer.lex();
|
||||
let tok = bodyLexer.lex();
|
||||
while (tok.text !== "EOF") {
|
||||
expansion.push(tok);
|
||||
tok = bodyLexer.lex();
|
||||
}
|
||||
expansion.reverse(); // to fit in with stack using push and pop
|
||||
expansion.numArgs = numArgs;
|
||||
this.macros[name] = expansion;
|
||||
// TODO: Could cache macro expansions if it originally came as a
|
||||
// String (but not those that come in as a Function).
|
||||
}
|
||||
if (expansion.numArgs) {
|
||||
const args = [];
|
||||
let i;
|
||||
// obtain arguments, either single token or balanced {…} group
|
||||
for (i = 0; i < expansion.numArgs; ++i) {
|
||||
const startOfArg = this.get(true);
|
||||
for (let i = 0; i < expansion.numArgs; ++i) {
|
||||
this.consumeSpaces(); // ignore spaces before each argument
|
||||
const startOfArg = this.popToken();
|
||||
if (startOfArg.text === "{") {
|
||||
const arg = [];
|
||||
let depth = 1;
|
||||
while (depth !== 0) {
|
||||
tok = this.get(false);
|
||||
const tok = this.popToken();
|
||||
arg.push(tok);
|
||||
if (tok.text === "{") {
|
||||
++depth;
|
||||
@@ -88,8 +141,8 @@ class MacroExpander {
|
||||
}
|
||||
// paste arguments in place of the placeholders
|
||||
expansion = expansion.slice(); // make a shallow copy
|
||||
for (i = expansion.length - 1; i >= 0; --i) {
|
||||
tok = expansion[i];
|
||||
for (let i = expansion.length - 1; i >= 0; --i) {
|
||||
let tok = expansion[i];
|
||||
if (tok.text === "#") {
|
||||
if (i === 0) {
|
||||
throw new ParseError(
|
||||
@@ -114,17 +167,56 @@ class MacroExpander {
|
||||
}
|
||||
}
|
||||
}
|
||||
this.stack = this.stack.concat(expansion);
|
||||
// Concatenate expansion onto top of stack.
|
||||
this.stack.push.apply(this.stack, expansion);
|
||||
return expansion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand the next token only once (if possible), and return the resulting
|
||||
* top token on the stack (without removing anything from the stack).
|
||||
* Similar in behavior to TeX's `\expandafter\futurelet`.
|
||||
* Equivalent to expandOnce() followed by future().
|
||||
*/
|
||||
expandAfterFuture() {
|
||||
this.expandOnce();
|
||||
return this.future();
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively expand first token, then return first non-expandable token.
|
||||
*/
|
||||
expandNextToken() {
|
||||
for (;;) {
|
||||
const expanded = this.expandOnce();
|
||||
// expandOnce returns Token if and only if it's fully expanded.
|
||||
if (expanded instanceof Token) {
|
||||
// \relax stops the expansion, but shouldn't get returned (a
|
||||
// null return value couldn't get implemented as a function).
|
||||
if (expanded.text === "\\relax") {
|
||||
this.stack.pop();
|
||||
} else {
|
||||
return this.stack.pop(); // === expanded
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively expand first token, then return first non-expandable token.
|
||||
* If given a `true` argument, skips over any leading whitespace in
|
||||
* expansion, instead returning the first non-whitespace token
|
||||
* (like TeX's \ignorespaces).
|
||||
* Any skipped whitespace is stored in `this.discardedWhiteSpace`
|
||||
* so that `unget` can correctly undo the effects of `get`.
|
||||
*/
|
||||
get(ignoreSpace) {
|
||||
this.discardedWhiteSpace = [];
|
||||
let token = this.nextToken();
|
||||
let token = this.expandNextToken();
|
||||
if (ignoreSpace) {
|
||||
while (token.text === " ") {
|
||||
this.discardedWhiteSpace.push(token);
|
||||
token = this.nextToken();
|
||||
token = this.expandNextToken();
|
||||
}
|
||||
}
|
||||
return token;
|
||||
|
165
src/macros.js
165
src/macros.js
@@ -1,10 +1,14 @@
|
||||
// @flow
|
||||
/**
|
||||
* Predefined macros for KaTeX.
|
||||
* This can be used to define some commands in terms of others.
|
||||
*/
|
||||
|
||||
import symbols from "./symbols";
|
||||
import utils from "./utils";
|
||||
|
||||
// This function might one day accept additional argument and do more things.
|
||||
function defineMacro(name, body) {
|
||||
function defineMacro(name: string, body: string | () => string) {
|
||||
module.exports[name] = body;
|
||||
}
|
||||
|
||||
@@ -26,6 +30,7 @@ defineMacro("\\clap", "\\mathclap{\\textrm{#1}}");
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// amsmath.sty
|
||||
// http://mirrors.concertpass.com/tex-archive/macros/latex/required/amsmath/amsmath.pdf
|
||||
|
||||
// \def\overset#1#2{\binrel@{#2}\binrel@@{\mathop{\kern\z@#2}\limits^{#1}}}
|
||||
defineMacro("\\overset", "\\mathop{#2}\\limits^{#1}");
|
||||
@@ -34,14 +39,162 @@ defineMacro("\\underset", "\\mathop{#2}\\limits_{#1}");
|
||||
// \newcommand{\boxed}[1]{\fbox{\m@th$\displaystyle#1$}}
|
||||
defineMacro("\\boxed", "\\fbox{\\displaystyle{#1}}");
|
||||
|
||||
//TODO: When implementing \dots, should ideally add the \DOTSB indicator
|
||||
// into the macro, to indicate these are binary operators.
|
||||
// \def\iff{\DOTSB\;\Longleftrightarrow\;}
|
||||
// \def\implies{\DOTSB\;\Longrightarrow\;}
|
||||
// \def\impliedby{\DOTSB\;\Longleftarrow\;}
|
||||
defineMacro("\\iff", "\\;\\Longleftrightarrow\\;");
|
||||
defineMacro("\\implies", "\\;\\Longrightarrow\\;");
|
||||
defineMacro("\\impliedby", "\\;\\Longleftarrow\\;");
|
||||
defineMacro("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;");
|
||||
defineMacro("\\implies", "\\DOTSB\\;\\Longrightarrow\\;");
|
||||
defineMacro("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;");
|
||||
|
||||
// AMSMath's automatic \dots, based on \mdots@@ macro.
|
||||
const dotsByToken = {
|
||||
',': '\\dotsc',
|
||||
'\\not': '\\dotsb',
|
||||
// \keybin@ checks for the following:
|
||||
'+': '\\dotsb',
|
||||
'=': '\\dotsb',
|
||||
'<': '\\dotsb',
|
||||
'>': '\\dotsb',
|
||||
'-': '\\dotsb',
|
||||
'*': '\\dotsb',
|
||||
':': '\\dotsb',
|
||||
// Symbols whose definition starts with \DOTSB:
|
||||
'\\DOTSB': '\\dotsb',
|
||||
'\\coprod': '\\dotsb',
|
||||
'\\bigvee': '\\dotsb',
|
||||
'\\bigwedge': '\\dotsb',
|
||||
'\\biguplus': '\\dotsb',
|
||||
'\\bigcap': '\\dotsb',
|
||||
'\\bigcup': '\\dotsb',
|
||||
'\\prod': '\\dotsb',
|
||||
'\\sum': '\\dotsb',
|
||||
'\\bigotimes': '\\dotsb',
|
||||
'\\bigoplus': '\\dotsb',
|
||||
'\\bigodot': '\\dotsb',
|
||||
'\\bigsqcup': '\\dotsb',
|
||||
'\\implies': '\\dotsb',
|
||||
'\\impliedby': '\\dotsb',
|
||||
'\\And': '\\dotsb',
|
||||
'\\longrightarrow': '\\dotsb',
|
||||
'\\Longrightarrow': '\\dotsb',
|
||||
'\\longleftarrow': '\\dotsb',
|
||||
'\\Longleftarrow': '\\dotsb',
|
||||
'\\longleftrightarrow': '\\dotsb',
|
||||
'\\Longleftrightarrow': '\\dotsb',
|
||||
'\\mapsto': '\\dotsb',
|
||||
'\\longmapsto': '\\dotsb',
|
||||
'\\hookrightarrow': '\\dotsb',
|
||||
'\\iff': '\\dotsb',
|
||||
'\\doteq': '\\dotsb',
|
||||
// Symbols whose definition starts with \mathbin:
|
||||
'\\mathbin': '\\dotsb',
|
||||
'\\bmod': '\\dotsb',
|
||||
// Symbols whose definition starts with \mathrel:
|
||||
'\\mathrel': '\\dotsb',
|
||||
'\\relbar': '\\dotsb',
|
||||
'\\Relbar': '\\dotsb',
|
||||
'\\xrightarrow': '\\dotsb',
|
||||
'\\xleftarrow': '\\dotsb',
|
||||
// Symbols whose definition starts with \DOTSI:
|
||||
'\\DOTSI': '\\dotsi',
|
||||
'\\int': '\\dotsi',
|
||||
'\\oint': '\\dotsi',
|
||||
'\\iint': '\\dotsi',
|
||||
'\\iiint': '\\dotsi',
|
||||
'\\iiiint': '\\dotsi',
|
||||
'\\idotsint': '\\dotsi',
|
||||
// Symbols whose definition starts with \DOTSX:
|
||||
'\\DOTSX': '\\dotsx',
|
||||
};
|
||||
|
||||
defineMacro("\\dots", function() {
|
||||
// TODO: If used in text mode, should expand to \textellipsis.
|
||||
// However, in KaTeX, \textellipsis and \ldots behave the same
|
||||
// (in text mode), and it's unlikely we'd see any of the math commands
|
||||
// that affect the behavior of \dots when in text mode. So fine for now
|
||||
// (until we support \ifmmode ... \else ... \fi).
|
||||
let thedots = '\\dotso';
|
||||
const next = this.expandAfterFuture().text;
|
||||
if (next in dotsByToken) {
|
||||
thedots = dotsByToken[next];
|
||||
} else if (next.substr(0, 4) === '\\not') {
|
||||
thedots = '\\dotsb';
|
||||
} else if (next in symbols.math) {
|
||||
if (utils.contains(['bin', 'rel'], symbols.math[next].group)) {
|
||||
thedots = '\\dotsb';
|
||||
}
|
||||
}
|
||||
return thedots;
|
||||
});
|
||||
|
||||
const spaceAfterDots = {
|
||||
// \rightdelim@ checks for the following:
|
||||
')': true,
|
||||
']': true,
|
||||
'\\rbrack': true,
|
||||
'\\}': true,
|
||||
'\\rbrace': true,
|
||||
'\\rangle': true,
|
||||
'\\rceil': true,
|
||||
'\\rfloor': true,
|
||||
'\\rgroup': true,
|
||||
'\\rmoustache': true,
|
||||
'\\right': true,
|
||||
'\\bigr': true,
|
||||
'\\biggr': true,
|
||||
'\\Bigr': true,
|
||||
'\\Biggr': true,
|
||||
// \extra@ also tests for the following:
|
||||
'$': true,
|
||||
// \extrap@ checks for the following:
|
||||
';': true,
|
||||
'.': true,
|
||||
',': true,
|
||||
};
|
||||
|
||||
defineMacro("\\dotso", function() {
|
||||
const next = this.future().text;
|
||||
if (next in spaceAfterDots) {
|
||||
return "\\ldots\\,";
|
||||
} else {
|
||||
return "\\ldots";
|
||||
}
|
||||
});
|
||||
|
||||
defineMacro("\\dotsc", function() {
|
||||
const next = this.future().text;
|
||||
// \dotsc uses \extra@ but not \extrap@, instead specially checking for
|
||||
// ';' and '.', but doesn't check for ','.
|
||||
if (next in spaceAfterDots && next !== ',') {
|
||||
return "\\ldots\\,";
|
||||
} else {
|
||||
return "\\ldots";
|
||||
}
|
||||
});
|
||||
|
||||
defineMacro("\\cdots", function() {
|
||||
const next = this.future().text;
|
||||
if (next in spaceAfterDots) {
|
||||
return "\\@cdots\\,";
|
||||
} else {
|
||||
return "\\@cdots";
|
||||
}
|
||||
});
|
||||
|
||||
defineMacro("\\dotsb", "\\cdots");
|
||||
defineMacro("\\dotsm", "\\cdots");
|
||||
defineMacro("\\dotsi", "\\!\\cdots");
|
||||
// amsmath doesn't actually define \dotsx, but \dots followed by a macro
|
||||
// starting with \DOTSX implies \dotso, and then \extra@ detects this case
|
||||
// and forces the added `\,`.
|
||||
// The thin space must be written "\\," here: in a JS string literal "\," is
// an identity escape that evaluates to a bare ",", which would make \dotsx
// expand to an ellipsis followed by a literal comma instead of a thin space.
defineMacro("\\dotsx", "\\ldots\\,");
|
||||
|
||||
// \let\DOTSI\relax
|
||||
// \let\DOTSB\relax
|
||||
// \let\DOTSX\relax
|
||||
defineMacro("\\DOTSI", "\\relax");
|
||||
defineMacro("\\DOTSB", "\\relax");
|
||||
defineMacro("\\DOTSX", "\\relax");
|
||||
|
||||
// http://texdoc.net/texmf-dist/doc/latex/amsmath/amsmath.pdf
|
||||
defineMacro("\\thinspace", "\\,"); // \let\thinspace\,
|
||||
|
@@ -602,7 +602,7 @@ defineSymbol(text, main, inner, "\u2026", "\\textellipsis");
|
||||
defineSymbol(math, main, inner, "\u2026", "\\mathellipsis");
|
||||
defineSymbol(text, main, inner, "\u2026", "\\ldots", true);
|
||||
defineSymbol(math, main, inner, "\u2026", "\\ldots", true);
|
||||
defineSymbol(math, main, inner, "\u22ef", "\\cdots", true);
|
||||
defineSymbol(math, main, inner, "\u22ef", "\\@cdots", true);
|
||||
defineSymbol(math, main, inner, "\u22f1", "\\ddots", true);
|
||||
defineSymbol(math, main, textord, "\u22ee", "\\vdots", true);
|
||||
defineSymbol(math, main, accent, "\u00b4", "\\acute");
|
||||
|
@@ -783,6 +783,11 @@ describe("A text parser", function() {
|
||||
it("should parse math within text group", function() {
|
||||
expect(textWithEmbeddedMath).toParse();
|
||||
});
|
||||
|
||||
it("should omit spaces after commands", function() {
|
||||
expect("\\text{\\textellipsis !}")
|
||||
.toParseLike("\\text{\\textellipsis!}");
|
||||
});
|
||||
});
|
||||
|
||||
describe("A color parser", function() {
|
||||
@@ -2314,6 +2319,27 @@ describe("A macro expander", function() {
|
||||
compareParseTree("e^\\foo", "e^1 23", {"\\foo": "123"});
|
||||
});
|
||||
|
||||
it("should preserve leading spaces inside macro definition", function() {
|
||||
compareParseTree("\\text{\\foo}", "\\text{ x}", {"\\foo": " x"});
|
||||
});
|
||||
|
||||
it("should preserve leading spaces inside macro argument", function() {
|
||||
compareParseTree("\\text{\\foo{ x}}", "\\text{ x}", {"\\foo": "#1"});
|
||||
});
|
||||
|
||||
it("should ignore expanded spaces in math mode", function() {
|
||||
compareParseTree("\\foo", "x", {"\\foo": " x"});
|
||||
});
|
||||
|
||||
it("should consume spaces after macro", function() {
|
||||
compareParseTree("\\text{\\foo }", "\\text{x}", {"\\foo": "x"});
|
||||
});
|
||||
|
||||
it("should consume spaces between arguments", function() {
|
||||
compareParseTree("\\text{\\foo 1 2}", "\\text{12end}", {"\\foo": "#1#2end"});
|
||||
compareParseTree("\\text{\\foo {1} {2}}", "\\text{12end}", {"\\foo": "#1#2end"});
|
||||
});
|
||||
|
||||
it("should allow for multiple expansion", function() {
|
||||
compareParseTree("1\\foo2", "1aa2", {
|
||||
"\\foo": "\\bar\\bar",
|
||||
@@ -2321,6 +2347,41 @@ describe("A macro expander", function() {
|
||||
});
|
||||
});
|
||||
|
||||
it("should allow for multiple expansion with argument", function() {
|
||||
compareParseTree("1\\foo2", "12222", {
|
||||
"\\foo": "\\bar{#1}\\bar{#1}",
|
||||
"\\bar": "#1#1",
|
||||
});
|
||||
});
|
||||
|
||||
it("should allow for macro argument", function() {
|
||||
compareParseTree("\\foo\\bar", "(x)", {
|
||||
"\\foo": "(#1)",
|
||||
"\\bar": "x",
|
||||
});
|
||||
});
|
||||
|
||||
it("should allow for space macro argument (text version)", function() {
|
||||
compareParseTree("\\text{\\foo\\bar}", "\\text{( )}", {
|
||||
"\\foo": "(#1)",
|
||||
"\\bar": " ",
|
||||
});
|
||||
});
|
||||
|
||||
it("should allow for space macro argument (math version)", function() {
|
||||
compareParseTree("\\foo\\bar", "()", {
|
||||
"\\foo": "(#1)",
|
||||
"\\bar": " ",
|
||||
});
|
||||
});
|
||||
|
||||
it("should allow for empty macro argument", function() {
|
||||
compareParseTree("\\foo\\bar", "()", {
|
||||
"\\foo": "(#1)",
|
||||
"\\bar": "",
|
||||
});
|
||||
});
|
||||
|
||||
it("should expand the \\overset macro as expected", function() {
|
||||
expect("\\overset?=").toParseLike("\\mathop{=}\\limits^{?}");
|
||||
expect("\\overset{x=y}{\sqrt{ab}}")
|
||||
|
BIN
test/screenshotter/images/Dots-chrome.png
Normal file
BIN
test/screenshotter/images/Dots-chrome.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 7.7 KiB |
BIN
test/screenshotter/images/Dots-firefox.png
Normal file
BIN
test/screenshotter/images/Dots-firefox.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.8 KiB |
@@ -77,6 +77,11 @@ DisplayMode:
|
||||
DisplayStyle: |
|
||||
{\displaystyle\sqrt{x}}{\sqrt{x}}
|
||||
{\displaystyle \frac12}{\frac12}{\displaystyle x^1_2}{x^1_2}
|
||||
Dots: |
|
||||
\begin{array}{l}
|
||||
\cdots;\dots+\dots\int\dots,\dots \\
|
||||
\cdots{};\ldots+\ldots\int\ldots,\ldots
|
||||
\end{array}
|
||||
Exponents: a^{a^a_a}_{a^a_a}
|
||||
ExtensibleArrows: |
|
||||
\begin{array}{l}
|
||||
|
Reference in New Issue
Block a user