Files
KaTeX/src/macros.js
Erik Demaine 484d44ee70 Unicode accents (#992)
* Unicode accents

* Lexer now looks for combining dicritical marks and adds them to the same character
* Parser's `parseSymbol` now recognizes both combined and uncombined forms of Unicode accents, and builds accent objects just like the accent functions
* Added CJK support to math mode (not just text mode)

* Add invalid combining character test

* Add MathML test

* Add weak support for other Latin-1 characters

This maintains backwards compatibility, but it uses the wrong font.
There's a TODO to fix this later.

Also refactor symbol code to use for..of

* Update Unicode screenshot

* Remove dot from accented i and j (in math mode)

Also add dotless Unicode characters to support some accented i's and j's

* Fix \imath, \jmath, \pounds, and more tests

* Switch from for..of to .split().forEach()

Save around 800 bytes in minified code

* Fix split

* normalize() detection

* Convert back to vanilla for loops

* Fix merge

* Move normalize dependency to unicodeMake.js

* Make unicodeSymbols into a lookup table instead of macros

This is important for multi-accented characters.

* Add comments about when to run

* Move symbols definition into unicodeMake/Symbols.js

* Remove CJK support in text mode

* Add missing semicolon

* Refactor unicodeAccents to its own file

* Dotless i/j support in text mode

* Remove excess character mappings

* Fix Åå in math mode (still via Times)

* Update to support #1030

* Add accented Greek letter support (for supported Greek symbols)

* Update screenshot

* remove Æ, æ, Ø, ø, and ß from math mode test
2017-12-28 23:32:45 -07:00

404 lines
15 KiB
JavaScript

// @flow
/**
* Predefined macros for KaTeX.
* This can be used to define some commands in terms of others.
*/
import fontMetricsData from "./fontMetricsData";
import symbols from "./symbols";
import utils from "./utils";
import {Token} from "./Token";
/**
* Provides context to macros defined by functions. Implemented by
* MacroExpander.
*/
export interface MacroContextInterface {
/**
* Returns the topmost token on the stack, without expanding it.
* Similar in behavior to TeX's `\futurelet`.
*/
future(): Token;
/**
* Expand the next token only once (if possible), and return the resulting
* top token on the stack (without removing anything from the stack).
* Similar in behavior to TeX's `\expandafter\futurelet`.
*/
expandAfterFuture(): Token;
/**
* Consume the specified number of arguments from the token stream,
* and return the resulting array of arguments.
*/
consumeArgs(numArgs: number): Token[][];
}
/** Macro tokens (in reverse order). */
export type MacroExpansion = {tokens: Token[], numArgs: number};
type MacroDefinition = string | MacroExpansion |
(MacroContextInterface => (string | MacroExpansion));
export type MacroMap = {[string]: MacroDefinition};
const builtinMacros: MacroMap = {};
export default builtinMacros;
// This function might one day accept an additional argument and do more things.
export function defineMacro(name: string, body: MacroDefinition) {
builtinMacros[name] = body;
}
//////////////////////////////////////////////////////////////////////
// macro tools
// LaTeX's \@firstoftwo{#1}{#2} expands to #1, skipping #2
// TeX source: \long\def\@firstoftwo#1#2{#1}
defineMacro("\\@firstoftwo", function(context) {
const args = context.consumeArgs(2);
return {tokens: args[0], numArgs: 0};
});
// LaTeX's \@secondoftwo{#1}{#2} expands to #2, skipping #1
// TeX source: \long\def\@secondoftwo#1#2{#2}
defineMacro("\\@secondoftwo", function(context) {
const args = context.consumeArgs(2);
return {tokens: args[1], numArgs: 0};
});
// LaTeX's \@ifnextchar{#1}{#2}{#3} looks ahead to the next (unexpanded)
// symbol. If it matches #1, then the macro expands to #2; otherwise, #3.
// Note, however, that it does not consume the next symbol in either case.
defineMacro("\\@ifnextchar", function(context) {
const args = context.consumeArgs(3); // symbol, if, else
const nextToken = context.future();
if (args[0].length === 1 && args[0][0].text === nextToken.text) {
return {tokens: args[1], numArgs: 0};
} else {
return {tokens: args[2], numArgs: 0};
}
});
// LaTeX's \@ifstar{#1}{#2} looks ahead to the next (unexpanded) symbol.
// If it is `*`, then it consumes the symbol, and the macro expands to #1;
// otherwise, the macro expands to #2 (without consuming the symbol).
// TeX source: \def\@ifstar#1{\@ifnextchar *{\@firstoftwo{#1}}}
defineMacro("\\@ifstar", "\\@ifnextchar *{\\@firstoftwo{#1}}");
// LaTeX's \TextOrMath{#1}{#2} expands to #1 in text mode, #2 in math mode
defineMacro("\\TextOrMath", function(context) {
const args = context.consumeArgs(2);
if (context.mode === 'text') {
return {tokens: args[0], numArgs: 0};
} else {
return {tokens: args[1], numArgs: 0};
}
});
//////////////////////////////////////////////////////////////////////
// basics
defineMacro("\\bgroup", "{");
defineMacro("\\egroup", "}");
defineMacro("\\begingroup", "{");
defineMacro("\\endgroup", "}");
// Unicode double-struck letters
defineMacro("\u2102", "\\mathbb{C}");
defineMacro("\u210D", "\\mathbb{H}");
defineMacro("\u2115", "\\mathbb{N}");
defineMacro("\u2119", "\\mathbb{P}");
defineMacro("\u211A", "\\mathbb{Q}");
defineMacro("\u211D", "\\mathbb{R}");
defineMacro("\u2124", "\\mathbb{Z}");
// Unicode middle dot
// The KaTeX fonts do not contain U+00B7. Instead, \cdotp displays
// the dot at U+22C5 and gives it punct spacing.
defineMacro("\u00b7", "\\cdotp");
// \llap and \rlap render their contents in text mode
defineMacro("\\llap", "\\mathllap{\\textrm{#1}}");
defineMacro("\\rlap", "\\mathrlap{\\textrm{#1}}");
defineMacro("\\clap", "\\mathclap{\\textrm{#1}}");
//////////////////////////////////////////////////////////////////////
// amsmath.sty
// http://mirrors.concertpass.com/tex-archive/macros/latex/required/amsmath/amsmath.pdf
// \def\overset#1#2{\binrel@{#2}\binrel@@{\mathop{\kern\z@#2}\limits^{#1}}}
defineMacro("\\overset", "\\mathop{#2}\\limits^{#1}");
defineMacro("\\underset", "\\mathop{#2}\\limits_{#1}");
// \newcommand{\boxed}[1]{\fbox{\m@th$\displaystyle#1$}}
defineMacro("\\boxed", "\\fbox{\\displaystyle{#1}}");
// \def\iff{\DOTSB\;\Longleftrightarrow\;}
// \def\implies{\DOTSB\;\Longrightarrow\;}
// \def\impliedby{\DOTSB\;\Longleftarrow\;}
defineMacro("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;");
defineMacro("\\implies", "\\DOTSB\\;\\Longrightarrow\\;");
defineMacro("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;");
// AMSMath's automatic \dots, based on \mdots@@ macro.
const dotsByToken = {
',': '\\dotsc',
'\\not': '\\dotsb',
// \keybin@ checks for the following:
'+': '\\dotsb',
'=': '\\dotsb',
'<': '\\dotsb',
'>': '\\dotsb',
'-': '\\dotsb',
'*': '\\dotsb',
':': '\\dotsb',
// Symbols whose definition starts with \DOTSB:
'\\DOTSB': '\\dotsb',
'\\coprod': '\\dotsb',
'\\bigvee': '\\dotsb',
'\\bigwedge': '\\dotsb',
'\\biguplus': '\\dotsb',
'\\bigcap': '\\dotsb',
'\\bigcup': '\\dotsb',
'\\prod': '\\dotsb',
'\\sum': '\\dotsb',
'\\bigotimes': '\\dotsb',
'\\bigoplus': '\\dotsb',
'\\bigodot': '\\dotsb',
'\\bigsqcup': '\\dotsb',
'\\implies': '\\dotsb',
'\\impliedby': '\\dotsb',
'\\And': '\\dotsb',
'\\longrightarrow': '\\dotsb',
'\\Longrightarrow': '\\dotsb',
'\\longleftarrow': '\\dotsb',
'\\Longleftarrow': '\\dotsb',
'\\longleftrightarrow': '\\dotsb',
'\\Longleftrightarrow': '\\dotsb',
'\\mapsto': '\\dotsb',
'\\longmapsto': '\\dotsb',
'\\hookrightarrow': '\\dotsb',
'\\iff': '\\dotsb',
'\\doteq': '\\dotsb',
// Symbols whose definition starts with \mathbin:
'\\mathbin': '\\dotsb',
'\\bmod': '\\dotsb',
// Symbols whose definition starts with \mathrel:
'\\mathrel': '\\dotsb',
'\\relbar': '\\dotsb',
'\\Relbar': '\\dotsb',
'\\xrightarrow': '\\dotsb',
'\\xleftarrow': '\\dotsb',
// Symbols whose definition starts with \DOTSI:
'\\DOTSI': '\\dotsi',
'\\int': '\\dotsi',
'\\oint': '\\dotsi',
'\\iint': '\\dotsi',
'\\iiint': '\\dotsi',
'\\iiiint': '\\dotsi',
'\\idotsint': '\\dotsi',
// Symbols whose definition starts with \DOTSX:
'\\DOTSX': '\\dotsx',
};
defineMacro("\\dots", function(context) {
// TODO: If used in text mode, should expand to \textellipsis.
// However, in KaTeX, \textellipsis and \ldots behave the same
// (in text mode), and it's unlikely we'd see any of the math commands
// that affect the behavior of \dots when in text mode. So fine for now
// (until we support \ifmmode ... \else ... \fi).
let thedots = '\\dotso';
const next = context.expandAfterFuture().text;
if (next in dotsByToken) {
thedots = dotsByToken[next];
} else if (next.substr(0, 4) === '\\not') {
thedots = '\\dotsb';
} else if (next in symbols.math) {
if (utils.contains(['bin', 'rel'], symbols.math[next].group)) {
thedots = '\\dotsb';
}
}
return thedots;
});
const spaceAfterDots = {
// \rightdelim@ checks for the following:
')': true,
']': true,
'\\rbrack': true,
'\\}': true,
'\\rbrace': true,
'\\rangle': true,
'\\rceil': true,
'\\rfloor': true,
'\\rgroup': true,
'\\rmoustache': true,
'\\right': true,
'\\bigr': true,
'\\biggr': true,
'\\Bigr': true,
'\\Biggr': true,
// \extra@ also tests for the following:
'$': true,
// \extrap@ checks for the following:
';': true,
'.': true,
',': true,
};
defineMacro("\\dotso", function(context) {
const next = context.future().text;
if (next in spaceAfterDots) {
return "\\ldots\\,";
} else {
return "\\ldots";
}
});
defineMacro("\\dotsc", function(context) {
const next = context.future().text;
// \dotsc uses \extra@ but not \extrap@, instead specially checking for
// ';' and '.', but doesn't check for ','.
if (next in spaceAfterDots && next !== ',') {
return "\\ldots\\,";
} else {
return "\\ldots";
}
});
defineMacro("\\cdots", function(context) {
const next = context.future().text;
if (next in spaceAfterDots) {
return "\\@cdots\\,";
} else {
return "\\@cdots";
}
});
defineMacro("\\dotsb", "\\cdots");
defineMacro("\\dotsm", "\\cdots");
defineMacro("\\dotsi", "\\!\\cdots");
// amsmath doesn't actually define \dotsx, but \dots followed by a macro
// starting with \DOTSX implies \dotso, and then \extra@ detects this case
// and forces the added `\,`.
defineMacro("\\dotsx", "\\ldots\\,");
// \let\DOTSI\relax
// \let\DOTSB\relax
// \let\DOTSX\relax
defineMacro("\\DOTSI", "\\relax");
defineMacro("\\DOTSB", "\\relax");
defineMacro("\\DOTSX", "\\relax");
// http://texdoc.net/texmf-dist/doc/latex/amsmath/amsmath.pdf
defineMacro("\\thinspace", "\\,"); // \let\thinspace\,
defineMacro("\\medspace", "\\:"); // \let\medspace\:
defineMacro("\\thickspace", "\\;"); // \let\thickspace\;
//////////////////////////////////////////////////////////////////////
// LaTeX source2e
// \def\TeX{T\kern-.1667em\lower.5ex\hbox{E}\kern-.125emX\@}
// TODO: Doesn't normally work in math mode because \@ fails. KaTeX doesn't
// support \@ yet, so that's omitted, and we add \text so that the result
// doesn't look funny in math mode.
defineMacro("\\TeX", "\\textrm{T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX}");
// \DeclareRobustCommand{\LaTeX}{L\kern-.36em%
// {\sbox\z@ T%
// \vbox to\ht\z@{\hbox{\check@mathfonts
// \fontsize\sf@size\z@
// \math@fontsfalse\selectfont
// A}%
// \vss}%
// }%
// \kern-.15em%
// \TeX}
// This code aligns the top of the A with the T (from the perspective of TeX's
// boxes, though visually the A appears to extend above slightly).
// We compute the corresponding \raisebox when A is rendered at \scriptsize,
// which is size3, which has a scale factor of 0.7 (see Options.js).
const latexRaiseA = fontMetricsData['Main-Regular']["T".charCodeAt(0)][1] -
0.7 * fontMetricsData['Main-Regular']["A".charCodeAt(0)][1] + "em";
defineMacro("\\LaTeX",
`\\textrm{L\\kern-.36em\\raisebox{${latexRaiseA}}{\\scriptsize A}` +
"\\kern-.15em\\TeX}");
// New KaTeX logo based on tweaking LaTeX logo
defineMacro("\\KaTeX",
`\\textrm{K\\kern-.17em\\raisebox{${latexRaiseA}}{\\scriptsize A}` +
"\\kern-.15em\\TeX}");
// \DeclareRobustCommand\hspace{\@ifstar\@hspacer\@hspace}
// \def\@hspace#1{\hskip #1\relax}
// KaTeX doesn't do line breaks, so \hspace and \hspace* are the same as \kern
defineMacro("\\hspace", "\\@ifstar\\kern\\kern");
//////////////////////////////////////////////////////////////////////
// mathtools.sty
//\providecommand\ordinarycolon{:}
defineMacro("\\ordinarycolon", ":");
//\def\vcentcolon{\mathrel{\mathop\ordinarycolon}}
//TODO(edemaine): Not yet centered. Fix via \raisebox or #726
defineMacro("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}");
// \providecommand*\dblcolon{\vcentcolon\mathrel{\mkern-.9mu}\vcentcolon}
defineMacro("\\dblcolon", "\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon");
// \providecommand*\coloneqq{\vcentcolon\mathrel{\mkern-1.2mu}=}
defineMacro("\\coloneqq", "\\vcentcolon\\mathrel{\\mkern-1.2mu}=");
// \providecommand*\Coloneqq{\dblcolon\mathrel{\mkern-1.2mu}=}
defineMacro("\\Coloneqq", "\\dblcolon\\mathrel{\\mkern-1.2mu}=");
// \providecommand*\coloneq{\vcentcolon\mathrel{\mkern-1.2mu}\mathrel{-}}
defineMacro("\\coloneq", "\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}");
// \providecommand*\Coloneq{\dblcolon\mathrel{\mkern-1.2mu}\mathrel{-}}
defineMacro("\\Coloneq", "\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}");
// \providecommand*\eqqcolon{=\mathrel{\mkern-1.2mu}\vcentcolon}
defineMacro("\\eqqcolon", "=\\mathrel{\\mkern-1.2mu}\\vcentcolon");
// \providecommand*\Eqqcolon{=\mathrel{\mkern-1.2mu}\dblcolon}
defineMacro("\\Eqqcolon", "=\\mathrel{\\mkern-1.2mu}\\dblcolon");
// \providecommand*\eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\vcentcolon}
defineMacro("\\eqcolon", "\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon");
// \providecommand*\Eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\dblcolon}
defineMacro("\\Eqcolon", "\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon");
// \providecommand*\colonapprox{\vcentcolon\mathrel{\mkern-1.2mu}\approx}
defineMacro("\\colonapprox", "\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx");
// \providecommand*\Colonapprox{\dblcolon\mathrel{\mkern-1.2mu}\approx}
defineMacro("\\Colonapprox", "\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx");
// \providecommand*\colonsim{\vcentcolon\mathrel{\mkern-1.2mu}\sim}
defineMacro("\\colonsim", "\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim");
// \providecommand*\Colonsim{\dblcolon\mathrel{\mkern-1.2mu}\sim}
defineMacro("\\Colonsim", "\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim");
//////////////////////////////////////////////////////////////////////
// colonequals.sty
// Alternate names for mathtools's macros:
defineMacro("\\ratio", "\\vcentcolon");
defineMacro("\\coloncolon", "\\dblcolon");
defineMacro("\\colonequals", "\\coloneqq");
defineMacro("\\coloncolonequals", "\\Coloneqq");
defineMacro("\\equalscolon", "\\eqqcolon");
defineMacro("\\equalscoloncolon", "\\Eqqcolon");
defineMacro("\\colonminus", "\\coloneq");
defineMacro("\\coloncolonminus", "\\Coloneq");
defineMacro("\\minuscolon", "\\eqcolon");
defineMacro("\\minuscoloncolon", "\\Eqcolon");
// \colonapprox name is same in mathtools and colonequals.
defineMacro("\\coloncolonapprox", "\\Colonapprox");
// \colonsim name is same in mathtools and colonequals.
defineMacro("\\coloncolonsim", "\\Colonsim");
// Additional macros, implemented by analogy with mathtools definitions:
defineMacro("\\simcolon", "\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon");
defineMacro("\\simcoloncolon", "\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon");
defineMacro("\\approxcolon", "\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon");
defineMacro("\\approxcoloncolon",
"\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon");
// Present in newtxmath, pxfonts and txfonts
// TODO: The unicode character U+220C ∌ should be added to the font, and this
// macro turned into a propper defineSymbol in symbols.js. That way, the
// MathML result will be much cleaner.
defineMacro("\\notni", "\\not\\ni");
defineMacro("\\limsup", "\\DOTSB\\mathop{\\operatorname{lim\\,sup}}\\limits");
defineMacro("\\liminf", "\\DOTSB\\mathop{\\operatorname{lim\\,inf}}\\limits");