mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-06 03:38:39 +00:00
* Comments without terminating newlines in nonstrict mode Fix #1506 by allowing single-line comments (`%` without terminating newline) in nonstrict mode. `Lexer` and `MacroExpander` now store the `Settings` object, so the `Lexer` can complain about missing newline according to the `strict` setting. I filtered this out from the snapshot tests with a slightly different `replacer`. * Reimplement \href like \verb, add \url Major restructuring to lex URL arguments differently, e.g. to support `\href%{hello}` and `\href{http://foo.com/#test%}{hello}`. The new URL parsing code is simpler, but involves a special case in `parseSymbol` like `\verb`. Also add support for `\url` while we're here. * Cleanup * Fix flow errors and improve error messages * Add \url to documentation * Improve doc formatting
347 lines
11 KiB
JavaScript
347 lines
11 KiB
JavaScript
// @flow
|
|
/**
|
|
* This file contains the “gullet” where macros are expanded
|
|
* until only non-macro tokens remain.
|
|
*/
|
|
|
|
import functions from "./functions";
|
|
import symbols from "./symbols";
|
|
import Lexer from "./Lexer";
|
|
import {Token} from "./Token";
|
|
import type {Mode} from "./types";
|
|
import ParseError from "./ParseError";
|
|
import Namespace from "./Namespace";
|
|
import builtinMacros from "./macros";
|
|
|
|
import type {MacroContextInterface, MacroDefinition, MacroExpansion}
|
|
from "./macros";
|
|
import type Settings from "./Settings";
|
|
|
|
// List of commands that act like macros but aren't defined as a macro,
|
|
// function, or symbol. Used in `isDefined`.
|
|
export const implicitCommands = {
|
|
"\\relax": true, // MacroExpander.js
|
|
"^": true, // Parser.js
|
|
"_": true, // Parser.js
|
|
"\\limits": true, // Parser.js
|
|
"\\nolimits": true, // Parser.js
|
|
};
|
|
|
|
export default class MacroExpander implements MacroContextInterface {
|
|
settings: Settings;
|
|
expansionCount: number;
|
|
lexer: Lexer;
|
|
macros: Namespace<MacroDefinition>;
|
|
stack: Token[];
|
|
mode: Mode;
|
|
|
|
constructor(input: string, settings: Settings, mode: Mode) {
|
|
this.settings = settings;
|
|
this.expansionCount = 0;
|
|
this.feed(input);
|
|
// Make new global namespace
|
|
this.macros = new Namespace(builtinMacros, settings.macros);
|
|
this.mode = mode;
|
|
this.stack = []; // contains tokens in REVERSE order
|
|
}
|
|
|
|
/**
|
|
* Feed a new input string to the same MacroExpander
|
|
* (with existing macros etc.).
|
|
*/
|
|
feed(input: string) {
|
|
this.lexer = new Lexer(input, this.settings);
|
|
}
|
|
|
|
/**
|
|
* Switches between "text" and "math" modes.
|
|
*/
|
|
switchMode(newMode: Mode) {
|
|
this.mode = newMode;
|
|
}
|
|
|
|
/**
|
|
* Start a new group nesting within all namespaces.
|
|
*/
|
|
beginGroup() {
|
|
this.macros.beginGroup();
|
|
}
|
|
|
|
/**
|
|
* End current group nesting within all namespaces.
|
|
*/
|
|
endGroup() {
|
|
this.macros.endGroup();
|
|
}
|
|
|
|
/**
|
|
* Returns the topmost token on the stack, without expanding it.
|
|
* Similar in behavior to TeX's `\futurelet`.
|
|
*/
|
|
future(): Token {
|
|
if (this.stack.length === 0) {
|
|
this.pushToken(this.lexer.lex());
|
|
}
|
|
return this.stack[this.stack.length - 1];
|
|
}
|
|
|
|
/**
|
|
* Remove and return the next unexpanded token.
|
|
*/
|
|
popToken(): Token {
|
|
this.future(); // ensure non-empty stack
|
|
return this.stack.pop();
|
|
}
|
|
|
|
/**
|
|
* Add a given token to the token stack. In particular, this get be used
|
|
* to put back a token returned from one of the other methods.
|
|
*/
|
|
pushToken(token: Token) {
|
|
this.stack.push(token);
|
|
}
|
|
|
|
/**
|
|
* Append an array of tokens to the token stack.
|
|
*/
|
|
pushTokens(tokens: Token[]) {
|
|
this.stack.push(...tokens);
|
|
}
|
|
|
|
/**
|
|
* Consume all following space tokens, without expansion.
|
|
*/
|
|
consumeSpaces() {
|
|
for (;;) {
|
|
const token = this.future();
|
|
if (token.text === " ") {
|
|
this.stack.pop();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Consume the specified number of arguments from the token stream,
|
|
* and return the resulting array of arguments.
|
|
*/
|
|
consumeArgs(numArgs: number): Token[][] {
|
|
const args: Token[][] = [];
|
|
// obtain arguments, either single token or balanced {…} group
|
|
for (let i = 0; i < numArgs; ++i) {
|
|
this.consumeSpaces(); // ignore spaces before each argument
|
|
const startOfArg = this.popToken();
|
|
if (startOfArg.text === "{") {
|
|
const arg: Token[] = [];
|
|
let depth = 1;
|
|
while (depth !== 0) {
|
|
const tok = this.popToken();
|
|
arg.push(tok);
|
|
if (tok.text === "{") {
|
|
++depth;
|
|
} else if (tok.text === "}") {
|
|
--depth;
|
|
} else if (tok.text === "EOF") {
|
|
throw new ParseError(
|
|
"End of input in macro argument",
|
|
startOfArg);
|
|
}
|
|
}
|
|
arg.pop(); // remove last }
|
|
arg.reverse(); // like above, to fit in with stack order
|
|
args[i] = arg;
|
|
} else if (startOfArg.text === "EOF") {
|
|
throw new ParseError(
|
|
"End of input expecting macro argument");
|
|
} else {
|
|
args[i] = [startOfArg];
|
|
}
|
|
}
|
|
return args;
|
|
}
|
|
|
|
/**
|
|
* Expand the next token only once if possible.
|
|
*
|
|
* If the token is expanded, the resulting tokens will be pushed onto
|
|
* the stack in reverse order and will be returned as an array,
|
|
* also in reverse order.
|
|
*
|
|
* If not, the next token will be returned without removing it
|
|
* from the stack. This case can be detected by a `Token` return value
|
|
* instead of an `Array` return value.
|
|
*
|
|
* In either case, the next token will be on the top of the stack,
|
|
* or the stack will be empty.
|
|
*
|
|
* Used to implement `expandAfterFuture` and `expandNextToken`.
|
|
*
|
|
* At the moment, macro expansion doesn't handle delimited macros,
|
|
* i.e. things like those defined by \def\foo#1\end{…}.
|
|
* See the TeX book page 202ff. for details on how those should behave.
|
|
*/
|
|
expandOnce(): Token | Token[] {
|
|
const topToken = this.popToken();
|
|
const name = topToken.text;
|
|
const expansion = this._getExpansion(name);
|
|
if (expansion == null) { // mainly checking for undefined here
|
|
// Fully expanded
|
|
this.pushToken(topToken);
|
|
return topToken;
|
|
}
|
|
this.expansionCount++;
|
|
if (this.expansionCount > this.settings.maxExpand) {
|
|
throw new ParseError("Too many expansions: infinite loop or " +
|
|
"need to increase maxExpand setting");
|
|
}
|
|
let tokens = expansion.tokens;
|
|
if (expansion.numArgs) {
|
|
const args = this.consumeArgs(expansion.numArgs);
|
|
// paste arguments in place of the placeholders
|
|
tokens = tokens.slice(); // make a shallow copy
|
|
for (let i = tokens.length - 1; i >= 0; --i) {
|
|
let tok = tokens[i];
|
|
if (tok.text === "#") {
|
|
if (i === 0) {
|
|
throw new ParseError(
|
|
"Incomplete placeholder at end of macro body",
|
|
tok);
|
|
}
|
|
tok = tokens[--i]; // next token on stack
|
|
if (tok.text === "#") { // ## → #
|
|
tokens.splice(i + 1, 1); // drop first #
|
|
} else if (/^[1-9]$/.test(tok.text)) {
|
|
// replace the placeholder with the indicated argument
|
|
tokens.splice(i, 2, ...args[+tok.text - 1]);
|
|
} else {
|
|
throw new ParseError(
|
|
"Not a valid argument number",
|
|
tok);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Concatenate expansion onto top of stack.
|
|
this.pushTokens(tokens);
|
|
return tokens;
|
|
}
|
|
|
|
/**
|
|
* Expand the next token only once (if possible), and return the resulting
|
|
* top token on the stack (without removing anything from the stack).
|
|
* Similar in behavior to TeX's `\expandafter\futurelet`.
|
|
* Equivalent to expandOnce() followed by future().
|
|
*/
|
|
expandAfterFuture(): Token {
|
|
this.expandOnce();
|
|
return this.future();
|
|
}
|
|
|
|
/**
|
|
* Recursively expand first token, then return first non-expandable token.
|
|
*/
|
|
expandNextToken(): Token {
|
|
for (;;) {
|
|
const expanded = this.expandOnce();
|
|
// expandOnce returns Token if and only if it's fully expanded.
|
|
if (expanded instanceof Token) {
|
|
// \relax stops the expansion, but shouldn't get returned (a
|
|
// null return value couldn't get implemented as a function).
|
|
if (expanded.text === "\\relax") {
|
|
this.stack.pop();
|
|
} else {
|
|
return this.stack.pop(); // === expanded
|
|
}
|
|
}
|
|
}
|
|
|
|
// Flow unable to figure out that this pathway is impossible.
|
|
// https://github.com/facebook/flow/issues/4808
|
|
throw new Error(); // eslint-disable-line no-unreachable
|
|
}
|
|
|
|
/**
|
|
* Fully expand the given macro name and return the resulting list of
|
|
* tokens, or return `undefined` if no such macro is defined.
|
|
*/
|
|
expandMacro(name: string): Token[] | void {
|
|
if (!this.macros.get(name)) {
|
|
return undefined;
|
|
}
|
|
const output = [];
|
|
const oldStackLength = this.stack.length;
|
|
this.pushToken(new Token(name));
|
|
while (this.stack.length > oldStackLength) {
|
|
const expanded = this.expandOnce();
|
|
// expandOnce returns Token if and only if it's fully expanded.
|
|
if (expanded instanceof Token) {
|
|
output.push(this.stack.pop());
|
|
}
|
|
}
|
|
return output;
|
|
}
|
|
|
|
/**
|
|
* Fully expand the given macro name and return the result as a string,
|
|
* or return `undefined` if no such macro is defined.
|
|
*/
|
|
expandMacroAsText(name: string): string | void {
|
|
const tokens = this.expandMacro(name);
|
|
if (tokens) {
|
|
return tokens.map((token) => token.text).join("");
|
|
} else {
|
|
return tokens;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the expanded macro as a reversed array of tokens and a macro
|
|
* argument count. Or returns `null` if no such macro.
|
|
*/
|
|
_getExpansion(name: string): ?MacroExpansion {
|
|
const definition = this.macros.get(name);
|
|
if (definition == null) { // mainly checking for undefined here
|
|
return definition;
|
|
}
|
|
const expansion =
|
|
typeof definition === "function" ? definition(this) : definition;
|
|
if (typeof expansion === "string") {
|
|
let numArgs = 0;
|
|
if (expansion.indexOf("#") !== -1) {
|
|
const stripped = expansion.replace(/##/g, "");
|
|
while (stripped.indexOf("#" + (numArgs + 1)) !== -1) {
|
|
++numArgs;
|
|
}
|
|
}
|
|
const bodyLexer = new Lexer(expansion, this.settings);
|
|
const tokens = [];
|
|
let tok = bodyLexer.lex();
|
|
while (tok.text !== "EOF") {
|
|
tokens.push(tok);
|
|
tok = bodyLexer.lex();
|
|
}
|
|
tokens.reverse(); // to fit in with stack using push and pop
|
|
const expanded = {tokens, numArgs};
|
|
return expanded;
|
|
}
|
|
|
|
return expansion;
|
|
}
|
|
|
|
/**
|
|
* Determine whether a command is currently "defined" (has some
|
|
* functionality), meaning that it's a macro (in the current group),
|
|
* a function, a symbol, or one of the special commands listed in
|
|
* `implicitCommands`.
|
|
*/
|
|
isDefined(name: string): boolean {
|
|
return this.macros.has(name) ||
|
|
functions.hasOwnProperty(name) ||
|
|
symbols.math.hasOwnProperty(name) ||
|
|
symbols.text.hasOwnProperty(name) ||
|
|
implicitCommands.hasOwnProperty(name);
|
|
}
|
|
}
|
|
|