mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 19:28:39 +00:00
Strict setting controls \newline display-mode behavior; fix MacroExpander space handling (#1314)
* Strict setting controls \newline display-mode behavior * Bug-fix space handling in macros Whitespace after a \controlWord is now handled within the lexer, not by the MacroExpander. This way, \\ expanding to \newline doesn't accidentally cause spaces to get consumed. * Rename nonstrict -> reportNonstrict; strictBehavior -> useStrictBehavior * Second category of errorCodes
This commit is contained in:
12
README.md
12
README.md
@@ -79,6 +79,11 @@ You can provide an object of options as the last argument to `katex.render` and
|
||||
- `strict`: `boolean` or `string` or `function` (default: `"warn"`). If `false` or `"ignore"`, allow features that make writing LaTeX convenient but are not actually supported by (Xe)LaTeX (similar to MathJax). If `true` or `"error"` (LaTeX faithfulness mode), throw an error for any such transgressions. If `"warn"` (the default), warn about such behavior via `console.warn`. Provide a custom function `handler(errorCode, errorMsg, token)` to customize behavior depending on the type of transgression (summarized by the string code `errorCode` and detailed in `errorMsg`); this function can also return `"ignore"`, `"error"`, or `"warn"` to use a built-in behavior. A list of such features and their `errorCode`s:
|
||||
- `"unicodeTextInMathMode"`: Use of Unicode text characters in math mode.
|
||||
- `"mathVsTextUnits"`: Mismatch of math vs. text commands and units/mode.
|
||||
A second category of `errorCode`s never throw errors, but their strictness
|
||||
affects the behavior of KaTeX:
|
||||
- `"newLineInDisplayMode"`: Use of `\\` or `\newline` in display mode
|
||||
(outside an array/tabular environment). In strict mode, no line break
|
||||
results, as in LaTeX.
|
||||
|
||||
For example:
|
||||
|
||||
@@ -131,13 +136,6 @@ will appear larger than 1cm in browser units.
|
||||
- MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's
|
||||
`colorIsTextColor` option to `true` for this behavior. KaTeX's default
|
||||
behavior matches MathJax with its `color.js` extension enabled.
|
||||
- KaTeX breaks lines with `\\` and `\newline` in inline math, but ignores them
|
||||
in display math (matching LaTeX's behavior, but not MathJax's behavior).
|
||||
To allow `\\` and `\newline` to break lines in display mode,
|
||||
add the following CSS rule:
|
||||
```css
|
||||
.katex-display > .katex > .katex-html > .newline { display: block !important; }
|
||||
```
|
||||
|
||||
## Libraries
|
||||
|
||||
|
17
src/Lexer.js
17
src/Lexer.js
@@ -33,14 +33,19 @@ import {LexerInterface, Token} from "./Token";
|
||||
* If there is no matching function or symbol definition, the Parser will
|
||||
* still reject the input.
|
||||
*/
|
||||
const spaceRegexString = "[ \r\n\t]";
|
||||
const commentRegexString = "%[^\n]*[\n]";
|
||||
const controlWordRegexString = "\\\\[a-zA-Z@]+";
|
||||
const controlSymbolRegexString = "\\\\[^\uD800-\uDFFF]";
|
||||
const controlWordWhitespaceRegexString =
|
||||
`${controlWordRegexString}${spaceRegexString}*`;
|
||||
const controlWordWhitespaceRegex = new RegExp(
|
||||
`^(${controlWordRegexString})${spaceRegexString}*$`);
|
||||
const combiningDiacriticalMarkString = "[\u0300-\u036f]";
|
||||
export const combiningDiacriticalMarksEndRegex =
|
||||
new RegExp(`${combiningDiacriticalMarkString}+$`);
|
||||
const tokenRegex = new RegExp(
|
||||
"([ \r\n\t]+)|" + // whitespace
|
||||
`(${spaceRegexString}+)|` + // whitespace
|
||||
`(${commentRegexString}` + // comments
|
||||
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
||||
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
||||
@@ -48,7 +53,7 @@ const tokenRegex = new RegExp(
|
||||
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
||||
"|\\\\verb\\*([^]).*?\\3" + // \verb*
|
||||
"|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
|
||||
`|${controlWordRegexString}` + // \macroName
|
||||
`|${controlWordWhitespaceRegexString}` + // \macroName + spaces
|
||||
`|${controlSymbolRegexString}` + // \\, \', etc.
|
||||
")"
|
||||
);
|
||||
@@ -85,11 +90,17 @@ export default class Lexer implements LexerInterface {
|
||||
`Unexpected character: '${input[pos]}'`,
|
||||
new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
|
||||
}
|
||||
const text = match[2] || " ";
|
||||
let text = match[2] || " ";
|
||||
const start = this.pos;
|
||||
this.pos += match[0].length;
|
||||
const end = this.pos;
|
||||
|
||||
// Trim any trailing whitespace from control word match
|
||||
const controlMatch = text.match(controlWordWhitespaceRegex);
|
||||
if (controlMatch) {
|
||||
text = controlMatch[1];
|
||||
}
|
||||
|
||||
if (commentRegex.test(text)) {
|
||||
return this.lex();
|
||||
} else {
|
||||
|
@@ -4,7 +4,7 @@
|
||||
* until only non-macro tokens remain.
|
||||
*/
|
||||
|
||||
import Lexer, {controlWordRegex} from "./Lexer";
|
||||
import Lexer from "./Lexer";
|
||||
import {Token} from "./Token";
|
||||
import builtinMacros from "./macros";
|
||||
import type {Mode} from "./types";
|
||||
@@ -143,11 +143,6 @@ export default class MacroExpander implements MacroContextInterface {
|
||||
expandOnce(): Token | Token[] {
|
||||
const topToken = this.popToken();
|
||||
const name = topToken.text;
|
||||
const isMacro = (name.charAt(0) === "\\");
|
||||
if (isMacro && controlWordRegex.test(name)) {
|
||||
// Consume all spaces after \macro (but not \\, \', etc.)
|
||||
this.consumeSpaces();
|
||||
}
|
||||
if (!this.macros.hasOwnProperty(name)) {
|
||||
// Fully expanded
|
||||
this.pushToken(topToken);
|
||||
|
@@ -121,7 +121,7 @@ type ParseNodeTypes = {
|
||||
|},
|
||||
"cr": {|
|
||||
type: "cr",
|
||||
//newRow: boolean,
|
||||
newRow: boolean,
|
||||
newLine: boolean,
|
||||
size: ?ParseNode<*>,
|
||||
|},
|
||||
|
@@ -150,7 +150,7 @@ export default class Parser {
|
||||
return expression;
|
||||
}
|
||||
|
||||
static endOfExpression = ["}", "\\end", "\\right", "&", "\\cr"];
|
||||
static endOfExpression = ["}", "\\end", "\\right", "&"];
|
||||
|
||||
/**
|
||||
* Parses an "expression", which is a list of atoms.
|
||||
@@ -946,7 +946,7 @@ export default class Parser {
|
||||
!symbols[this.mode][text[0]]) {
|
||||
// This behavior is not strict (XeTeX-compatible) in math mode.
|
||||
if (this.settings.strict && this.mode === "math") {
|
||||
this.settings.nonstrict("unicodeTextInMathMode",
|
||||
this.settings.reportNonstrict("unicodeTextInMathMode",
|
||||
`Accented Unicode text character "${text[0]}" used in ` +
|
||||
`math mode`, nucleus);
|
||||
}
|
||||
@@ -967,7 +967,7 @@ export default class Parser {
|
||||
if (symbols[this.mode][text]) {
|
||||
if (this.settings.strict && this.mode === 'math' &&
|
||||
extraLatin.indexOf(text) >= 0) {
|
||||
this.settings.nonstrict("unicodeTextInMathMode",
|
||||
this.settings.reportNonstrict("unicodeTextInMathMode",
|
||||
`Latin-1/Unicode text character "${text[0]}" used in ` +
|
||||
`math mode`, nucleus);
|
||||
}
|
||||
@@ -975,7 +975,7 @@ export default class Parser {
|
||||
text, this.mode, nucleus);
|
||||
} else if (supportedCodepoint(text.charCodeAt(0))) {
|
||||
if (this.settings.strict && this.mode === 'math') {
|
||||
this.settings.nonstrict("unicodeTextInMathMode",
|
||||
this.settings.reportNonstrict("unicodeTextInMathMode",
|
||||
`Unicode text character "${text[0]}" used in math mode`,
|
||||
nucleus);
|
||||
}
|
||||
|
@@ -61,7 +61,8 @@ class Settings {
|
||||
* Report nonstrict (non-LaTeX-compatible) input.
|
||||
* Can safely not be called if `this.strict` is false in JavaScript.
|
||||
*/
|
||||
nonstrict(errorCode: string, errorMsg: string, token?: Token | ParseNode<*>) {
|
||||
reportNonstrict(errorCode: string, errorMsg: string,
|
||||
token?: Token | ParseNode<*>) {
|
||||
let strict = this.strict;
|
||||
if (typeof strict === "function") {
|
||||
// Allow return value of strict function to be boolean or string
|
||||
@@ -84,6 +85,45 @@ class Settings {
|
||||
`unrecognized '${strict}': ${errorMsg} [${errorCode}]`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether to apply strict (LaTeX-adhering) behavior for unusual
|
||||
* input (like `\\`). Unlike `reportNonstrict`, will not throw an error;
|
||||
* instead, "error" translates to a return value of `true`, while "ignore"
|
||||
* translates to a return value of `false`. May still print a warning:
|
||||
* "warn" prints a warning and returns `false`.
|
||||
* This is for the second category of `errorCode`s listed in the README.
|
||||
*/
|
||||
useStrictBehavior(errorCode: string, errorMsg: string,
|
||||
token?: Token | ParseNode<*>) {
|
||||
let strict = this.strict;
|
||||
if (typeof strict === "function") {
|
||||
// Allow return value of strict function to be boolean or string
|
||||
// (or null/undefined, meaning no further processing).
|
||||
// But catch any exceptions thrown by function, treating them
|
||||
// like "error".
|
||||
try {
|
||||
strict = strict(errorCode, errorMsg, token);
|
||||
} catch (error) {
|
||||
strict = "error";
|
||||
}
|
||||
}
|
||||
if (!strict || strict === "ignore") {
|
||||
return false;
|
||||
} else if (strict === true || strict === "error") {
|
||||
return true;
|
||||
} else if (strict === "warn") {
|
||||
typeof console !== "undefined" && console.warn(
|
||||
"LaTeX-incompatible input and strict mode is set to 'warn': " +
|
||||
`${errorMsg} [${errorCode}]`);
|
||||
return false;
|
||||
} else { // won't happen in type-safe code
|
||||
typeof console !== "undefined" && console.warn(
|
||||
"LaTeX-incompatible input and strict mode is set to " +
|
||||
`unrecognized '${strict}': ${errorMsg} [${errorCode}]`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default Settings;
|
||||
|
@@ -55,6 +55,10 @@ function parseArray(
|
||||
result: ArrayEnvNodeData,
|
||||
style: StyleStr,
|
||||
): ParseNode<*> {
|
||||
// Parse body of array with \\ temporarily mapped to \cr
|
||||
const oldNewline = parser.gullet.macros["\\\\"];
|
||||
parser.gullet.macros["\\\\"] = "\\cr";
|
||||
|
||||
let row = [];
|
||||
const body = [row];
|
||||
const rowGaps = [];
|
||||
@@ -64,7 +68,7 @@ function parseArray(
|
||||
numHLinesBeforeRow.push(getNumHLines(parser));
|
||||
|
||||
while (true) { // eslint-disable-line no-constant-condition
|
||||
let cell = parser.parseExpression(false, "\\\\");
|
||||
let cell = parser.parseExpression(false, "\\cr");
|
||||
cell = new ParseNode("ordgroup", cell, parser.mode);
|
||||
if (style) {
|
||||
cell = new ParseNode("styling", {
|
||||
@@ -87,7 +91,7 @@ function parseArray(
|
||||
body.pop();
|
||||
}
|
||||
break;
|
||||
} else if (next === "\\\\" || next === "\\cr") {
|
||||
} else if (next === "\\cr") {
|
||||
const cr = parser.parseFunction();
|
||||
if (!cr) {
|
||||
throw new ParseError(`Failed to parse function after ${next}`);
|
||||
@@ -107,6 +111,7 @@ function parseArray(
|
||||
result.body = body;
|
||||
result.rowGaps = rowGaps;
|
||||
result.numHLinesBeforeRow = numHLinesBeforeRow;
|
||||
parser.gullet.macros["\\\\"] = oldNewline;
|
||||
return new ParseNode("array", result, parser.mode);
|
||||
}
|
||||
|
||||
|
@@ -7,9 +7,14 @@ import mathMLTree from "../mathMLTree";
|
||||
import { calculateSize } from "../units";
|
||||
import ParseError from "../ParseError";
|
||||
|
||||
// \\ is a macro mapping to either \cr or \newline. Because they have the
|
||||
// same signature, we implement them as one megafunction, with newRow
|
||||
// indicating whether we're in the \cr case, and newLine indicating whether
|
||||
// to break the line in the \newline case.
|
||||
|
||||
defineFunction({
|
||||
type: "cr",
|
||||
names: ["\\\\", "\\cr", "\\newline"],
|
||||
names: ["\\cr", "\\newline"],
|
||||
props: {
|
||||
numArgs: 0,
|
||||
numOptionalArgs: 1,
|
||||
@@ -18,39 +23,53 @@ defineFunction({
|
||||
},
|
||||
|
||||
handler: (context, args, optArgs) => {
|
||||
const newRow = (context.funcName === "\\cr");
|
||||
let newLine = false;
|
||||
if (!newRow) {
|
||||
if (context.parser.settings.displayMode &&
|
||||
context.parser.settings.useStrictBehavior(
|
||||
"newLineInDisplayMode", "In LaTeX, \\\\ or \\newline " +
|
||||
"does nothing in display mode")) {
|
||||
newLine = false;
|
||||
} else {
|
||||
newLine = true;
|
||||
}
|
||||
}
|
||||
return {
|
||||
type: "cr",
|
||||
// \\ and \cr both end the row in a tabular environment
|
||||
// This flag isn't currently needed by environments/array.js
|
||||
//newRow: context.funcName !== "\\newline",
|
||||
// \\ and \newline both end the line in an inline math environment
|
||||
newLine: context.funcName !== "\\cr",
|
||||
newLine,
|
||||
newRow,
|
||||
size: optArgs[0],
|
||||
};
|
||||
},
|
||||
|
||||
// The following builders are called only at the top level,
|
||||
// not within tabular environments.
|
||||
// not within tabular/array environments.
|
||||
|
||||
htmlBuilder: (group, options) => {
|
||||
if (!group.value.newLine) {
|
||||
if (group.value.newRow) {
|
||||
throw new ParseError(
|
||||
"\\cr valid only within a tabular environment");
|
||||
"\\cr valid only within a tabular/array environment");
|
||||
}
|
||||
const span = buildCommon.makeSpan(["mspace", "newline"], [], options);
|
||||
if (group.value.size) {
|
||||
span.style.marginTop =
|
||||
calculateSize(group.value.size.value, options) + "em";
|
||||
const span = buildCommon.makeSpan(["mspace"], [], options);
|
||||
if (group.value.newLine) {
|
||||
span.classes.push("newline");
|
||||
if (group.value.size) {
|
||||
span.style.marginTop =
|
||||
calculateSize(group.value.size.value, options) + "em";
|
||||
}
|
||||
}
|
||||
return span;
|
||||
},
|
||||
|
||||
mathmlBuilder: (group, options) => {
|
||||
const node = new mathMLTree.MathNode("mspace");
|
||||
node.setAttribute("linebreak", "newline");
|
||||
if (group.value.size) {
|
||||
node.setAttribute("height",
|
||||
calculateSize(group.value.size.value, options) + "em");
|
||||
if (group.value.newLine) {
|
||||
node.setAttribute("linebreak", "newline");
|
||||
if (group.value.size) {
|
||||
node.setAttribute("height",
|
||||
calculateSize(group.value.size.value, options) + "em");
|
||||
}
|
||||
}
|
||||
return node;
|
||||
},
|
||||
|
@@ -22,17 +22,17 @@ defineFunction({
|
||||
const muUnit = (args[0].value.unit === 'mu');
|
||||
if (mathFunction) {
|
||||
if (!muUnit) {
|
||||
context.parser.settings.nonstrict("mathVsTextUnits",
|
||||
context.parser.settings.reportNonstrict("mathVsTextUnits",
|
||||
`LaTeX's ${context.funcName} supports only mu units, ` +
|
||||
`not ${args[0].value.unit} units`);
|
||||
}
|
||||
if (context.parser.mode !== "math") {
|
||||
context.parser.settings.nonstrict("mathVsTextUnits",
|
||||
context.parser.settings.reportNonstrict("mathVsTextUnits",
|
||||
`LaTeX's ${context.funcName} works only in math mode`);
|
||||
}
|
||||
} else { // !mathFunction
|
||||
if (muUnit) {
|
||||
context.parser.settings.nonstrict("mathVsTextUnits",
|
||||
context.parser.settings.reportNonstrict("mathVsTextUnits",
|
||||
`LaTeX's ${context.funcName} doesn't support mu units`);
|
||||
}
|
||||
}
|
||||
|
@@ -30,11 +30,6 @@
|
||||
|
||||
> .katex-html {
|
||||
display: inline-block;
|
||||
|
||||
/* \newline doesn't do anything in display mode */
|
||||
> .newline {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -66,7 +61,7 @@
|
||||
}
|
||||
|
||||
.katex-html {
|
||||
/* \newline is an empty block at top level of inline mode */
|
||||
/* \newline is an empty block at top level, between .base elements */
|
||||
> .newline {
|
||||
display: block;
|
||||
}
|
||||
|
@@ -357,6 +357,9 @@ defineMacro("\\thickspace", "\\;"); // \let\thickspace\;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// LaTeX source2e
|
||||
|
||||
// \\ defaults to \newline, but changes to \cr within array environment
|
||||
defineMacro("\\\\", "\\newline");
|
||||
|
||||
// \def\TeX{T\kern-.1667em\lower.5ex\hbox{E}\kern-.125emX\@}
|
||||
// TODO: Doesn't normally work in math mode because \@ fails. KaTeX doesn't
|
||||
// support \@ yet, so that's omitted, and we add \text so that the result
|
||||
|
@@ -24,4 +24,4 @@ export type ArgType = "color" | "size" | "url" | "original" | Mode;
|
||||
export type StyleStr = "text" | "display" | "script" | "scriptscript";
|
||||
|
||||
// Allowable token text for "break" arguments in parser
|
||||
export type BreakToken = "]" | "}" | "$" | "\\)" | "\\\\";
|
||||
export type BreakToken = "]" | "}" | "$" | "\\)" | "\\cr";
|
||||
|
@@ -60,12 +60,12 @@ describe("Parser:", function() {
|
||||
it("rejects repeated infix operators", function() {
|
||||
expect("1\\over 2\\over 3").toFailWithParseError(
|
||||
"only one infix operator per group at position 9: " +
|
||||
"1\\over 2\\̲o̲v̲e̲r̲ 3");
|
||||
"1\\over 2\\̲o̲v̲e̲r̲ ̲3");
|
||||
});
|
||||
it("rejects conflicting infix operators", function() {
|
||||
expect("1\\over 2\\choose 3").toFailWithParseError(
|
||||
"only one infix operator per group at position 9: " +
|
||||
"1\\over 2\\̲c̲h̲o̲o̲s̲e̲ 3");
|
||||
"1\\over 2\\̲c̲h̲o̲o̲s̲e̲ ̲3");
|
||||
});
|
||||
});
|
||||
|
||||
|
@@ -181,7 +181,7 @@ beforeEach(function() {
|
||||
expect(actual).toParse(usedSettings);
|
||||
|
||||
try {
|
||||
_getBuilt(actual, settings);
|
||||
_getBuilt(actual, usedSettings);
|
||||
} catch (e) {
|
||||
result.pass = false;
|
||||
if (e instanceof ParseError) {
|
||||
@@ -196,6 +196,31 @@ beforeEach(function() {
|
||||
return result;
|
||||
},
|
||||
|
||||
toNotBuild: function(actual, settings) {
|
||||
const usedSettings = settings ? settings : defaultSettings;
|
||||
|
||||
const result = {
|
||||
pass: false,
|
||||
message: () => "Expected '" + actual + "' to fail " +
|
||||
"building, but it succeeded",
|
||||
};
|
||||
|
||||
try {
|
||||
_getBuilt(actual, usedSettings);
|
||||
} catch (e) {
|
||||
if (e instanceof ParseError) {
|
||||
result.pass = true;
|
||||
result.message = () => "'" + actual + "' correctly " +
|
||||
"didn't build with error: " + e.message;
|
||||
} else {
|
||||
result.message = () => "'" + actual + "' failed " +
|
||||
"building with unknown error: " + e.message;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
},
|
||||
|
||||
toParseLike: function(actual, expected, settings) {
|
||||
const usedSettings = settings ? settings : defaultSettings;
|
||||
|
||||
@@ -2719,6 +2744,10 @@ describe("A macro expander", function() {
|
||||
compareParseTree("\\text{\\foo }", "\\text{}", {"\\foo": "\\relax"});
|
||||
});
|
||||
|
||||
it("should not consume spaces after control-word expansion", function() {
|
||||
compareParseTree("\\text{\\\\ }", "\\text{ }", {"\\\\": "\\relax"});
|
||||
});
|
||||
|
||||
it("should consume spaces after \\relax", function() {
|
||||
compareParseTree("\\text{\\relax }", "\\text{}");
|
||||
});
|
||||
@@ -3133,7 +3162,7 @@ describe("Newlines via \\\\ and \\newline", function() {
|
||||
});
|
||||
|
||||
it("should not allow \\cr at top level", () => {
|
||||
expect("hello \\cr world").toNotParse();
|
||||
expect("hello \\cr world").toNotBuild();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -3186,6 +3215,11 @@ describe("strict setting", function() {
|
||||
expect("\\text{é試}").toParse(new Settings({strict: true}));
|
||||
expect("\\text{é試}").toParse();
|
||||
});
|
||||
|
||||
it("should warn about top-level \\newline in display mode", () => {
|
||||
expect("x\\\\y").toWarn(new Settings({displayMode: true}));
|
||||
expect("x\\\\y").toParse(new Settings({displayMode: false}));
|
||||
});
|
||||
});
|
||||
|
||||
describe("Internal __* interface", function() {
|
||||
|
Reference in New Issue
Block a user