Strict setting controls \newline display-mode behavior; fix MacroExpander space handling (#1314)

* Strict setting controls \newline display-mode behavior

* Bug-fix space handling in macros

Whitespace after a \controlWord is now handled within the lexer, not by the
MacroExpander.  This way, \\ expanding to \newline doesn't accidentally
cause spaces to get consumed.

* Rename nonstrict -> reportNonstrict; strictBehavior -> useStrictBehavior

* Second category of errorCodes
This commit is contained in:
Erik Demaine
2018-05-16 09:37:41 -04:00
committed by GitHub
parent 7ab4f76e16
commit 1ed99d9ff3
14 changed files with 155 additions and 55 deletions

View File

@@ -79,6 +79,11 @@ You can provide an object of options as the last argument to `katex.render` and
- `strict`: `boolean` or `string` or `function` (default: `"warn"`). If `false` or `"ignore"`, allow features that make writing LaTeX convenient but are not actually supported by (Xe)LaTeX (similar to MathJax). If `true` or `"error"` (LaTeX faithfulness mode), throw an error for any such transgressions. If `"warn"` (the default), warn about such behavior via `console.warn`. Provide a custom function `handler(errorCode, errorMsg, token)` to customize behavior depending on the type of transgression (summarized by the string code `errorCode` and detailed in `errorMsg`); this function can also return `"ignore"`, `"error"`, or `"warn"` to use a built-in behavior. A list of such features and their `errorCode`s:
- `"unicodeTextInMathMode"`: Use of Unicode text characters in math mode.
- `"mathVsTextUnits"`: Mismatch of math vs. text commands and units/mode.
A second category of `errorCode`s never throws errors; instead, the
strictness chosen for them affects the behavior of KaTeX:
- `"newLineInDisplayMode"`: Use of `\\` or `\newline` in display mode
(outside an array/tabular environment). In strict mode, no line break
results, as in LaTeX.
For example:
@@ -131,13 +136,6 @@ will appear larger than 1cm in browser units.
- MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's
`colorIsTextColor` option to `true` for this behavior. KaTeX's default
behavior matches MathJax with its `color.js` extension enabled.
- KaTeX breaks lines with `\\` and `\newline` in inline math, but ignores them
in display math (matching LaTeX's behavior, but not MathJax's behavior).
To allow `\\` and `\newline` to break lines in display mode,
add the following CSS rule:
```css
.katex-display > .katex > .katex-html > .newline { display: block !important; }
```
## Libraries

View File

@@ -33,14 +33,19 @@ import {LexerInterface, Token} from "./Token";
* If there is no matching function or symbol definition, the Parser will
* still reject the input.
*/
// A single whitespace character: space, carriage return, newline, or tab.
const spaceRegexString = "[ \r\n\t]";
// A comment: `%` followed by everything up to and including the next newline.
const commentRegexString = "%[^\n]*[\n]";
// A control word: backslash followed by one or more letters (or `@`).
const controlWordRegexString = "\\\\[a-zA-Z@]+";
// A control symbol: backslash followed by one character outside the
// surrogate range (e.g. `\\`, `\'`).
const controlSymbolRegexString = "\\\\[^\uD800-\uDFFF]";
// A control word together with any whitespace that directly follows it, so
// that the whitespace is matched as part of the same token.
const controlWordWhitespaceRegexString =
`${controlWordRegexString}${spaceRegexString}*`;
// Anchored form of the above; capture group 1 is the bare control word,
// which lets the trailing whitespace be trimmed off a matched token.
const controlWordWhitespaceRegex = new RegExp(
`^(${controlWordRegexString})${spaceRegexString}*$`);
const combiningDiacriticalMarkString = "[\u0300-\u036f]";
export const combiningDiacriticalMarksEndRegex =
new RegExp(`${combiningDiacriticalMarkString}+$`);
const tokenRegex = new RegExp(
"([ \r\n\t]+)|" + // whitespace
`(${spaceRegexString}+)|` + // whitespace
`(${commentRegexString}` + // comments
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
`${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -48,7 +53,7 @@ const tokenRegex = new RegExp(
`${combiningDiacriticalMarkString}*` + // ...plus accents
"|\\\\verb\\*([^]).*?\\3" + // \verb*
"|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
`|${controlWordRegexString}` + // \macroName
`|${controlWordWhitespaceRegexString}` + // \macroName + spaces
`|${controlSymbolRegexString}` + // \\, \', etc.
")"
);
@@ -85,11 +90,17 @@ export default class Lexer implements LexerInterface {
`Unexpected character: '${input[pos]}'`,
new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
}
const text = match[2] || " ";
let text = match[2] || " ";
const start = this.pos;
this.pos += match[0].length;
const end = this.pos;
// Trim any trailing whitespace from control word match
const controlMatch = text.match(controlWordWhitespaceRegex);
if (controlMatch) {
text = controlMatch[1];
}
if (commentRegex.test(text)) {
return this.lex();
} else {

View File

@@ -4,7 +4,7 @@
* until only non-macro tokens remain.
*/
import Lexer, {controlWordRegex} from "./Lexer";
import Lexer from "./Lexer";
import {Token} from "./Token";
import builtinMacros from "./macros";
import type {Mode} from "./types";
@@ -143,11 +143,6 @@ export default class MacroExpander implements MacroContextInterface {
expandOnce(): Token | Token[] {
const topToken = this.popToken();
const name = topToken.text;
const isMacro = (name.charAt(0) === "\\");
if (isMacro && controlWordRegex.test(name)) {
// Consume all spaces after \macro (but not \\, \', etc.)
this.consumeSpaces();
}
if (!this.macros.hasOwnProperty(name)) {
// Fully expanded
this.pushToken(topToken);

View File

@@ -121,7 +121,7 @@ type ParseNodeTypes = {
|},
"cr": {|
type: "cr",
//newRow: boolean,
newRow: boolean,
newLine: boolean,
size: ?ParseNode<*>,
|},

View File

@@ -150,7 +150,7 @@ export default class Parser {
return expression;
}
static endOfExpression = ["}", "\\end", "\\right", "&", "\\cr"];
static endOfExpression = ["}", "\\end", "\\right", "&"];
/**
* Parses an "expression", which is a list of atoms.
@@ -946,7 +946,7 @@ export default class Parser {
!symbols[this.mode][text[0]]) {
// This behavior is not strict (XeTeX-compatible) in math mode.
if (this.settings.strict && this.mode === "math") {
this.settings.nonstrict("unicodeTextInMathMode",
this.settings.reportNonstrict("unicodeTextInMathMode",
`Accented Unicode text character "${text[0]}" used in ` +
`math mode`, nucleus);
}
@@ -967,7 +967,7 @@ export default class Parser {
if (symbols[this.mode][text]) {
if (this.settings.strict && this.mode === 'math' &&
extraLatin.indexOf(text) >= 0) {
this.settings.nonstrict("unicodeTextInMathMode",
this.settings.reportNonstrict("unicodeTextInMathMode",
`Latin-1/Unicode text character "${text[0]}" used in ` +
`math mode`, nucleus);
}
@@ -975,7 +975,7 @@ export default class Parser {
text, this.mode, nucleus);
} else if (supportedCodepoint(text.charCodeAt(0))) {
if (this.settings.strict && this.mode === 'math') {
this.settings.nonstrict("unicodeTextInMathMode",
this.settings.reportNonstrict("unicodeTextInMathMode",
`Unicode text character "${text[0]}" used in math mode`,
nucleus);
}

View File

@@ -61,7 +61,8 @@ class Settings {
* Report nonstrict (non-LaTeX-compatible) input.
* Can safely not be called if `this.strict` is false in JavaScript.
*/
nonstrict(errorCode: string, errorMsg: string, token?: Token | ParseNode<*>) {
reportNonstrict(errorCode: string, errorMsg: string,
token?: Token | ParseNode<*>) {
let strict = this.strict;
if (typeof strict === "function") {
// Allow return value of strict function to be boolean or string
@@ -84,6 +85,45 @@ class Settings {
`unrecognized '${strict}': ${errorMsg} [${errorCode}]`);
}
}
/**
* Check whether to apply strict (LaTeX-adhering) behavior for unusual
* input (like `\\`). Unlike `nonstrict`, will not throw an error;
* instead, "error" translates to a return value of `true`, while "ignore"
* translates to a return value of `false`. May still print a warning:
* "warn" prints a warning and returns `false`.
* This is for the second category of `errorCode`s listed in the README.
*/
useStrictBehavior(errorCode: string, errorMsg: string,
token?: Token | ParseNode<*>) {
let strict = this.strict;
if (typeof strict === "function") {
// Allow return value of strict function to be boolean or string
// (or null/undefined, meaning no further processing).
// But catch any exceptions thrown by function, treating them
// like "error".
try {
strict = strict(errorCode, errorMsg, token);
} catch (error) {
strict = "error";
}
}
if (!strict || strict === "ignore") {
return false;
} else if (strict === true || strict === "error") {
return true;
} else if (strict === "warn") {
typeof console !== "undefined" && console.warn(
"LaTeX-incompatible input and strict mode is set to 'warn': " +
`${errorMsg} [${errorCode}]`);
return false;
} else { // won't happen in type-safe code
typeof console !== "undefined" && console.warn(
"LaTeX-incompatible input and strict mode is set to " +
`unrecognized '${strict}': ${errorMsg} [${errorCode}]`);
return false;
}
}
}
export default Settings;

View File

@@ -55,6 +55,10 @@ function parseArray(
result: ArrayEnvNodeData,
style: StyleStr,
): ParseNode<*> {
// Parse body of array with \\ temporarily mapped to \cr
const oldNewline = parser.gullet.macros["\\\\"];
parser.gullet.macros["\\\\"] = "\\cr";
let row = [];
const body = [row];
const rowGaps = [];
@@ -64,7 +68,7 @@ function parseArray(
numHLinesBeforeRow.push(getNumHLines(parser));
while (true) { // eslint-disable-line no-constant-condition
let cell = parser.parseExpression(false, "\\\\");
let cell = parser.parseExpression(false, "\\cr");
cell = new ParseNode("ordgroup", cell, parser.mode);
if (style) {
cell = new ParseNode("styling", {
@@ -87,7 +91,7 @@ function parseArray(
body.pop();
}
break;
} else if (next === "\\\\" || next === "\\cr") {
} else if (next === "\\cr") {
const cr = parser.parseFunction();
if (!cr) {
throw new ParseError(`Failed to parse function after ${next}`);
@@ -107,6 +111,7 @@ function parseArray(
result.body = body;
result.rowGaps = rowGaps;
result.numHLinesBeforeRow = numHLinesBeforeRow;
parser.gullet.macros["\\\\"] = oldNewline;
return new ParseNode("array", result, parser.mode);
}

View File

@@ -7,9 +7,14 @@ import mathMLTree from "../mathMLTree";
import { calculateSize } from "../units";
import ParseError from "../ParseError";
// \\ is a macro mapping to either \cr or \newline. Because they have the
// same signature, we implement them as one megafunction, with newRow
// indicating whether we're in the \cr case, and newLine indicating whether
// to break the line in the \newline case.
defineFunction({
type: "cr",
names: ["\\\\", "\\cr", "\\newline"],
names: ["\\cr", "\\newline"],
props: {
numArgs: 0,
numOptionalArgs: 1,
@@ -18,39 +23,53 @@ defineFunction({
},
handler: (context, args, optArgs) => {
const newRow = (context.funcName === "\\cr");
let newLine = false;
if (!newRow) {
if (context.parser.settings.displayMode &&
context.parser.settings.useStrictBehavior(
"newLineInDisplayMode", "In LaTeX, \\\\ or \\newline " +
"does nothing in display mode")) {
newLine = false;
} else {
newLine = true;
}
}
return {
type: "cr",
// \\ and \cr both end the row in a tabular environment
// This flag isn't currently needed by environments/array.js
//newRow: context.funcName !== "\\newline",
// \\ and \newline both end the line in an inline math environment
newLine: context.funcName !== "\\cr",
newLine,
newRow,
size: optArgs[0],
};
},
// The following builders are called only at the top level,
// not within tabular environments.
// not within tabular/array environments.
htmlBuilder: (group, options) => {
if (!group.value.newLine) {
if (group.value.newRow) {
throw new ParseError(
"\\cr valid only within a tabular environment");
"\\cr valid only within a tabular/array environment");
}
const span = buildCommon.makeSpan(["mspace", "newline"], [], options);
if (group.value.size) {
span.style.marginTop =
calculateSize(group.value.size.value, options) + "em";
const span = buildCommon.makeSpan(["mspace"], [], options);
if (group.value.newLine) {
span.classes.push("newline");
if (group.value.size) {
span.style.marginTop =
calculateSize(group.value.size.value, options) + "em";
}
}
return span;
},
mathmlBuilder: (group, options) => {
const node = new mathMLTree.MathNode("mspace");
node.setAttribute("linebreak", "newline");
if (group.value.size) {
node.setAttribute("height",
calculateSize(group.value.size.value, options) + "em");
if (group.value.newLine) {
node.setAttribute("linebreak", "newline");
if (group.value.size) {
node.setAttribute("height",
calculateSize(group.value.size.value, options) + "em");
}
}
return node;
},

View File

@@ -22,17 +22,17 @@ defineFunction({
const muUnit = (args[0].value.unit === 'mu');
if (mathFunction) {
if (!muUnit) {
context.parser.settings.nonstrict("mathVsTextUnits",
context.parser.settings.reportNonstrict("mathVsTextUnits",
`LaTeX's ${context.funcName} supports only mu units, ` +
`not ${args[0].value.unit} units`);
}
if (context.parser.mode !== "math") {
context.parser.settings.nonstrict("mathVsTextUnits",
context.parser.settings.reportNonstrict("mathVsTextUnits",
`LaTeX's ${context.funcName} works only in math mode`);
}
} else { // !mathFunction
if (muUnit) {
context.parser.settings.nonstrict("mathVsTextUnits",
context.parser.settings.reportNonstrict("mathVsTextUnits",
`LaTeX's ${context.funcName} doesn't support mu units`);
}
}

View File

@@ -30,11 +30,6 @@
> .katex-html {
display: inline-block;
/* \newline doesn't do anything in display mode */
> .newline {
display: none;
}
}
}
}
@@ -66,7 +61,7 @@
}
.katex-html {
/* \newline is an empty block at top level of inline mode */
/* \newline is an empty block at top level, between .base elements */
> .newline {
display: block;
}

View File

@@ -357,6 +357,9 @@ defineMacro("\\thickspace", "\\;"); // \let\thickspace\;
//////////////////////////////////////////////////////////////////////
// LaTeX source2e
// \\ defaults to \newline, but changes to \cr within array environment
defineMacro("\\\\", "\\newline");
// \def\TeX{T\kern-.1667em\lower.5ex\hbox{E}\kern-.125emX\@}
// TODO: Doesn't normally work in math mode because \@ fails. KaTeX doesn't
// support \@ yet, so that's omitted, and we add \text so that the result

View File

@@ -24,4 +24,4 @@ export type ArgType = "color" | "size" | "url" | "original" | Mode;
export type StyleStr = "text" | "display" | "script" | "scriptscript";
// Allowable token text for "break" arguments in parser
export type BreakToken = "]" | "}" | "$" | "\\)" | "\\\\";
export type BreakToken = "]" | "}" | "$" | "\\)" | "\\cr";

View File

@@ -60,12 +60,12 @@ describe("Parser:", function() {
it("rejects repeated infix operators", function() {
expect("1\\over 2\\over 3").toFailWithParseError(
"only one infix operator per group at position 9: " +
"1\\over 2\\̲o̲v̲e̲r̲ 3");
"1\\over 2\\̲o̲v̲e̲r̲ ̲3");
});
it("rejects conflicting infix operators", function() {
expect("1\\over 2\\choose 3").toFailWithParseError(
"only one infix operator per group at position 9: " +
"1\\over 2\\̲c̲h̲o̲o̲s̲e̲ 3");
"1\\over 2\\̲c̲h̲o̲o̲s̲e̲ ̲3");
});
});

View File

@@ -181,7 +181,7 @@ beforeEach(function() {
expect(actual).toParse(usedSettings);
try {
_getBuilt(actual, settings);
_getBuilt(actual, usedSettings);
} catch (e) {
result.pass = false;
if (e instanceof ParseError) {
@@ -196,6 +196,31 @@ beforeEach(function() {
return result;
},
toNotBuild: function(actual, settings) {
const usedSettings = settings ? settings : defaultSettings;
const result = {
pass: false,
message: () => "Expected '" + actual + "' to fail " +
"building, but it succeeded",
};
try {
_getBuilt(actual, usedSettings);
} catch (e) {
if (e instanceof ParseError) {
result.pass = true;
result.message = () => "'" + actual + "' correctly " +
"didn't build with error: " + e.message;
} else {
result.message = () => "'" + actual + "' failed " +
"building with unknown error: " + e.message;
}
}
return result;
},
toParseLike: function(actual, expected, settings) {
const usedSettings = settings ? settings : defaultSettings;
@@ -2719,6 +2744,10 @@ describe("A macro expander", function() {
compareParseTree("\\text{\\foo }", "\\text{}", {"\\foo": "\\relax"});
});
// `\\` is a control symbol, not a control word, so the space following it
// must survive even though it is mapped here to the control word \relax.
it("should not consume spaces after control-word expansion", function() {
compareParseTree("\\text{\\\\ }", "\\text{ }", {"\\\\": "\\relax"});
});
it("should consume spaces after \\relax", function() {
compareParseTree("\\text{\\relax }", "\\text{}");
});
@@ -3133,7 +3162,7 @@ describe("Newlines via \\\\ and \\newline", function() {
});
it("should not allow \\cr at top level", () => {
expect("hello \\cr world").toNotParse();
expect("hello \\cr world").toNotBuild();
});
});
@@ -3186,6 +3215,11 @@ describe("strict setting", function() {
expect("\\text{é試}").toParse(new Settings({strict: true}));
expect("\\text{é試}").toParse();
});
it("should warn about top-level \\newline in display mode", () => {
// Display mode: `\\` at top level is reported under the
// "newLineInDisplayMode" code; no `strict` value is passed here, so the
// default strictness applies and a warning is expected.
expect("x\\\\y").toWarn(new Settings({displayMode: true}));
// Inline mode: the same input is expected to parse.
expect("x\\\\y").toParse(new Settings({displayMode: false}));
});
});
describe("Internal __* interface", function() {