Implement strict mode (replacing unicodeTextInMathMode) (#1278)

* Implement strict mode (replacing unicodeTextInMathMode) Add new "strict" setting (default value false) that can take a boolean (whether to throw an error or silently ignore), string ("ignore", "warn", or "error"), or a function possibly returning such a value. This enables a variety of ways of handling or ignoring transgressions from "true" LaTeX behavior, making KaTeX easier to use while still providing the ability for strict LaTeX adherence. Resolve #1226, implementing that spec, for two existing transgressions from regular LaTeX: * src/functions/kern.js had some errors and warnings about use of (units in) math vs. text mode commands. * The former setting unicodeTextInMathMode (not in any released version) needed to be set to true to enable Unicode text symbols in math mode. Now these are controlled by the strict setting. By default, KaTeX is now very permissive, but if desired, the user can request warnings or errors. * Rewrite strict description * Add tests for strict functions * Stricter type for strict * Switch default strict setting to "warn" * Fix new flow error * Fix another flow bug
2025-10-06 03:38:39 +00:00 · 2018-05-13 14:27:30 -04:00
parent 4801ab875a
commit 7ab4f76e16
10 changed files with 208 additions and 79 deletions
--- a/README.md
+++ b/README.md
@@ -75,8 +75,10 @@ You can provide an object of options as the last argument to `katex.render` and
 - `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color that unsupported commands and invalid LaTeX are rendered in when `throwOnError` is set to `false`. (default: `#cc0000`)
 - `macros`: `object`. A collection of custom macros. Each macro is a property with a name like `\name` (written `"\\name"` in JavaScript) which maps to a string that describes the expansion of the macro. Single-character keys can also be included in which case the character will be redefined as the given macro (similar to TeX active characters).
 - `colorIsTextColor`: `boolean`. If `true`, `\color` will work like LaTeX's `\textcolor`, and take two arguments (e.g., `\color{blue}{hello}`), which restores the old behavior of KaTeX (pre-0.8.0). If `false` (the default), `\color` will work like LaTeX's `\color`, and take one argument (e.g., `\color{blue}hello`).  In both cases, `\textcolor` works as in LaTeX (e.g., `\textcolor{blue}{hello}`).
- `unicodeTextInMathMode`: `boolean`. If `true`, supported unicode text characters like `é` and `試` will also work in math mode. (They always work in text mode.) The default is `false`, which matches XeTeX behavior; `true` emulates MathJax behavior.
 - `maxSize`: `number`. If non-zero, all user-specified sizes, e.g. in `\rule{500em}{500em}`, will be capped to `maxSize` ems. Otherwise, users can make elements and spaces arbitrarily large (the default behavior).
+- `strict`: `boolean` or `string` or `function` (default: `"warn"`). If `false` or `"ignore"`, allow features that make writing LaTeX convenient but are not actually supported by (Xe)LaTeX (similar to MathJax). If `true` or `"error"` (LaTeX faithfulness mode), throw an error for any such transgressions. If `"warn"` (the default), warn about such behavior via `console.warn`. Provide a custom function `handler(errorCode, errorMsg, token)` to customize behavior depending on the type of transgression (summarized by the string code `errorCode` and detailed in `errorMsg`); this function can also return `"ignore"`, `"error"`, or `"warn"` to use a built-in behavior.  A list of such features and their `errorCode`s:
+  - `"unicodeTextInMathMode"`: Use of Unicode text characters in math mode.
+  - `"mathVsTextUnits"`: Mismatch of math vs. text commands and units/mode.

 For example:

@@ -129,9 +131,6 @@ will appear larger than 1cm in browser units.
 - MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's
  `colorIsTextColor` option to `true` for this behavior.  KaTeX's default
  behavior matches MathJax with its `color.js` extension enabled.
- MathJax supports Unicode text characters in math mode, unlike LaTeX.
-  To support this behavior in KaTeX, set the `unicodeTextInMathMode` option
-  to `true`.
 - KaTeX breaks lines with `\\` and `\newline` in inline math, but ignores them
  in display math (matching LaTeX's behavior, but not MathJax's behavior).
  To allow `\\` and `\newline` to break lines in display mode,
--- a/package.json
+++ b/package.json
@@ -87,6 +87,7 @@
    "nomnom": "^1.8.1"
  },
  "jest": {
+    "setupTestFrameworkScriptFile": "<rootDir>/test/setup.js",
    "snapshotSerializers": [
      "jest-serializer-html"
    ],
--- a/src/Parser.js
+++ b/src/Parser.js
@@ -941,12 +941,15 @@ export default class Parser {
                }, "text"), nucleus);
        }
        // At this point, we should have a symbol, possibly with accents.
-        // First expand any accented base symbol according to unicodeSymbols,
-        // unless we're in math mode and unicodeTextInMathMode is false
-        // (XeTeX-compatible mode).
+        // First expand any accented base symbol according to unicodeSymbols.
        if (unicodeSymbols.hasOwnProperty(text[0]) &&
-            !symbols[this.mode][text[0]] &&
-            (this.settings.unicodeTextInMathMode || this.mode === "text")) {
+            !symbols[this.mode][text[0]]) {
+            // This behavior is not strict (XeTeX-compatible) in math mode.
+            if (this.settings.strict && this.mode === "math") {
+                this.settings.nonstrict("unicodeTextInMathMode",
+                    `Accented Unicode text character "${text[0]}" used in ` +
+                    `math mode`, nucleus);
+            }
            text = unicodeSymbols[text[0]] + text.substr(1);
        }
        // Strip off any combining characters
@@ -962,15 +965,20 @@ export default class Parser {
        // Recognize base symbol
        let symbol = null;
        if (symbols[this.mode][text]) {
-            if (this.mode === 'math' && extraLatin.indexOf(text) >= 0 &&
-                !this.settings.unicodeTextInMathMode) {
-                throw new ParseError(`Unicode text character ${text} used in ` +
-                    `math mode without unicodeTextInMathMode setting`, nucleus);
+            if (this.settings.strict && this.mode === 'math' &&
+                extraLatin.indexOf(text) >= 0) {
+                this.settings.nonstrict("unicodeTextInMathMode",
+                    `Latin-1/Unicode text character "${text[0]}" used in ` +
+                    `math mode`, nucleus);
            }
            symbol = new ParseNode(symbols[this.mode][text].group,
                            text, this.mode, nucleus);
-        } else if (supportedCodepoint(text.charCodeAt(0)) &&
-            (this.mode === "text" || this.settings.unicodeTextInMathMode)) {
+        } else if (supportedCodepoint(text.charCodeAt(0))) {
+            if (this.settings.strict && this.mode === 'math') {
+                this.settings.nonstrict("unicodeTextInMathMode",
+                    `Unicode text character "${text[0]}" used in math mode`,
+                    nucleus);
+            }
            symbol = new ParseNode("textord", text, this.mode, nucleus);
        } else {
            return null;  // EOF, ^, _, {, }, etc.
--- a/src/Settings.js
+++ b/src/Settings.js
@@ -1,20 +1,28 @@
 // @flow
+/* eslint no-console:0 */
 /**
 * This is a module for storing settings passed into KaTeX. It correctly handles
 * default settings.
 */

 import utils from "./utils";
+import ParseError from "./ParseError.js";
+import ParseNode from "./ParseNode";
+import {Token} from "./Token";

 import type { MacroMap } from "./macros";

+export type StrictFunction =
+    (errorCode: string, errorMsg: string, token?: Token | ParseNode<*>) =>
+    ?(boolean | string);
+
 export type SettingsOptions = {
    displayMode?: boolean;
    throwOnError?: boolean;
    errorColor?: string;
    macros?: MacroMap;
    colorIsTextColor?: boolean;
-    unicodeTextInMathMode?: boolean;
+    strict?: boolean | "ignore" | "warn" | "error" | StrictFunction;
    maxSize?: number;
 };

@@ -34,7 +42,7 @@ class Settings {
    errorColor: string;
    macros: MacroMap;
    colorIsTextColor: boolean;
-    unicodeTextInMathMode: boolean;
+    strict: boolean | "ignore" | "warn" | "error" | StrictFunction;
    maxSize: number;

    constructor(options: SettingsOptions) {
@@ -45,10 +53,37 @@ class Settings {
        this.errorColor = utils.deflt(options.errorColor, "#cc0000");
        this.macros = options.macros || {};
        this.colorIsTextColor = utils.deflt(options.colorIsTextColor, false);
-        this.unicodeTextInMathMode =
-            utils.deflt(options.unicodeTextInMathMode, false);
+        this.strict = utils.deflt(options.strict, "warn");
        this.maxSize = Math.max(0, utils.deflt(options.maxSize, Infinity));
    }
+
+    /**
+     * Report nonstrict (non-LaTeX-compatible) input.
+     * Callers may skip this call when `this.strict` is falsy (e.g. `false`).
+     */
+    nonstrict(errorCode: string, errorMsg: string, token?: Token | ParseNode<*>) {
+        let strict = this.strict;
+        if (typeof strict === "function") {
+            // Allow return value of strict function to be boolean or string
+            // (or null/undefined, meaning no further processing).
+            strict = strict(errorCode, errorMsg, token);
+        }
+        if (!strict || strict === "ignore") {
+            return;
+        } else if (strict === true || strict === "error") {
+            throw new ParseError(
+                "LaTeX-incompatible input and strict mode is set to 'error': " +
+                `${errorMsg} [${errorCode}]`, token);
+        } else if (strict === "warn") {
+            typeof console !== "undefined" && console.warn(
+                "LaTeX-incompatible input and strict mode is set to 'warn': " +
+                `${errorMsg} [${errorCode}]`);
+        } else {  // won't happen in type-safe code
+            typeof console !== "undefined" && console.warn(
+                "LaTeX-incompatible input and strict mode is set to " +
+                `unrecognized '${strict}': ${errorMsg} [${errorCode}]`);
+        }
+    }
 }

 export default Settings;
--- a/src/functions/kern.js
+++ b/src/functions/kern.js
@@ -1,12 +1,10 @@
 //@flow
-/* eslint no-console:0 */
 // Horizontal spacing commands

 import defineFunction from "../defineFunction";
 import buildCommon from "../buildCommon";
 import mathMLTree from "../mathMLTree";
 import { calculateSize } from "../units";
-import ParseError from "../ParseError";

 // TODO: \hskip and \mskip should support plus and minus in lengths

@@ -19,22 +17,24 @@ defineFunction({
        allowedInText: true,
    },
    handler: (context, args) => {
+        if (context.parser.settings.strict) {
            const mathFunction = (context.funcName[1] === 'm');  // \mkern, \mskip
            const muUnit = (args[0].value.unit === 'mu');
            if (mathFunction) {
                if (!muUnit) {
-                typeof console !== "undefined" && console.warn(
-                    `In LaTeX, ${context.funcName} supports only mu units, ` +
+                    context.parser.settings.nonstrict("mathVsTextUnits",
+                        `LaTeX's ${context.funcName} supports only mu units, ` +
                        `not ${args[0].value.unit} units`);
                }
                if (context.parser.mode !== "math") {
-                throw new ParseError(
-                    `Can't use function '${context.funcName}' in text mode`);
+                    context.parser.settings.nonstrict("mathVsTextUnits",
+                        `LaTeX's ${context.funcName} works only in math mode`);
                }
            } else {  // !mathFunction
                if (muUnit) {
-                typeof console !== "undefined" && console.warn(
-                    `In LaTeX, ${context.funcName} does not support mu units`);
+                    context.parser.settings.nonstrict("mathVsTextUnits",
+                        `LaTeX's ${context.funcName} doesn't support mu units`);
+                }
            }
        }
        return {
--- a/static/main.js
+++ b/static/main.js
@@ -28,6 +28,17 @@ function init() {
        options.displayMode = false;
    }

+    // Use `strict=warn` for warning strict mode or `strict=error`
+    // (or `=1`/`=t`/`=true`/`=y`/`=yes`)
+    // to make strict mode throw an error on LaTeX-incompatible input.
+    if (query.strict) {
+        if (query.strict.match(/^(1|t|y|e)/)) {
+            options.strict = "error";
+        } if (query.strict && query.strict.match(/^(w)/)) {
+            options.strict = "warn";
+        }
+    }
+
    // The `before` or `pre` search parameter puts normal text before the math.
    // The `after` or `post` search parameter puts normal text after the math.
    // Example use: testing baseline alignment.
--- a/test/Warning.js
+++ b/test/Warning.js
@@ -0,0 +1,20 @@
+// @flow
+
+class Warning {
+    name: string;
+    message: string;
+    stack: string;
+
+    constructor(message: string) {
+        // $FlowFixMe
+        this.name = "Warning";
+        // $FlowFixMe
+        this.message = "Warning: " + message;
+        // $FlowFixMe
+        this.stack = new Error().stack;
+    }
+}
+// $FlowFixMe
+Warning.prototype = Object.create(Error.prototype);
+
+module.exports = Warning;
--- a/test/katex-spec.js
+++ b/test/katex-spec.js
@@ -35,7 +35,9 @@ const serializer = {

 expect.addSnapshotSerializer(serializer);

-const defaultSettings = new Settings({});
+const defaultSettings = new Settings({
+    strict: false, // deal with warnings only when desired
+});
 const defaultOptions = new Options({
    style: Style.TEXT,
    size: 5,
@@ -2928,24 +2930,23 @@ describe("A parser taking String objects", function() {

 describe("Unicode accents", function() {
    it("should parse Latin-1 letters in math mode", function() {
-        // TODO(edemaine): Unsupported Latin-1 letters in math: ÅåÇÐÞçðþ
-        expect("ÀÁÂÃÄÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäèéêëìíîïñòóôõöùúûüýÿ")
+        // TODO(edemaine): Unsupported Latin-1 letters in math: ÇÐÞçðþ
+        expect("ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ")
        .toParseLike(
-            "\\grave A\\acute A\\hat A\\tilde A\\ddot A" +
+            "\\grave A\\acute A\\hat A\\tilde A\\ddot A\\mathring A" +
            "\\grave E\\acute E\\hat E\\ddot E" +
            "\\grave I\\acute I\\hat I\\ddot I" +
            "\\tilde N" +
            "\\grave O\\acute O\\hat O\\tilde O\\ddot O" +
            "\\grave U\\acute U\\hat U\\ddot U" +
            "\\acute Y" +
-            "\\grave a\\acute a\\hat a\\tilde a\\ddot a" +
+            "\\grave a\\acute a\\hat a\\tilde a\\ddot a\\mathring a" +
            "\\grave e\\acute e\\hat e\\ddot e" +
            "\\grave ı\\acute ı\\hat ı\\ddot ı" +
            "\\tilde n" +
            "\\grave o\\acute o\\hat o\\tilde o\\ddot o" +
            "\\grave u\\acute u\\hat u\\ddot u" +
-            "\\acute y\\ddot y",
-            {unicodeTextInMathMode: true});
+            "\\acute y\\ddot y");
    });

    it("should parse Latin-1 letters in text mode", function() {
@@ -2970,26 +2971,24 @@ describe("Unicode accents", function() {

    it("should support \\aa in text mode", function() {
        expect("\\text{\\aa\\AA}").toParseLike("\\text{\\r a\\r A}");
-        expect("\\aa").toNotParse();
-        expect("\\Aa").toNotParse();
+        expect("\\aa").toNotParse(new Settings({strict: true}));
+        expect("\\Aa").toNotParse(new Settings({strict: true}));
    });

    it("should parse combining characters", function() {
-        expect("A\u0301C\u0301").toParseLike("Á\\acute C",
-            {unicodeTextInMathMode: true});
+        expect("A\u0301C\u0301").toParseLike("Á\\acute C");
        expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}");
    });

    it("should parse multi-accented characters", function() {
-        expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
+        expect("ấā́ắ\\text{ấā́ắ}").toParse();
        // Doesn't parse quite the same as
        // "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups.
    });

    it("should parse accented i's and j's", function() {
-        expect("íȷ́").toParseLike("\\acute ı\\acute ȷ",
-            {unicodeTextInMathMode: true});
-        expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
+        expect("íȷ́").toParseLike("\\acute ı\\acute ȷ");
+        expect("ấā́ắ\\text{ấā́ắ}").toParse();
    });
 });

@@ -3154,25 +3153,37 @@ describe("Symbols", function() {
    });
 });

-describe("unicodeTextInMathMode setting", function() {
-    it("should allow unicode text when true", () => {
-        expect("é").toParse({unicodeTextInMathMode: true});
-        expect("試").toParse({unicodeTextInMathMode: true});
+describe("strict setting", function() {
+    it("should allow unicode text when not strict", () => {
+        expect("é").toParse(new Settings({strict: false}));
+        expect("試").toParse(new Settings({strict: false}));
+        expect("é").toParse(new Settings({strict: "ignore"}));
+        expect("試").toParse(new Settings({strict: "ignore"}));
+        expect("é").toParse(new Settings({strict: () => false}));
+        expect("試").toParse(new Settings({strict: () => false}));
+        expect("é").toParse(new Settings({strict: () => "ignore"}));
+        expect("試").toParse(new Settings({strict: () => "ignore"}));
    });

-    it("should forbid unicode text when false", () => {
-        expect("é").toNotParse({unicodeTextInMathMode: false});
-        expect("試").toNotParse({unicodeTextInMathMode: false});
+    it("should forbid unicode text when strict", () => {
+        expect("é").toNotParse(new Settings({strict: true}));
+        expect("試").toNotParse(new Settings({strict: true}));
+        expect("é").toNotParse(new Settings({strict: "error"}));
+        expect("試").toNotParse(new Settings({strict: "error"}));
+        expect("é").toNotParse(new Settings({strict: () => true}));
+        expect("試").toNotParse(new Settings({strict: () => true}));
+        expect("é").toNotParse(new Settings({strict: () => "error"}));
+        expect("試").toNotParse(new Settings({strict: () => "error"}));
    });

-    it("should forbid unicode text when default", () => {
-        expect("é").toNotParse();
-        expect("試").toNotParse();
+    it("should warn about unicode text when default", () => {
+        expect("é").toWarn(new Settings());
+        expect("試").toWarn(new Settings());
    });

    it("should always allow unicode text in text mode", () => {
-        expect("\\text{é試}").toParse({unicodeTextInMathMode: false});
-        expect("\\text{é試}").toParse({unicodeTextInMathMode: true});
+        expect("\\text{é試}").toParse(new Settings({strict: false}));
+        expect("\\text{é試}").toParse(new Settings({strict: true}));
        expect("\\text{é試}").toParse();
    });
 });
--- a/test/setup.js
+++ b/test/setup.js
@@ -0,0 +1,41 @@
+/* global jest: false */
+/* global expect: false */
+
+import katex from "../katex";
+import Settings from "../src/Settings";
+import Warning from "./Warning";
+
+global.console.warn = jest.fn((warning) => {
+    throw new Warning(warning);
+});
+
+const defaultSettings = new Settings({
+    strict: false, // enable dealing with warnings only when needed
+});
+
+expect.extend({
+    toWarn: function(actual, settings) {
+        const usedSettings = settings ? settings : defaultSettings;
+
+        const result = {
+            pass: false,
+            message: () =>
+                `Expected '${actual}' to generate a warning, but it succeeded`,
+        };
+
+        try {
+            katex.__renderToDomTree(actual, usedSettings);
+        } catch (e) {
+            if (e instanceof Warning) {
+                result.pass = true;
+                result.message = () =>
+                    `'${actual}' correctly generated warning: ${e.message}`;
+            } else {
+                result.message = () =>
+                    `'${actual}' failed building with unknown error: ${e.message}`;
+            }
+        }
+
+        return result;
+    },
+});
--- a/test/unicode-spec.js
+++ b/test/unicode-spec.js
@@ -8,7 +8,10 @@ import parseTree from "../src/parseTree";
 import Settings from "../src/Settings";
 import {scriptFromCodepoint, supportedCodepoint} from "../src/unicodeScripts";

-const defaultSettings = new Settings({});
+const defaultSettings = new Settings({
+    strict: false, // deal with warnings only when desired
+});
+const strictSettings = new Settings({strict: true});

 const parseAndSetResult = function(expr, result, settings) {
    try {
@@ -72,16 +75,16 @@ describe("unicode", function() {
            'ÆÇÐØÞßæçðøþ}').toParse();
    });

-    it("should not parse Latin-1 outside \\text{} without setting", function() {
+    it("should not parse Latin-1 outside \\text{} with strict", function() {
        const chars = 'ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçþ';
        for (const ch of chars) {
-            expect(ch).toNotParse();
+            expect(ch).toNotParse(strictSettings);
        }
    });

    it("should parse Latin-1 outside \\text{}", function() {
        expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
-            'ÇÐÞçðþ').toParse({unicodeTextInMathMode: true});
+            'ÇÐÞçðþ').toParse();
    });

    it("should parse all lower case Greek letters", function() {
@@ -96,8 +99,8 @@ describe("unicode", function() {
        expect('\\text{БГДЖЗЙЛФЦШЫЮЯ}').toParse();
    });

-    it("should not parse Cyrillic outside \\text{}", function() {
-        expect('БГДЖЗЙЛФЦШЫЮЯ').toNotParse();
+    it("should not parse Cyrillic outside \\text{} with strict", function() {
+        expect('БГДЖЗЙЛФЦШЫЮЯ').toNotParse(strictSettings);
    });

    it("should parse CJK inside \\text{}", function() {
@@ -105,33 +108,33 @@ describe("unicode", function() {
        expect('\\text{여보세요}').toParse();
    });

-    it("should not parse CJK outside \\text{}", function() {
-        expect('私はバナナです。').toNotParse();
-        expect('여보세요').toNotParse();
+    it("should not parse CJK outside \\text{} with strict", function() {
+        expect('私はバナナです。').toNotParse(strictSettings);
+        expect('여보세요').toNotParse(strictSettings);
    });

    it("should parse Devangari inside \\text{}", function() {
        expect('\\text{नमस्ते}').toParse();
    });

-    it("should not parse Devangari outside \\text{}", function() {
-        expect('नमस्ते').toNotParse();
+    it("should not parse Devangari outside \\text{} with strict", function() {
+        expect('नमस्ते').toNotParse(strictSettings);
    });

    it("should parse Georgian inside \\text{}", function() {
        expect('\\text{გამარჯობა}').toParse();
    });

-    it("should not parse Georgian outside \\text{}", function() {
-        expect('გამარჯობა').toNotParse();
+    it("should not parse Georgian outside \\text{} with strict", function() {
+        expect('გამარჯობა').toNotParse(strictSettings);
    });

    it("should parse extended Latin characters inside \\text{}", function() {
        expect('\\text{ěščřžůřťďňőİı}').toParse();
    });

-    it("should not parse extended Latin outside \\text{}", function() {
-        expect('ěščřžůřťďňőİı').toNotParse();
+    it("should not parse extended Latin outside \\text{} with strict", function() {
+        expect('ěščřžůřťďňőİı').toNotParse(strictSettings);
    });

 });