Allow all Unicode symbols in nonstrict mode (#1217)

Change symbol parsing to allow all Unicode symbols when the appropriate strict setting allows it.  By default, this allows all symbols, but the user will get (probably two) warnings about them.
This commit is contained in:
Erik Demaine
2018-05-17 11:33:01 -04:00
committed by GitHub
parent 431434258d
commit 369b5a8276
3 changed files with 34 additions and 5 deletions

View File

@@ -84,6 +84,9 @@ You can provide an object of options as the last argument to `katex.render` and
- `colorIsTextColor`: `boolean`. If `true`, `\color` will work like LaTeX's `\textcolor`, and take two arguments (e.g., `\color{blue}{hello}`), which restores the old behavior of KaTeX (pre-0.8.0). If `false` (the default), `\color` will work like LaTeX's `\color`, and take one argument (e.g., `\color{blue}hello`). In both cases, `\textcolor` works as in LaTeX (e.g., `\textcolor{blue}{hello}`).
- `maxSize`: `number`. If non-zero, all user-specified sizes, e.g. in `\rule{500em}{500em}`, will be capped to `maxSize` ems. Otherwise, users can make elements and spaces arbitrarily large (the default behavior).
- `strict`: `boolean` or `string` or `function` (default: `"warn"`). If `false` or `"ignore"`, allow features that make writing LaTeX convenient but are not actually supported by (Xe)LaTeX (similar to MathJax). If `true` or `"error"` (LaTeX faithfulness mode), throw an error for any such transgressions. If `"warn"` (the default), warn about such behavior via `console.warn`. Provide a custom function `handler(errorCode, errorMsg, token)` to customize behavior depending on the type of transgression (summarized by the string code `errorCode` and detailed in `errorMsg`); this function can also return `"ignore"`, `"error"`, or `"warn"` to use a built-in behavior. A list of such features and their `errorCode`s:
- `"unknownSymbol"`: Use of an unknown Unicode symbol, which will likely also
lead to warnings about missing character metrics and may produce incorrect
layouts (especially in terms of vertical heights).
- `"unicodeTextInMathMode"`: Use of Unicode text characters in math mode.
- `"mathVsTextUnits"`: Mismatch of math vs. text commands and units/mode.
A second category of `errorCode`s never throw errors, but their strictness

View File

@@ -973,12 +973,17 @@ export default class Parser {
}
symbol = new ParseNode(symbols[this.mode][text].group,
text, this.mode, nucleus);
} else if (supportedCodepoint(text.charCodeAt(0))) {
if (this.settings.strict && this.mode === 'math') {
} else if (text.charCodeAt(0) >= 0x80) { // no symbol for e.g. ^
if (this.settings.strict) {
if (!supportedCodepoint(text.charCodeAt(0))) {
this.settings.reportNonstrict("unknownSymbol",
`Unrecognized Unicode character "${text[0]}"`, nucleus);
} else if (this.mode === "math") {
this.settings.reportNonstrict("unicodeTextInMathMode",
`Unicode text character "${text[0]}" used in math mode`,
nucleus);
}
}
symbol = new ParseNode("textord", text, this.mode, nucleus);
} else {
return null; // EOF, ^, _, {, }, etc.

View File

@@ -137,6 +137,27 @@ describe("unicode", function() {
expect('ěščřžůřťďňőİı').toNotParse(strictSettings);
});
it("should not allow emoji in strict mode", function() {
    // The same emoji is exercised bare and wrapped in \text{...}.
    const emojiInputs = ['✌', '\\text{✌}'];

    // Both forms must be rejected under the shared strict settings.
    for (const tex of emojiInputs) {
        expect(tex).toNotParse(strictSettings);
    }

    // This handler escalates only "unknownSymbol" to an error and ignores
    // every other code, so a rejection here is attributable to that code.
    const unknownSymbolIsFatal = new Settings({
        strict: (errorCode) => {
            if (errorCode === "unknownSymbol") {
                return "error";
            }
            return "ignore";
        },
    });
    for (const tex of emojiInputs) {
        expect(tex).toNotParse(unknownSymbolIsFatal);
    }
});
it("should allow emoji outside strict mode", function() {
    // The same emoji is exercised bare and wrapped in \text{...}.
    const emojiInputs = ['✌', '\\text{✌}'];

    // With no explicit settings passed, each form is expected to warn.
    for (const tex of emojiInputs) {
        expect(tex).toWarn();
    }

    // This handler ignores only "unknownSymbol" and turns every other code
    // into an error, so a successful parse shows no other code was raised.
    const onlyUnknownSymbolIgnored = new Settings({
        strict: (errorCode) => {
            if (errorCode === "unknownSymbol") {
                return "ignore";
            }
            return "error";
        },
    });
    for (const tex of emojiInputs) {
        expect(tex).toParse(onlyUnknownSymbolIgnored);
    }
});
});
describe("unicodeScripts", () => {