mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-06 03:38:39 +00:00
unicodeTextInMathMode setting (#1117)
* unicodeTextInMathMode setting * When `unicodeTextInMathMode` is `true`, accented letters from `unicodeSymbols.js`, and CJK and other supported languages, get added support in math mode (as requested in #895). * When `unicodeTextInMathMode` is `false`, all of these stop working in math mode, and are only supported in text mode (matching XeTeX behavior). Note that this is a backwards incompatibility with some 0.9.0 alpha/betas. * Fix handling of Unicode characters ð, Å, å * Fix double handling of ð (math maps to \eth, not special Unicode character) * Remove Åå special math handling, thanks to #1125 * Forbid extraLatin when unicodeTextInMathMode is false
This commit is contained in:
committed by
Kevin Barabash
parent
7de91f73eb
commit
aed1c1e564
@@ -71,6 +71,7 @@ You can provide an object of options as the last argument to `katex.render` and
|
||||
- `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color which unsupported commands are rendered in. (default: `#cc0000`)
|
||||
- `macros`: `object`. A collection of custom macros. Each macro is a property with a name like `\name` (written `"\\name"` in JavaScript) which maps to a string that describes the expansion of the macro. Single-character keys can also be included in which case the character will be redefined as the given macro (similar to TeX active characters).
|
||||
- `colorIsTextColor`: `boolean`. If `true`, `\color` will work like LaTeX's `\textcolor`, and take two arguments (e.g., `\color{blue}{hello}`), which restores the old behavior of KaTeX (pre-0.8.0). If `false` (the default), `\color` will work like LaTeX's `\color`, and take one argument (e.g., `\color{blue}hello`). In both cases, `\textcolor` works as in LaTeX (e.g., `\textcolor{blue}{hello}`).
|
||||
- `unicodeTextInMathMode`: `boolean`. If `true`, supported Unicode text characters like `é` and `試` will also work in math mode. (They always work in text mode.) The default is `false`, which matches XeTeX behavior; `true` emulates MathJax behavior.
|
||||
- `maxSize`: `number`. If non-zero, all user-specified sizes, e.g. in `\rule{500em}{500em}`, will be capped to `maxSize` ems. Otherwise, users can make elements and spaces arbitrarily large (the default behavior).
|
||||
|
||||
For example:
|
||||
@@ -121,6 +122,12 @@ will appear larger than 1cm in browser units.
|
||||
`align` in math mode. The `aligned` environment offers the same functionality
|
||||
but in math mode, so use that instead or define a macro that maps `align` to
|
||||
`aligned`.
|
||||
- MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's
|
||||
`colorIsTextColor` option to `true` for this behavior. KaTeX's default
|
||||
behavior matches MathJax with its `color.js` extension enabled.
|
||||
- MathJax supports Unicode text characters in math mode, unlike LaTeX.
|
||||
To support this behavior in KaTeX, set the `unicodeTextInMathMode` option
|
||||
to `true`.
|
||||
|
||||
## Libraries
|
||||
|
||||
|
@@ -4,7 +4,7 @@
|
||||
import functions from "./functions";
|
||||
import environments from "./environments";
|
||||
import MacroExpander from "./MacroExpander";
|
||||
import symbols from "./symbols";
|
||||
import symbols, { extraLatin } from "./symbols";
|
||||
import { validUnit } from "./units";
|
||||
import { supportedCodepoint } from "./unicodeScripts";
|
||||
import unicodeAccents from "./unicodeAccents";
|
||||
@@ -968,9 +968,12 @@ export default class Parser {
|
||||
return newDollar(nucleus);
|
||||
}
|
||||
// At this point, we should have a symbol, possibly with accents.
|
||||
// First expand any accented base symbol according to unicodeSymbols.
|
||||
// First expand any accented base symbol according to unicodeSymbols,
|
||||
// unless we're in math mode and unicodeTextInMathMode is false
|
||||
// (XeTeX-compatible mode).
|
||||
if (unicodeSymbols.hasOwnProperty(text[0]) &&
|
||||
!symbols[this.mode][text[0]]) {
|
||||
!symbols[this.mode][text[0]] &&
|
||||
(this.settings.unicodeTextInMathMode || this.mode === "text")) {
|
||||
text = unicodeSymbols[text[0]] + text.substr(1);
|
||||
}
|
||||
// Strip off any combining characters
|
||||
@@ -986,10 +989,15 @@ export default class Parser {
|
||||
// Recognize base symbol
|
||||
let symbol = null;
|
||||
if (symbols[this.mode][text]) {
|
||||
if (this.mode === 'math' && extraLatin.indexOf(text) >= 0 &&
|
||||
!this.settings.unicodeTextInMathMode) {
|
||||
throw new ParseError(`Unicode text character ${text} used in ` +
|
||||
`math mode without unicodeTextInMathMode setting`, nucleus);
|
||||
}
|
||||
symbol = new ParseNode(symbols[this.mode][text].group,
|
||||
text, this.mode, nucleus);
|
||||
} else if (this.mode === "text" &&
|
||||
supportedCodepoint(text.charCodeAt(0))) {
|
||||
} else if (supportedCodepoint(text.charCodeAt(0)) &&
|
||||
(this.mode === "text" || this.settings.unicodeTextInMathMode)) {
|
||||
symbol = new ParseNode("textord", text, this.mode, nucleus);
|
||||
} else {
|
||||
return null; // EOF, ^, _, {, }, etc.
|
||||
|
@@ -14,6 +14,7 @@ export type SettingsOptions = {
|
||||
errorColor?: string;
|
||||
macros?: MacroMap;
|
||||
colorIsTextColor?: boolean;
|
||||
unicodeTextInMathMode?: boolean;
|
||||
maxSize?: number;
|
||||
};
|
||||
|
||||
@@ -33,6 +34,7 @@ class Settings {
|
||||
errorColor: string;
|
||||
macros: MacroMap;
|
||||
colorIsTextColor: boolean;
|
||||
unicodeTextInMathMode: boolean;
|
||||
maxSize: number;
|
||||
|
||||
constructor(options: SettingsOptions) {
|
||||
@@ -43,6 +45,8 @@ class Settings {
|
||||
this.errorColor = utils.deflt(options.errorColor, "#cc0000");
|
||||
this.macros = options.macros || {};
|
||||
this.colorIsTextColor = utils.deflt(options.colorIsTextColor, false);
|
||||
this.unicodeTextInMathMode =
|
||||
utils.deflt(options.unicodeTextInMathMode, false);
|
||||
this.maxSize = Math.max(0, utils.deflt(options.maxSize, Infinity));
|
||||
}
|
||||
}
|
||||
|
@@ -739,7 +739,7 @@ for (let i = 0; i < letters.length; i++) {
|
||||
// but they are not actually in the font, nor are they supported by the
|
||||
// Unicode accent mechanism, so they fall back to Times font and look ugly.
|
||||
// TODO(edemaine): Fix this.
|
||||
const extraLatin = "ÇÐÞçþ";
|
||||
export const extraLatin = "ÇÐÞçþ";
|
||||
for (let i = 0; i < extraLatin.length; i++) {
|
||||
const ch = extraLatin.charAt(i);
|
||||
defineSymbol(math, main, mathord, ch, ch);
|
||||
|
@@ -2864,7 +2864,8 @@ describe("Unicode accents", function() {
|
||||
"\\tilde n" +
|
||||
"\\grave o\\acute o\\hat o\\tilde o\\ddot o" +
|
||||
"\\grave u\\acute u\\hat u\\ddot u" +
|
||||
"\\acute y\\ddot y");
|
||||
"\\acute y\\ddot y",
|
||||
{unicodeTextInMathMode: true});
|
||||
});
|
||||
|
||||
it("should parse Latin-1 letters in text mode", function() {
|
||||
@@ -2894,18 +2895,21 @@ describe("Unicode accents", function() {
|
||||
});
|
||||
|
||||
it("should parse combining characters", function() {
|
||||
expect("A\u0301C\u0301").toParseLike("Á\\acute C");
|
||||
expect("A\u0301C\u0301").toParseLike("Á\\acute C",
|
||||
{unicodeTextInMathMode: true});
|
||||
expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}");
|
||||
});
|
||||
|
||||
it("should parse multi-accented characters", function() {
|
||||
expect("ấā́ắ\\text{ấā́ắ}").toParse();
|
||||
expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
|
||||
// Doesn't parse quite the same as
|
||||
// "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups.
|
||||
});
|
||||
|
||||
it("should parse accented i's and j's", function() {
|
||||
expect("íȷ́").toParseLike("\\acute ı\\acute ȷ");
|
||||
expect("íȷ́").toParseLike("\\acute ı\\acute ȷ",
|
||||
{unicodeTextInMathMode: true});
|
||||
expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -3009,6 +3013,29 @@ describe("Symbols", function() {
|
||||
});
|
||||
});
|
||||
|
||||
describe("unicodeTextInMathMode setting", function() {
|
||||
it("should allow unicode text when true", () => {
|
||||
expect("é").toParse({unicodeTextInMathMode: true});
|
||||
expect("試").toParse({unicodeTextInMathMode: true});
|
||||
});
|
||||
|
||||
it("should forbid unicode text when false", () => {
|
||||
expect("é").toNotParse({unicodeTextInMathMode: false});
|
||||
expect("試").toNotParse({unicodeTextInMathMode: false});
|
||||
});
|
||||
|
||||
it("should forbid unicode text when default", () => {
|
||||
expect("é").toNotParse();
|
||||
expect("試").toNotParse();
|
||||
});
|
||||
|
||||
it("should always allow unicode text in text mode", () => {
|
||||
expect("\\text{é試}").toParse({unicodeTextInMathMode: false});
|
||||
expect("\\text{é試}").toParse({unicodeTextInMathMode: true});
|
||||
expect("\\text{é試}").toParse();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Internal __* interface", function() {
|
||||
const latex = "\\sum_{k = 0}^{\\infty} x^k";
|
||||
const rendered = katex.renderToString(latex);
|
||||
|
@@ -95,6 +95,7 @@ describe("A MathML builder", function() {
|
||||
});
|
||||
|
||||
it('accents turn into <mover accent="true"> in MathML', function() {
|
||||
expect(getMathML("über fiancée")).toMatchSnapshot();
|
||||
expect(getMathML("über fiancée", {unicodeTextInMathMode: true}))
|
||||
.toMatchSnapshot();
|
||||
});
|
||||
});
|
||||
|
@@ -72,9 +72,16 @@ describe("unicode", function() {
|
||||
'ÆÇÐØÞßæçðøþ}').toParse();
|
||||
});
|
||||
|
||||
it("should not parse Latin-1 outside \\text{} without setting", function() {
|
||||
const chars = 'ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçþ';
|
||||
for (const ch of chars) {
|
||||
expect(ch).toNotParse();
|
||||
}
|
||||
});
|
||||
|
||||
it("should parse Latin-1 outside \\text{}", function() {
|
||||
expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
|
||||
'ÇÐÞçðþ').toParse();
|
||||
'ÇÐÞçðþ').toParse({unicodeTextInMathMode: true});
|
||||
});
|
||||
|
||||
it("should parse all lower case Greek letters", function() {
|
||||
|
Reference in New Issue
Block a user