mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-04 18:58:39 +00:00
* unicodeTextInMathMode setting * When `unicodeTextInMathMode` is `true`, accented letters from `unicodeSymbols.js`, and CJK and other supported languages, get added support in math mode (as requested in #895). * When `unicodeTextInMathMode` is `false, all of these stop working in math mode, and are only supported in text mode (matching XeTeX behavior). Note that this is a backwards incompatibility with some 0.9.0 alpha/betas. * Fix handling of Unicode characters ð, Å, å * Fix double handling of ð (math maps to \eth, not special Unicode character) * Remove Åå special math handling, thanks to #1125 * Forbid extraLatin when unicodeTextInMathMode is false
180 lines
6.1 KiB
JavaScript
180 lines
6.1 KiB
JavaScript
/* eslint max-len:0 */
|
|
/* global beforeEach: false */
|
|
/* global expect: false */
|
|
/* global it: false */
|
|
/* global describe: false */
|
|
import ParseError from "../src/ParseError";
|
|
import parseTree from "../src/parseTree";
|
|
import Settings from "../src/Settings";
|
|
import {scriptFromCodepoint, supportedCodepoint} from "../src/unicodeScripts";
|
|
|
|
const defaultSettings = new Settings({});
|
|
|
|
const parseAndSetResult = function(expr, result, settings) {
|
|
try {
|
|
return parseTree(expr, settings || defaultSettings);
|
|
} catch (e) {
|
|
result.pass = false;
|
|
if (e instanceof ParseError) {
|
|
result.message = () => "'" + expr + "' failed " +
|
|
"parsing with error: " + e.message;
|
|
} else {
|
|
result.message = () => "'" + expr + "' failed " +
|
|
"parsing with unknown error: " + e.message;
|
|
}
|
|
}
|
|
};
|
|
|
|
describe("unicode", function() {
|
|
beforeEach(function() {
|
|
expect.extend({
|
|
|
|
toParse: function(actual, settings) {
|
|
const usedSettings = settings ? settings : defaultSettings;
|
|
|
|
const result = {
|
|
pass: true,
|
|
message: () => "'" + actual + "' succeeded parsing",
|
|
};
|
|
parseAndSetResult(actual, result, usedSettings);
|
|
return result;
|
|
},
|
|
|
|
toNotParse: function(actual, settings) {
|
|
const usedSettings = settings ? settings : defaultSettings;
|
|
|
|
const result = {
|
|
pass: false,
|
|
message: () => "Expected '" + actual + "' to fail " +
|
|
"parsing, but it succeeded",
|
|
};
|
|
|
|
try {
|
|
parseTree(actual, usedSettings);
|
|
} catch (e) {
|
|
if (e instanceof ParseError) {
|
|
result.pass = true;
|
|
result.message = () => "'" + actual + "' correctly " +
|
|
"didn't parse with error: " + e.message;
|
|
} else {
|
|
result.message = () => "'" + actual + "' failed " +
|
|
"parsing with unknown error: " + e.message;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
},
|
|
});
|
|
});
|
|
|
|
it("should parse Latin-1 inside \\text{}", function() {
|
|
expect('\\text{ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
|
|
'ÆÇÐØÞßæçðøþ}').toParse();
|
|
});
|
|
|
|
it("should not parse Latin-1 outside \\text{} without setting", function() {
|
|
const chars = 'ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçþ';
|
|
for (const ch of chars) {
|
|
expect(ch).toNotParse();
|
|
}
|
|
});
|
|
|
|
it("should parse Latin-1 outside \\text{}", function() {
|
|
expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
|
|
'ÇÐÞçðþ').toParse({unicodeTextInMathMode: true});
|
|
});
|
|
|
|
it("should parse all lower case Greek letters", function() {
|
|
expect("αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψω").toParse();
|
|
});
|
|
|
|
it("should parse math upper case Greek letters", function() {
|
|
expect("ΓΔΘΛΞΠΣΥΦΨΩ").toParse();
|
|
});
|
|
|
|
it("should parse Cyrillic inside \\text{}", function() {
|
|
expect('\\text{БГДЖЗЙЛФЦШЫЮЯ}').toParse();
|
|
});
|
|
|
|
it("should not parse Cyrillic outside \\text{}", function() {
|
|
expect('БГДЖЗЙЛФЦШЫЮЯ').toNotParse();
|
|
});
|
|
|
|
it("should parse CJK inside \\text{}", function() {
|
|
expect('\\text{私はバナナです}').toParse();
|
|
expect('\\text{여보세요}').toParse();
|
|
});
|
|
|
|
it("should not parse CJK outside \\text{}", function() {
|
|
expect('私はバナナです。').toNotParse();
|
|
expect('여보세요').toNotParse();
|
|
});
|
|
|
|
it("should parse Devangari inside \\text{}", function() {
|
|
expect('\\text{नमस्ते}').toParse();
|
|
});
|
|
|
|
it("should not parse Devangari outside \\text{}", function() {
|
|
expect('नमस्ते').toNotParse();
|
|
});
|
|
|
|
it("should parse Georgian inside \\text{}", function() {
|
|
expect('\\text{გამარჯობა}').toParse();
|
|
});
|
|
|
|
it("should not parse Georgian outside \\text{}", function() {
|
|
expect('გამარჯობა').toNotParse();
|
|
});
|
|
|
|
it("should parse extended Latin characters inside \\text{}", function() {
|
|
expect('\\text{ěščřžůřťďňőİı}').toParse();
|
|
});
|
|
|
|
it("should not parse extended Latin outside \\text{}", function() {
|
|
expect('ěščřžůřťďňőİı').toNotParse();
|
|
});
|
|
|
|
});
|
|
|
|
describe("unicodeScripts", () => {
|
|
const scriptRegExps = {
|
|
latin: /[\u0100-\u024f\u0300-\u036f]/,
|
|
cyrillic: /[\u0400-\u04ff]/,
|
|
brahmic: /[\u0900-\u109F]/,
|
|
georgian: /[\u10a0-\u10ff]/,
|
|
cjk: /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60]/,
|
|
hangul: /[\uAC00-\uD7AF]/,
|
|
};
|
|
|
|
const scriptNames = Object.keys(scriptRegExps);
|
|
|
|
const allRegExp = new RegExp(
|
|
Object.values(scriptRegExps).map(re => re.source).join('|')
|
|
);
|
|
|
|
it("supportedCodepoint() should return the correct values", () => {
|
|
for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
|
|
expect(supportedCodepoint(codepoint)).toBe(
|
|
allRegExp.test(String.fromCharCode(codepoint))
|
|
);
|
|
}
|
|
});
|
|
|
|
it("scriptFromCodepoint() should return correct values", () => {
|
|
outer: for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
|
|
const character = String.fromCharCode(codepoint);
|
|
const script = scriptFromCodepoint(codepoint);
|
|
|
|
for (const scriptName of scriptNames) {
|
|
if (scriptRegExps[scriptName].test(character)) {
|
|
expect(script).toEqual(scriptName);
|
|
continue outer;
|
|
}
|
|
}
|
|
|
|
expect(script).toBe(null);
|
|
expect(supportedCodepoint(codepoint)).toBe(false);
|
|
}
|
|
});
|
|
});
|