Support more scripts in \text{} environments. (#1076)

* Support more scripts in \text{} environments.

This diff is a follow-up to PR #1060 which added support for Indic scripts.
In order to support Czech, Turkish and Hungarian text (at least) inside
\text{} environments, we need to recognize the Latin Extended A and B
Unicode blocks. The patch also adds support for Georgian, and enhances
support for Cyrillic by defining the entire Cyrillic Unicode block instead
of defining symbols for a subset of Cyrillic letters as we did previously.

* Only return fontMetrics for supported Unicode scripts in text mode

The Unicode scripts listed in unicodeScripts.js are supported in text mode
but getCharacterMetrics() was returning fake metrics for them even in
math mode. This caused bad handling of \boldsymbol\imath.

* Use Mode from types.js
This commit is contained in:
David Flanagan
2018-01-21 21:53:17 -08:00
committed by Kevin Barabash
parent 5f32b71c85
commit 853e2a4fb7
7 changed files with 90 additions and 45 deletions

View File

@@ -110,38 +110,63 @@ describe("unicode", function() {
// Bare Devanagari input (not wrapped in \text{}) is expected to be rejected
// by the parser. The test title spells the script name "Devanagari".
it("should not parse Devanagari outside \\text{}", function() {
    expect('नमस्ते').toNotParse();
});
// Georgian text wrapped in \text{} is expected to parse.
it("should parse Georgian inside \\text{}", () => {
    const georgianInText = '\\text{გამარჯობა}';
    expect(georgianInText).toParse();
});
// The same Georgian text without a \text{} wrapper is expected to be rejected.
it("should not parse Georgian outside \\text{}", () => {
    const bareGeorgian = 'გამარჯობა';
    expect(bareGeorgian).toNotParse();
});
// Extended Latin letters wrapped in \text{} are expected to parse.
it("should parse extended Latin characters inside \\text{}", () => {
    const extendedLatinInText = '\\text{ěščřžůřťďňőİı}';
    expect(extendedLatinInText).toParse();
});
// The same extended Latin letters without \text{} are expected to be rejected.
it("should not parse extended Latin outside \\text{}", () => {
    const bareExtendedLatin = 'ěščřžůřťďňőİı';
    expect(bareExtendedLatin).toNotParse();
});
});
// Checks supportedCodepoint() and scriptFromCodepoint() against regular
// expressions for every codepoint from 0 through 0xffff.
// NOTE(review): this span looks like a diff hunk rendered without +/- markers,
// so what appear to be superseded lines (cjkRE/hangulRE/brahmicRE/allRE and
// the if/else chain) sit next to their apparent replacements
// (scriptRegExps/allRegExp and the labeled loop) -- confirm against the
// committed file before relying on it as runnable code.
describe("unicodeScripts", () => {
const cjkRE = /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60]/;
const hangulRE = /[\uAC00-\uD7AF]/;
const brahmicRE = /[\u0900-\u109F]/;
const allRE =
/[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/;
// Maps each script name to a RegExp character class listing the ranges
// that the tests below treat as belonging to that script.
const scriptRegExps = {
latin: /[\u0100-\u024f\u0300-\u036f]/,
cyrillic: /[\u0400-\u04ff]/,
brahmic: /[\u0900-\u109F]/,
georgian: /[\u10a0-\u10ff]/,
cjk: /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60]/,
hangul: /[\uAC00-\uD7AF]/,
};
const scriptNames = Object.keys(scriptRegExps);
// Alternation ('|') of every per-script pattern source, so it matches a
// character that falls in any of the ranges listed in scriptRegExps.
const allRegExp = new RegExp(
Object.values(scriptRegExps).map(re => re.source).join('|')
);
it("supportedCodepoint() should return the correct values", () => {
for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
expect(supportedCodepoint(codepoint)).toBe(
allRE.test(String.fromCharCode(codepoint))
allRegExp.test(String.fromCharCode(codepoint))
);
}
});
it("scriptFromCodepoint() should return correct values", () => {
for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
outer: for (let codepoint = 0; codepoint <= 0xffff; codepoint++) {
const character = String.fromCharCode(codepoint);
const script = scriptFromCodepoint(codepoint);
if (cjkRE.test(character)) {
expect(script).toEqual('cjk');
} else if (hangulRE.test(character)) {
expect(script).toEqual('hangul');
} else if (brahmicRE.test(character)) {
expect(script).toEqual('brahmic');
} else {
expect(script).toBe(null);
expect(supportedCodepoint(codepoint)).toBe(false);
for (const scriptName of scriptNames) {
if (scriptRegExps[scriptName].test(character)) {
expect(script).toEqual(scriptName);
// The first script whose RegExp matches settles this codepoint;
// jump to the next iteration of the loop labeled `outer`.
continue outer;
}
}
// Reached after the scriptNames loop finishes without a match: the
// script is expected to be null and the codepoint unsupported.
expect(script).toBe(null);
expect(supportedCodepoint(codepoint)).toBe(false);
}
});
});