Add basic support for Indic scripts in addition to CJK. (#1060)

This patch just makes KaTeX recognize Unicode codepoints in the
range \u0900-\u109f so that the South and Southeast Asian scripts
encoded in that range are no longer automatically rejected.

The patch also generalizes the way that Unicode blocks are handled
to make it easier to add support for new scripts in the future.
src/unicodeRegexes.js is replaced with the new file src/unicodeScripts.js.
This commit is contained in:
David Flanagan
2018-01-12 16:14:31 -08:00
committed by Kevin Barabash
parent d6609f7319
commit 7fe6af2a82
6 changed files with 170 additions and 33 deletions

View File

@@ -6,6 +6,7 @@
import ParseError from "../src/ParseError";
import parseTree from "../src/parseTree";
import Settings from "../src/Settings";
import {scriptFromCodepoint, supportedCodepoint} from "../src/unicodeScripts";
// Settings instance built from an empty options object.
// NOTE(review): presumably shared by the tests in this file — its usage
// is outside this excerpt, so confirm before changing.
const defaultSettings = new Settings({});
@@ -101,4 +102,46 @@ describe("unicode", function() {
expect('私はバナナです。').toNotParse();
expect('여보세요').toNotParse();
});
// Devanagari text wrapped in \text{} is expected to parse.
// (Test title corrected: the script is spelled "Devanagari".)
it("should parse Devanagari inside \\text{}", function() {
    expect('\\text{नमस्ते}').toParse();
});
// The same Devanagari text with no \text{} wrapper is expected to be rejected.
// (Test title corrected: the script is spelled "Devanagari".)
it("should not parse Devanagari outside \\text{}", function() {
    expect('नमस्ते').toNotParse();
});
});
describe("unicodeScripts", () => {
    // Expected classification of each script, written as regex character
    // classes. The values returned by the functions imported from
    // src/unicodeScripts are compared against these, one code unit at a time.
    // Entries are tried in order; the first matching pattern wins.
    const expectedScripts = [
        ['cjk', /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60]/],
        ['hangul', /[\uAC00-\uD7AF]/],
        ['brahmic', /[\u0900-\u109F]/],
    ];

    // Single character class covering every range listed above.
    const anySupportedRE =
        /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/;

    // Both tests sweep every value from 0 up to and including this one.
    const LAST_CODEPOINT = 0xffff;

    it("supportedCodepoint() should return the correct values", () => {
        let cp = 0;
        while (cp <= LAST_CODEPOINT) {
            const shouldBeSupported =
                anySupportedRE.test(String.fromCharCode(cp));
            expect(supportedCodepoint(cp)).toBe(shouldBeSupported);
            cp += 1;
        }
    });

    it("scriptFromCodepoint() should return correct values", () => {
        let cp = 0;
        while (cp <= LAST_CODEPOINT) {
            const ch = String.fromCharCode(cp);
            const actual = scriptFromCodepoint(cp);
            const hit = expectedScripts.find((entry) => entry[1].test(ch));

            if (hit === undefined) {
                // No pattern matched: no script name is expected, and the
                // codepoint must also be reported as unsupported.
                expect(actual).toBe(null);
                expect(supportedCodepoint(cp)).toBe(false);
            } else {
                expect(actual).toEqual(hit[0]);
            }
            cp += 1;
        }
    });
});