Files
KaTeX/src/unicodeMake.js
Erik Demaine 484d44ee70 Unicode accents (#992)
* Unicode accents

* Lexer now looks for combining diacritical marks and adds them to the same character
* Parser's `parseSymbol` now recognizes both combined and uncombined forms of Unicode accents, and builds accent objects just like the accent functions
* Added CJK support to math mode (not just text mode)

* Add invalid combining character test

* Add MathML test

* Add weak support for other Latin-1 characters

This maintains backwards compatibility, but it uses the wrong font.
There's a TODO to fix this later.

Also refactor symbol code to use for..of

* Update Unicode screenshot

* Remove dot from accented i and j (in math mode)

Also add dotless Unicode characters to support some accented i's and j's

* Fix \imath, \jmath, \pounds, and more tests

* Switch from for..of to .split().forEach()

Save around 800 bytes in minified code

* Fix split

* normalize() detection

* Convert back to vanilla for loops

* Fix merge

* Move normalize dependency to unicodeMake.js

* Make unicodeSymbols into a lookup table instead of macros

This is important for multi-accented characters.

* Add comments about when to run

* Move symbols definition into unicodeMake/Symbols.js

* Remove CJK support in text mode

* Add missing semicolon

* Refactor unicodeAccents to its own file

* Dotless i/j support in text mode

* Remove excess character mappings

* Fix Åå in math mode (still via Times)

* Update to support #1030

* Add accented Greek letter support (for supported Greek symbols)

* Update screenshot

* remove Æ, æ, Ø, ø, and ß from math mode test
2017-12-28 23:32:45 -07:00

60 lines
2.1 KiB
JavaScript

/* eslint no-console:0 */
// This is an internal Node tool, not part of the KaTeX distribution,
// whose purpose is to generate unicodeSymbols.js in this directory.
// In this way, only this tool, and not the distribution/browser,
// needs String's normalize function.
//
// This tool should be run (via `node unicodeMake.js` or `make unicode`)
// whenever KaTeX adds support for new accents, and whenever
// the Unicode spec adds new symbols that should be supported.
const accents = require('./unicodeAccents');
// First line of the generated module: a warning that the file is produced
// by this tool. A blank line follows to separate it from the table.
const generatedBanner =
    "// This file is GENERATED by unicodeMake.js. DO NOT MODIFY.";
console.log(generatedBanner);
console.log("");
/**
 * Renders a string as a double-quoted JavaScript literal in which every
 * UTF-16 code unit is written as a four-digit, lowercase `\uXXXX` escape.
 *
 * @param {string} string - Text to escape.
 * @returns {string} The escaped literal, including the surrounding quotes.
 */
const encode = (string) => {
    // split('') cuts on UTF-16 code units (not code points), so each piece
    // maps to exactly one \uXXXX escape.
    const escapes = string.split('').map((unit) => {
        const hex = unit.charCodeAt(0).toString(16);
        return `\\u${hex.padStart(4, '0')}`;
    });
    return `"${escapes.join('')}"`;
};
// Emit the body of the generated module: an object literal whose keys are
// precomposed characters and whose values are the equivalent sequence of
// base letter followed by one or two combining accents.
console.log("export default {");

const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +
    "αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψωΓΔΘΛΞΠΣΥΦΨΩ";

// The keys of the accents table are the strings appended to each letter;
// the table is never modified here, so the key list is computed once.
const accentMarks = Object.getOwnPropertyNames(accents);

for (const letter of letters) {
    accentMarks.forEach((firstMark) => {
        // Single accent: keep the pair only when NFC collapses it into
        // one UTF-16 code unit, i.e. a precomposed character exists.
        const oneAccent = letter + firstMark;
        const oneAccentNFC = oneAccent.normalize('NFC');
        if (oneAccentNFC.length === 1) {
            const entry = ` ${encode(oneAccentNFC)}: ${encode(oneAccent)},`;
            const note = ` // ${oneAccentNFC} = `
                + `${accents[firstMark].text}{${letter}}`;
            console.log(entry + note);
        }

        // Double accent: append every other accent after the first one
        // and apply the same single-code-unit test.
        accentMarks
            .filter((secondMark) => secondMark !== firstMark)
            .forEach((secondMark) => {
                const twoAccents = oneAccent + secondMark;
                const twoAccentsNFC = twoAccents.normalize('NFC');
                if (twoAccentsNFC.length !== 1) {
                    return;
                }
                const entry =
                    ` ${encode(twoAccentsNFC)}: ${encode(twoAccents)},`;
                const note = ` // ${twoAccentsNFC} = `
                    + `${accents[firstMark].text}`
                    + `${accents[secondMark].text}{${letter}}`;
                console.log(entry + note);
            });
    });
}

console.log("};");