Support some Unicode Mathematical Alphanumeric Symbols (#1232)

* Support some Unicode Mathematical Alphanumeric Symbols

This PR adds support for some of the characters in Unicode range U+1D400 to U+1D7FF, [Mathematical Alphanumeric Symbols](https://www.unicode.org/charts/PDF/U1D400.pdf).  Specifically, it adds support for:
* A-Z a-z bold
* A-Z a-z bold italic
* A-Z a-z Fraktur
* A-Z a-z sans-serif
* A-Z double struck
* A-Z script

Addresses issue #1215 and parts of #260.

* Fix lint error

* Fix MathML, and pick up review comments

* Fix lint error

* Add text mode. Remove sans-serif.

* Fix lint error

* Fixed \mathrm, added screenshotter test

* Change screenshotter test to an array

* Add screenshots

* Picked up review comments. Add characters
This commit is contained in:
Ron Kok
2018-05-07 17:06:16 -07:00
committed by Kevin Barabash
parent f25e08d618
commit f01f504cfe
9 changed files with 250 additions and 9 deletions

View File

@@ -10,6 +10,7 @@ import fontMetrics from "./fontMetrics";
import symbols from "./symbols";
import utils from "./utils";
import stretchy from "./stretchy";
import {wideCharacterFont} from "./wide-character";
import {calculateSize} from "./units";
import type Options from "./Options";
@@ -234,7 +235,11 @@ const makeOrd = function(
// Math mode or Old font (i.e. \rm)
const isFont = mode === "math" || (mode === "text" && options.font);
const fontOrFamily = isFont ? options.font : options.fontFamily;
if (fontOrFamily) {
if (value.charCodeAt(0) === 0xD835) {
// surrogate pairs get special treatment
const [wideFontName, wideFontClass] = wideCharacterFont(value, mode);
return makeSymbol(value, wideFontName, mode, options, [wideFontClass]);
} else if (fontOrFamily) {
let fontName;
let fontClasses;
if (fontOrFamily === "boldsymbol") {

View File

@@ -21,7 +21,9 @@ import stretchy from "./stretchy";
*/
export const makeText = function(text, mode) {
if (symbols[mode][text] && symbols[mode][text].replace) {
text = symbols[mode][text].replace;
if (text.charCodeAt(0) !== 0xD835) {
text = symbols[mode][text].replace;
}
}
return new mathMLTree.TextNode(text);

View File

@@ -108,7 +108,7 @@
font-family: KaTeX_AMS;
}
.mathbb {
.mathbb, .textbb {
font-family: KaTeX_AMS;
}
@@ -116,7 +116,7 @@
font-family: KaTeX_Caligraphic;
}
.mathfrak {
.mathfrak, .textfrak {
font-family: KaTeX_Fraktur;
}
@@ -124,11 +124,11 @@
font-family: KaTeX_Typewriter;
}
.mathscr {
.mathscr, .textscr {
font-family: KaTeX_Script;
}
.mathsf {
.mathsf, .textsf {
font-family: KaTeX_SansSerif;
}

View File

@@ -729,7 +729,6 @@ defineSymbol(text, main, spacing, "\u00a0", " ");
defineSymbol(text, main, spacing, "\u00a0", "~");
// There are lots of symbols which are the same, so we add them in afterwards.
// All of these are textords in math mode
const mathTextSymbols = "0123456789/@.\"";
for (let i = 0; i < mathTextSymbols.length; i++) {
@@ -745,13 +744,97 @@ for (let i = 0; i < textSymbols.length; i++) {
}
// All of these are textords in text mode, and mathords in math mode
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
// Register every plain Latin letter under its own name: as a mathord in
// math mode and as a textord in text mode.
letters.split("").forEach(function(letter) {
    defineSymbol(math, main, mathord, letter, letter);
    defineSymbol(text, main, textord, letter, letter);
});
// Wide (surrogate pair) letters.
// We support some letters in the Unicode range U+1D400 to U+1D7FF,
// Mathematical Alphanumeric Symbols.
// Some editors do not deal well with wide characters, so the characters are
// never written literally in this file. Each one is assembled from its
// surrogate pair instead: 0xD835 is the high surrogate shared by the whole
// range, and the tables below hold the low surrogate of the letter "A" in
// each alphabet (e.g. 0xDC00 is the low surrogate for bold A).
let wideChar = "";

// Alphabets for which both A-Z and a-z are registered.
const bothCaseLowSurrogates = [
    0xDC00,   // A-Z a-z bold
    0xDC34,   // A-Z a-z italic
    0xDC68,   // A-Z a-z bold italic
    0xDD04,   // A-Z a-z Fraktur
    0xDDA0,   // A-Z a-z sans-serif
    0xDDD4,   // A-Z a-z sans bold
    0xDE08,   // A-Z a-z sans italic
    0xDE70,   // A-Z a-z monospace
];

// KaTeX fonts have only capital letters for blackboard bold and script
// (apart from the double-struck "k" handled after the loop).
// TODO: Add bold script when it is supported by a KaTeX font.
const capitalOnlyLowSurrogates = [
    0xDD38,   // A-Z double struck
    0xDC9C,   // A-Z script
];
const capitalLowSurrogates =
    bothCaseLowSurrogates.concat(capitalOnlyLowSurrogates);

for (let pos = 0; pos < letters.length; pos++) {
    const latin = letters.charAt(pos);
    // Positions 0-25 are treated as the capital letters.
    const lowSurrogates = pos < 26
        ? capitalLowSurrogates
        : bothCaseLowSurrogates;
    for (let k = 0; k < lowSurrogates.length; k++) {
        wideChar = String.fromCharCode(0xD835, lowSurrogates[k] + pos);
        defineSymbol(math, main, mathord, latin, wideChar);
        defineSymbol(text, main, textord, latin, wideChar);
    }
}

// "k" is the only double struck lower case letter in the KaTeX fonts.
wideChar = String.fromCharCode(0xD835, 0xDD5C);   // k double struck
defineSymbol(math, main, mathord, "k", wideChar);
defineSymbol(text, main, textord, "k", wideChar);
// Next, some wide character numerals.
// Each wide digit is registered with the ASCII digit it represents as its
// replacement, so the replacement is derived from the loop index itself
// (indexing into `letters` here would map the digits 0-9 onto letters).
for (let i = 0; i < 10; i++) {
    const ch = i.toString();
    wideChar = String.fromCharCode(0xD835, 0xDFCE + i);   // 0-9 bold
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);

    wideChar = String.fromCharCode(0xD835, 0xDFE2 + i);   // 0-9 sans serif
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);

    wideChar = String.fromCharCode(0xD835, 0xDFEC + i);   // 0-9 bold sans
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);

    wideChar = String.fromCharCode(0xD835, 0xDFF6 + i);   // 0-9 monospace
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);
}
// We add these Latin-1 letters as symbols for backwards-compatibility,
// but they are not actually in the font, nor are they supported by the
// Unicode accent mechanism, so they fall back to Times font and look ugly.

110
src/wide-character.js Normal file
View File

@@ -0,0 +1,110 @@
// @flow
/**
* This file provides support for Unicode range U+1D400 to U+1D7FF,
* Mathematical Alphanumeric Symbols.
*
* Function wideCharacterFont takes a wide character as input and returns
* the font information necessary to render it properly.
*/
import type {Mode} from "./types";
import ParseError from "./ParseError";
/**
 * Data below is from https://www.unicode.org/charts/PDF/U1D400.pdf
 * That document sorts characters into groups by font type, say bold or italic.
 *
 * In the arrays below, each subarray consists of three elements:
 * * The CSS class of that group when in math mode.
 * * The CSS class of that group when in text mode.
 * * The font name, so that KaTeX can get font metrics.
 *
 * Row order is significant: wideCharacterFont() selects a row of this table
 * with Math.floor((codePoint - 0x1D400) / 26), so each row stands for 26
 * consecutive code points and rows must not be added, removed or reordered.
 * Groups that have no KaTeX font keep a placeholder row of empty strings.
 */
const wideLatinLetterData: Array<[string, string, string]> = [
["mathbf", "textbf", "Main-Bold"], // A-Z bold upright
["mathbf", "textbf", "Main-Bold"], // a-z bold upright
["mathit", "textit", "Math-Italic"], // A-Z italic
["mathit", "textit", "Math-Italic"], // a-z italic
["boldsymbol", "boldsymbol", "Main-BoldItalic"], // A-Z bold italic
["boldsymbol", "boldsymbol", "Main-BoldItalic"], // a-z bold italic
// Map fancy A-Z letters to script, not calligraphic.
// This aligns with unicode-math and math fonts (except Cambria Math).
["mathscr", "textscr", "Script-Regular"], // A-Z script
["", "", ""], // a-z script. No font
["", "", ""], // A-Z bold script. No font
["", "", ""], // a-z bold script. No font
["mathfrak", "textfrak", "Fraktur-Regular"], // A-Z Fraktur
["mathfrak", "textfrak", "Fraktur-Regular"], // a-z Fraktur
["mathbb", "textbb", "AMS-Regular"], // A-Z double-struck
["mathbb", "textbb", "AMS-Regular"], // a-z double-struck row, kept for "k"
["", "", ""], // A-Z bold Fraktur. No font metrics
["", "", ""], // a-z bold Fraktur. No font.
["mathsf", "textsf", "SansSerif-Regular"], // A-Z sans-serif
["mathsf", "textsf", "SansSerif-Regular"], // a-z sans-serif
["mathboldsf", "textboldsf", "SansSerif-Bold"], // A-Z bold sans-serif
["mathboldsf", "textboldsf", "SansSerif-Bold"], // a-z bold sans-serif
["mathitsf", "textitsf", "SansSerif-Italic"], // A-Z italic sans-serif
["mathitsf", "textitsf", "SansSerif-Italic"], // a-z italic sans-serif
["", "", ""], // A-Z bold italic sans. No font
["", "", ""], // a-z bold italic sans. No font
["mathtt", "texttt", "Typewriter-Regular"], // A-Z monospace
["mathtt", "texttt", "Typewriter-Regular"], // a-z monospace
];
// Font data for the wide numerals, in the same three-column layout as
// wideLatinLetterData: [math-mode CSS class, text-mode CSS class, font name].
// wideCharacterFont() selects a row with
// Math.floor((codePoint - 0x1D7CE) / 10), i.e. ten digits per row, so the
// row order must be kept as is.
const wideNumeralData: Array<[string, string, string]> = [
["mathbf", "textbf", "Main-Bold"], // 0-9 bold
["", "", ""], // 0-9 double-struck. No KaTeX font.
["mathsf", "textsf", "SansSerif-Regular"], // 0-9 sans-serif
["mathboldsf", "textboldsf", "SansSerif-Bold"], // 0-9 bold sans-serif
["mathtt", "texttt", "Typewriter-Regular"], // 0-9 monospace
];
export const wideCharacterFont = function(
wideChar: string,
mode: Mode,
): [string, string] {
// IE doesn't support codePointAt(). So work with the surrogate pair.
const H = wideChar.charCodeAt(0); // high surrogate
const L = wideChar.charCodeAt(1); // low surrogate
const codePoint = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000;
const j = mode === "math" ? 0 : 1; // column index for CSS class.
if (0x1D400 <= codePoint && codePoint < 0x1D6A4) {
// wideLatinLetterData contains exactly 26 chars on each row.
// So we can calculate the relevant row. No traverse necessary.
const i = Math.floor((codePoint - 0x1D400) / 26);
return [wideLatinLetterData[i][2], wideLatinLetterData[i][j]];
} else if (0x1D7CE <= codePoint && codePoint <= 0x1D7FF) {
// Numerals, ten per row.
const i = Math.floor((codePoint - 0x1D7CE) / 10);
return [wideNumeralData[i][2], wideNumeralData[i][j]];
} else if (codePoint === 0x1D6A5 || codePoint === 0x1D6A6) {
// dotless i or j
return [wideLatinLetterData[0][2], wideLatinLetterData[0][j]];
} else if (0x1D6A6 < codePoint && codePoint < 0x1D7CE) {
// Greek letters. Not supported, yet.
return ["", ""];
} else {
// We don't support any wide characters outside 1D4001D7FF.
throw new ParseError("Unsupported character: " + wideChar);
}
};

View File

@@ -3006,7 +3006,7 @@ describe("Unicode", function() {
expect("±×÷∓∔∧∨∩∪≀⊎⊓⊔⊕⊖⊗⊘⊙⊚⊛⊝⊞⊟⊠⊡⊺⊻⊼⋇⋉⋊⋋⋌⋎⋏⋒⋓⩞\u22C5").toParse();
});
it("should parse delimeters", function() {
it("should build delimiters", function() {
expect("\\left\u230A\\frac{a}{b}\\right\u230B").toBuild();
expect("\\left\u2308\\frac{a}{b}\\right\u2308").toBuild();
expect("\\left\u27ee\\frac{a}{b}\\right\u27ef").toBuild();
@@ -3014,6 +3014,41 @@ describe("Unicode", function() {
expect("\\left\u23b0\\frac{a}{b}\\right\u23b1").toBuild();
expect("┌x┐ └x┘").toBuild();
});
it("should build some surrogate pairs", function() {
    // Low surrogates of the sample characters. Each one is combined with
    // the high surrogate 0xD835 so that no wide character has to be written
    // literally in this file.
    const lowSurrogates = [
        0xDC00,   // bold A
        0xDC68,   // bold italic A
        0xDD04,   // Fraktur A
        0xDD38,   // double-struck
        0xDC9C,   // script A
        0xDDA0,   // sans serif A
        0xDDD4,   // bold sans A
        0xDE08,   // italic sans A
        0xDE70,   // monospace A
        0xDFCE,   // bold zero
        0xDFE2,   // sans serif zero
        0xDFEC,   // bold sans zero
        0xDFF6,   // monospace zero
    ];
    let wideCharStr = "";
    for (let i = 0; i < lowSurrogates.length; i++) {
        wideCharStr += String.fromCharCode(0xD835, lowSurrogates[i]);
    }

    // Math mode.
    expect(wideCharStr).toBuild();

    // Text mode. The backslash has to be escaped: "\text{" would begin with
    // a TAB character followed by "ext{" and never reach the \text command.
    const wideCharText = "\\text{" + wideCharStr + "}";
    expect(wideCharText).toBuild();
});
});
describe("The maxSize setting", function() {

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View File

@@ -292,6 +292,12 @@ SupSubHorizSpacing: |
SupSubLeftAlignReset: |
\omega^8_{888} \quad \frac{1}{\hat{\omega}^{8}_{888}} \quad \displaystyle\sum_{\omega^{8}_{888}}
SupSubOffsets: \displaystyle \int_{2+3}x f^{2+3}+3\lim_{2+3+4+5}f
SurrogatePairs: |
\begin{array}{l}
𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜 \\
\text{𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜} \\
\mathrm{𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜}
\end{array}
SvgReset:
# This math uses a lot of SVG glyphs
tex: '\underbrace{\sqrt{\vec{x}} + \sqrt{\vec{\color{#f00}y}}}'