mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 03:08:40 +00:00
Support some Unicode Mathematical Alphanumeric Symbols (#1232)
* Support some Unicode Mathematical Alphanumeric Symbols This PR adds support for some of the characters in Unicode range U+1D400 to U+1D7FF, [Mathematical Alphanumeric Symbols](https://www.unicode.org/charts/PDF/U1D400.pdf). Specifically, it adds support for: * A-Z a-z bold * A-Z a-z bold italic * A-Z a-z Fraktur * A-Z a-z sans-serif * A-Z double struck * A-Z script Addresses issue #1215 and parts of #260. * Fix lint error * Fix MathML, and pick up review comments * Fix lint error * Add text mode. Remove sans-serif. * Fix lint error * Fixed \mathrm, added screenshotter test * Change screenshotter test to an array * Add screenshots * Picked up review comments. Add characters
This commit is contained in:
@@ -10,6 +10,7 @@ import fontMetrics from "./fontMetrics";
|
||||
import symbols from "./symbols";
|
||||
import utils from "./utils";
|
||||
import stretchy from "./stretchy";
|
||||
import {wideCharacterFont} from "./wide-character";
|
||||
import {calculateSize} from "./units";
|
||||
|
||||
import type Options from "./Options";
|
||||
@@ -234,7 +235,11 @@ const makeOrd = function(
|
||||
// Math mode or Old font (i.e. \rm)
|
||||
const isFont = mode === "math" || (mode === "text" && options.font);
|
||||
const fontOrFamily = isFont ? options.font : options.fontFamily;
|
||||
if (fontOrFamily) {
|
||||
if (value.charCodeAt(0) === 0xD835) {
|
||||
// surrogate pairs get special treatment
|
||||
const [wideFontName, wideFontClass] = wideCharacterFont(value, mode);
|
||||
return makeSymbol(value, wideFontName, mode, options, [wideFontClass]);
|
||||
} else if (fontOrFamily) {
|
||||
let fontName;
|
||||
let fontClasses;
|
||||
if (fontOrFamily === "boldsymbol") {
|
||||
|
@@ -21,7 +21,9 @@ import stretchy from "./stretchy";
|
||||
*/
|
||||
export const makeText = function(text, mode) {
|
||||
if (symbols[mode][text] && symbols[mode][text].replace) {
|
||||
text = symbols[mode][text].replace;
|
||||
if (text.charCodeAt(0) !== 0xD835) {
|
||||
text = symbols[mode][text].replace;
|
||||
}
|
||||
}
|
||||
|
||||
return new mathMLTree.TextNode(text);
|
||||
|
@@ -108,7 +108,7 @@
|
||||
font-family: KaTeX_AMS;
|
||||
}
|
||||
|
||||
.mathbb {
|
||||
.mathbb, .textbb {
|
||||
font-family: KaTeX_AMS;
|
||||
}
|
||||
|
||||
@@ -116,7 +116,7 @@
|
||||
font-family: KaTeX_Caligraphic;
|
||||
}
|
||||
|
||||
.mathfrak {
|
||||
.mathfrak, .textfrak {
|
||||
font-family: KaTeX_Fraktur;
|
||||
}
|
||||
|
||||
@@ -124,11 +124,11 @@
|
||||
font-family: KaTeX_Typewriter;
|
||||
}
|
||||
|
||||
.mathscr {
|
||||
.mathscr, .textscr {
|
||||
font-family: KaTeX_Script;
|
||||
}
|
||||
|
||||
.mathsf {
|
||||
.mathsf, .textsf {
|
||||
font-family: KaTeX_SansSerif;
|
||||
}
|
||||
|
||||
|
@@ -729,7 +729,6 @@ defineSymbol(text, main, spacing, "\u00a0", " ");
|
||||
defineSymbol(text, main, spacing, "\u00a0", "~");
|
||||
|
||||
// There are lots of symbols which are the same, so we add them in afterwards.
|
||||
|
||||
// All of these are textords in math mode
|
||||
const mathTextSymbols = "0123456789/@.\"";
|
||||
for (let i = 0; i < mathTextSymbols.length; i++) {
|
||||
@@ -745,13 +744,97 @@ for (let i = 0; i < textSymbols.length; i++) {
|
||||
}
|
||||
|
||||
// All of these are textords in text mode, and mathords in math mode
|
||||
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
||||
for (let i = 0; i < letters.length; i++) {
|
||||
const ch = letters.charAt(i);
|
||||
defineSymbol(math, main, mathord, ch, ch);
|
||||
defineSymbol(text, main, textord, ch, ch);
|
||||
}
|
||||
|
||||
// Register the wide (surrogate pair) letters.
// We support some letters in the Unicode range U+1D400 to U+1D7FF,
// Mathematical Alphanumeric Symbols.
// Some editors do not deal well with wide characters, so the characters are
// never written literally in this file; each one is assembled from its
// surrogate pair instead. 0xD835 is the high surrogate for every letter in
// the range we support, so only the low surrogate varies.
let wideChar = "";

// Low surrogate of the capital "A" of each alphabet that is registered for
// both upper and lower case. E.g. 0xDC00 is the low surrogate for bold A.
const bothCaseStarts = [
    0xDC00,  // A-Z a-z bold
    0xDC34,  // A-Z a-z italic
    0xDC68,  // A-Z a-z bold italic
    0xDD04,  // A-Z a-z Fraktur
    0xDDA0,  // A-Z a-z sans-serif
    0xDDD4,  // A-Z a-z sans bold
    0xDE08,  // A-Z a-z sans italic
    0xDE70,  // A-Z a-z monospace
];

// KaTeX fonts have only capital letters for blackboard bold and script.
// See the exception for "k" below.
const capitalOnlyStarts = [
    0xDD38,  // A-Z double struck
    0xDC9C,  // A-Z script
];

// TODO: Add bold script when it is supported by a KaTeX font.
const capitalStarts = bothCaseStarts.concat(capitalOnlyStarts);

for (let i = 0; i < letters.length; i++) {
    const ch = letters.charAt(i);
    // The first 26 entries of `letters` get the capital-only alphabets too.
    const starts = i < 26 ? capitalStarts : bothCaseStarts;

    for (let s = 0; s < starts.length; s++) {
        wideChar = String.fromCharCode(0xD835, starts[s] + i);
        defineSymbol(math, main, mathord, ch, wideChar);
        defineSymbol(text, main, textord, ch, wideChar);
    }
}

// "k" is the only double struck lower case letter in the KaTeX fonts.
wideChar = String.fromCharCode(0xD835, 0xDD5C);  // k double struck
defineSymbol(math, main, mathord, "k", wideChar);
defineSymbol(text, main, textord, "k", wideChar);
|
||||
|
||||
// Next, some wide character numerals.
// Each wide digit is registered with the matching ASCII digit as its
// replacement, in both math and text mode.
for (let i = 0; i < 10; i++) {
    // The replacement has to be the digit itself. Indexing into `letters`
    // here would map the wide digits onto the first ten letters instead.
    const ch = i.toString();

    wideChar = String.fromCharCode(0xD835, 0xDFCE + i);  // 0-9 bold
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);

    wideChar = String.fromCharCode(0xD835, 0xDFE2 + i);  // 0-9 sans serif
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);

    wideChar = String.fromCharCode(0xD835, 0xDFEC + i);  // 0-9 bold sans
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);

    wideChar = String.fromCharCode(0xD835, 0xDFF6 + i);  // 0-9 monospace
    defineSymbol(math, main, mathord, ch, wideChar);
    defineSymbol(text, main, textord, ch, wideChar);
}
|
||||
|
||||
// We add these Latin-1 letters as symbols for backwards-compatibility,
|
||||
// but they are not actually in the font, nor are they supported by the
|
||||
// Unicode accent mechanism, so they fall back to Times font and look ugly.
|
||||
|
110
src/wide-character.js
Normal file
110
src/wide-character.js
Normal file
@@ -0,0 +1,110 @@
|
||||
// @flow
|
||||
|
||||
/**
|
||||
* This file provides support for Unicode range U+1D400 to U+1D7FF,
|
||||
* Mathematical Alphanumeric Symbols.
|
||||
*
|
||||
* Function wideCharacterFont takes a wide character as input and returns
|
||||
* the font information necessary to render it properly.
|
||||
*/
|
||||
|
||||
import type {Mode} from "./types";
|
||||
import ParseError from "./ParseError";
|
||||
|
||||
/**
|
||||
* Data below is from https://www.unicode.org/charts/PDF/U1D400.pdf
|
||||
* That document sorts characters into groups by font type, say bold or italic.
|
||||
*
|
||||
* In the arrays below, each subarray consists of three elements:
|
||||
* * The CSS class of that group when in math mode.
|
||||
* * The CSS class of that group when in text mode.
|
||||
* * The font name, so that KaTeX can get font metrics.
|
||||
*/
|
||||
|
||||
// Font data for the Latin letters of the Mathematical Alphanumeric Symbols
// block. Each row stands for 26 consecutive code points, starting at U+1D400:
// wideCharacterFont selects a row with floor((codePoint - 0x1D400) / 26).
// The row order therefore has to follow code point order; do not reorder,
// insert or remove rows without updating that lookup.
// A row of empty strings marks a group for which there is no KaTeX font.
const wideLatinLetterData: Array<[string, string, string]> = [
|
||||
["mathbf", "textbf", "Main-Bold"], // A-Z bold upright
|
||||
["mathbf", "textbf", "Main-Bold"], // a-z bold upright
|
||||
|
||||
["mathit", "textit", "Math-Italic"], // A-Z italic
|
||||
["mathit", "textit", "Math-Italic"], // a-z italic
|
||||
|
||||
["boldsymbol", "boldsymbol", "Main-BoldItalic"], // A-Z bold italic
|
||||
["boldsymbol", "boldsymbol", "Main-BoldItalic"], // a-z bold italic
|
||||
|
||||
// Map fancy A-Z letters to script, not calligraphic.
|
||||
// This aligns with unicode-math and math fonts (except Cambria Math).
|
||||
["mathscr", "textscr", "Script-Regular"], // A-Z script
|
||||
["", "", ""], // a-z script. No font
|
||||
|
||||
["", "", ""], // A-Z bold script. No font
|
||||
["", "", ""], // a-z bold script. No font
|
||||
|
||||
["mathfrak", "textfrak", "Fraktur-Regular"], // A-Z Fraktur
|
||||
["mathfrak", "textfrak", "Fraktur-Regular"], // a-z Fraktur
|
||||
|
||||
["mathbb", "textbb", "AMS-Regular"], // A-Z double-struck
|
||||
["mathbb", "textbb", "AMS-Regular"], // k double-struck
|
||||
|
||||
["", "", ""], // A-Z bold Fraktur. No font metrics
|
||||
["", "", ""], // a-z bold Fraktur. No font.
|
||||
|
||||
["mathsf", "textsf", "SansSerif-Regular"], // A-Z sans-serif
|
||||
["mathsf", "textsf", "SansSerif-Regular"], // a-z sans-serif
|
||||
|
||||
["mathboldsf", "textboldsf", "SansSerif-Bold"], // A-Z bold sans-serif
|
||||
["mathboldsf", "textboldsf", "SansSerif-Bold"], // a-z bold sans-serif
|
||||
|
||||
["mathitsf", "textitsf", "SansSerif-Italic"], // A-Z italic sans-serif
|
||||
["mathitsf", "textitsf", "SansSerif-Italic"], // a-z italic sans-serif
|
||||
|
||||
["", "", ""], // A-Z bold italic sans. No font
|
||||
["", "", ""], // a-z bold italic sans. No font
|
||||
|
||||
["mathtt", "texttt", "Typewriter-Regular"], // A-Z monospace
|
||||
["mathtt", "texttt", "Typewriter-Regular"], // a-z monospace
|
||||
];
|
||||
|
||||
// Font data for the wide numerals U+1D7CE to U+1D7FF. Each row stands for ten
// consecutive code points (the digits 0-9 of one style): wideCharacterFont
// selects a row with floor((codePoint - 0x1D7CE) / 10), so the row order has
// to follow code point order.
// A row of empty strings marks a group for which there is no KaTeX font.
const wideNumeralData: Array<[string, string, string]> = [
|
||||
["mathbf", "textbf", "Main-Bold"], // 0-9 bold
|
||||
["", "", ""], // 0-9 double-struck. No KaTeX font.
|
||||
["mathsf", "textsf", "SansSerif-Regular"], // 0-9 sans-serif
|
||||
["mathboldsf", "textboldsf", "SansSerif-Bold"], // 0-9 bold sans-serif
|
||||
["mathtt", "texttt", "Typewriter-Regular"], // 0-9 monospace
|
||||
];
|
||||
|
||||
export const wideCharacterFont = function(
|
||||
wideChar: string,
|
||||
mode: Mode,
|
||||
): [string, string] {
|
||||
|
||||
// IE doesn't support codePointAt(). So work with the surrogate pair.
|
||||
const H = wideChar.charCodeAt(0); // high surrogate
|
||||
const L = wideChar.charCodeAt(1); // low surrogate
|
||||
const codePoint = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000;
|
||||
|
||||
const j = mode === "math" ? 0 : 1; // column index for CSS class.
|
||||
|
||||
if (0x1D400 <= codePoint && codePoint < 0x1D6A4) {
|
||||
// wideLatinLetterData contains exactly 26 chars on each row.
|
||||
// So we can calculate the relevant row. No traverse necessary.
|
||||
const i = Math.floor((codePoint - 0x1D400) / 26);
|
||||
return [wideLatinLetterData[i][2], wideLatinLetterData[i][j]];
|
||||
|
||||
} else if (0x1D7CE <= codePoint && codePoint <= 0x1D7FF) {
|
||||
// Numerals, ten per row.
|
||||
const i = Math.floor((codePoint - 0x1D7CE) / 10);
|
||||
return [wideNumeralData[i][2], wideNumeralData[i][j]];
|
||||
|
||||
} else if (codePoint === 0x1D6A5 || codePoint === 0x1D6A6) {
|
||||
// dotless i or j
|
||||
return [wideLatinLetterData[0][2], wideLatinLetterData[0][j]];
|
||||
|
||||
} else if (0x1D6A6 < codePoint && codePoint < 0x1D7CE) {
|
||||
// Greek letters. Not supported, yet.
|
||||
return ["", ""];
|
||||
|
||||
} else {
|
||||
// We don't support any wide characters outside 1D400–1D7FF.
|
||||
throw new ParseError("Unsupported character: " + wideChar);
|
||||
}
|
||||
};
|
@@ -3006,7 +3006,7 @@ describe("Unicode", function() {
|
||||
expect("±×÷∓∔∧∨∩∪≀⊎⊓⊔⊕⊖⊗⊘⊙⊚⊛⊝⊞⊟⊠⊡⊺⊻⊼⋇⋉⋊⋋⋌⋎⋏⋒⋓⩞\u22C5").toParse();
|
||||
});
|
||||
|
||||
it("should parse delimeters", function() {
|
||||
it("should build delimiters", function() {
|
||||
expect("\\left\u230A\\frac{a}{b}\\right\u230B").toBuild();
|
||||
expect("\\left\u2308\\frac{a}{b}\\right\u2308").toBuild();
|
||||
expect("\\left\u27ee\\frac{a}{b}\\right\u27ef").toBuild();
|
||||
@@ -3014,6 +3014,41 @@ describe("Unicode", function() {
|
||||
expect("\\left\u23b0\\frac{a}{b}\\right\u23b1").toBuild();
|
||||
expect("┌x┐ └x┘").toBuild();
|
||||
});
|
||||
|
||||
it("should build some surrogate pairs", function() {
    // Low surrogates of the characters under test. Each one is paired with
    // the high surrogate 0xD835 to form a single wide character.
    const lowSurrogates = [
        0xDC00,  // bold A
        0xDC68,  // bold italic A
        0xDD04,  // Fraktur A
        0xDD38,  // double-struck
        0xDC9C,  // script A
        0xDDA0,  // sans serif A
        0xDDD4,  // bold sans A
        0xDE08,  // italic sans A
        0xDE70,  // monospace A
        0xDFCE,  // bold zero
        0xDFE2,  // sans serif zero
        0xDFEC,  // bold sans zero
        0xDFF6,  // monospace zero
    ];

    let wideCharStr = "";
    for (let i = 0; i < lowSurrogates.length; i++) {
        wideCharStr += String.fromCharCode(0xD835, lowSurrogates[i]);
    }

    // Math mode.
    expect(wideCharStr).toBuild();

    // Text mode. The backslash has to be escaped: in a JavaScript string
    // literal "\t" is a tab, so "\text{" would not contain the \text macro.
    const wideCharText = "\\text{" + wideCharStr + "}";
    expect(wideCharText).toBuild();
});
|
||||
});
|
||||
|
||||
describe("The maxSize setting", function() {
|
||||
|
BIN
test/screenshotter/images/SurrogatePairs-chrome.png
Normal file
BIN
test/screenshotter/images/SurrogatePairs-chrome.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
BIN
test/screenshotter/images/SurrogatePairs-firefox.png
Normal file
BIN
test/screenshotter/images/SurrogatePairs-firefox.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
@@ -292,6 +292,12 @@ SupSubHorizSpacing: |
|
||||
SupSubLeftAlignReset: |
|
||||
\omega^8_{888} \quad \frac{1}{\hat{\omega}^{8}_{888}} \quad \displaystyle\sum_{\omega^{8}_{888}}
|
||||
SupSubOffsets: \displaystyle \int_{2+3}x f^{2+3}+3\lim_{2+3+4+5}f
|
||||
SurrogatePairs: |
|
||||
\begin{array}{l}
|
||||
𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜 \\
|
||||
\text{𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜} \\
|
||||
\mathrm{𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜}
|
||||
\end{array}
|
||||
SvgReset:
|
||||
# This math uses a lot of SVG glyphs
|
||||
tex: '\underbrace{\sqrt{\vec{x}} + \sqrt{\vec{\color{#f00}y}}}'
|
||||
|
Reference in New Issue
Block a user