diff --git a/src/buildCommon.js b/src/buildCommon.js index c3d94b6d..05bd97de 100644 --- a/src/buildCommon.js +++ b/src/buildCommon.js @@ -10,6 +10,7 @@ import fontMetrics from "./fontMetrics"; import symbols from "./symbols"; import utils from "./utils"; import stretchy from "./stretchy"; +import {wideCharacterFont} from "./wide-character"; import {calculateSize} from "./units"; import type Options from "./Options"; @@ -234,7 +235,11 @@ const makeOrd = function( // Math mode or Old font (i.e. \rm) const isFont = mode === "math" || (mode === "text" && options.font); const fontOrFamily = isFont ? options.font : options.fontFamily; - if (fontOrFamily) { + if (value.charCodeAt(0) === 0xD835) { + // surrogate pairs get special treatment + const [wideFontName, wideFontClass] = wideCharacterFont(value, mode); + return makeSymbol(value, wideFontName, mode, options, [wideFontClass]); + } else if (fontOrFamily) { let fontName; let fontClasses; if (fontOrFamily === "boldsymbol") { diff --git a/src/buildMathML.js b/src/buildMathML.js index 420a8008..a828e4f7 100644 --- a/src/buildMathML.js +++ b/src/buildMathML.js @@ -21,7 +21,9 @@ import stretchy from "./stretchy"; */ export const makeText = function(text, mode) { if (symbols[mode][text] && symbols[mode][text].replace) { - text = symbols[mode][text].replace; + if (text.charCodeAt(0) !== 0xD835) { + text = symbols[mode][text].replace; + } } return new mathMLTree.TextNode(text); diff --git a/src/katex.less b/src/katex.less index b9b990e4..07a066a9 100644 --- a/src/katex.less +++ b/src/katex.less @@ -108,7 +108,7 @@ font-family: KaTeX_AMS; } - .mathbb { + .mathbb, .textbb { font-family: KaTeX_AMS; } @@ -116,7 +116,7 @@ font-family: KaTeX_Caligraphic; } - .mathfrak { + .mathfrak, .textfrak { font-family: KaTeX_Fraktur; } @@ -124,11 +124,11 @@ font-family: KaTeX_Typewriter; } - .mathscr { + .mathscr, .textscr { font-family: KaTeX_Script; } - .mathsf { + .mathsf, .textsf { font-family: KaTeX_SansSerif; } diff --git 
a/src/symbols.js b/src/symbols.js index 6bf0682b..58160414 100644 --- a/src/symbols.js +++ b/src/symbols.js @@ -729,7 +729,6 @@ defineSymbol(text, main, spacing, "\u00a0", " "); defineSymbol(text, main, spacing, "\u00a0", "~"); // There are lots of symbols which are the same, so we add them in afterwards. - // All of these are textords in math mode const mathTextSymbols = "0123456789/@.\""; for (let i = 0; i < mathTextSymbols.length; i++) { @@ -745,13 +744,97 @@ for (let i = 0; i < textSymbols.length; i++) { } // All of these are textords in text mode, and mathords in math mode -const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; +const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; for (let i = 0; i < letters.length; i++) { const ch = letters.charAt(i); defineSymbol(math, main, mathord, ch, ch); defineSymbol(text, main, textord, ch, ch); } +// The next loop loads wide (surrogate pair) characters. +// We support some letters in the Unicode range U+1D400 to U+1D7FF, +// Mathematical Alphanumeric Symbols. +// Some editors do not deal well with wide characters. So don't write the +// string into this file. Instead, create the string from the surrogate pair. +let wideChar = ""; +for (let i = 0; i < letters.length; i++) { + const ch = letters.charAt(i); + + // The hex numbers in the next line are a surrogate pair. + // 0xD835 is the high surrogate for all letters in the range we support. + // 0xDC00 is the low surrogate for bold A. 
+ wideChar = String.fromCharCode(0xD835, 0xDC00 + i); // A-Z a-z bold + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDC34 + i); // A-Z a-z italic + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDC68 + i); // A-Z a-z bold italic + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDD04 + i); // A-Z a-z Fraktur + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDDA0 + i); // A-Z a-z sans-serif + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDDD4 + i); // A-Z a-z sans bold + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDE08 + i); // A-Z a-z sans italic + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDE70 + i); // A-Z a-z monospace + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + if (i < 26) { + // KaTeX fonts have only capital letters for blackboard bold and script. + // See exception for k below. + wideChar = String.fromCharCode(0xD835, 0xDD38 + i); // A-Z double struck + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDC9C + i); // A-Z script + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + } + + // TODO: Add bold script when it is supported by a KaTeX font. 
+} +// "k" is the only double struck lower case letter in the KaTeX fonts. +wideChar = String.fromCharCode(0xD835, 0xDD5C); // k double struck +defineSymbol(math, main, mathord, "k", wideChar); +defineSymbol(text, main, textord, "k", wideChar); + +// Next, some wide character numerals +for (let i = 0; i < 10; i++) { + const ch = String(i); // replacement glyph is the ASCII digit itself + + wideChar = String.fromCharCode(0xD835, 0xDFCE + i); // 0-9 bold + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDFE2 + i); // 0-9 sans serif + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDFEC + i); // 0-9 bold sans + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDFF6 + i); // 0-9 monospace + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); +} + // We add these Latin-1 letters as symbols for backwards-compatibility, // but they are not actually in the font, nor are they supported by the // Unicode accent mechanism, so they fall back to Times font and look ugly. diff --git a/src/wide-character.js b/src/wide-character.js new file mode 100644 index 00000000..c10cc1c9 --- /dev/null +++ b/src/wide-character.js @@ -0,0 +1,110 @@ +// @flow + +/** + * This file provides support for Unicode range U+1D400 to U+1D7FF, + * Mathematical Alphanumeric Symbols. + * + * Function wideCharacterFont takes a wide character as input and returns + * the font information necessary to render it properly. + */ + +import type {Mode} from "./types"; +import ParseError from "./ParseError"; + +/** + * Data below is from https://www.unicode.org/charts/PDF/U1D400.pdf + * That document sorts characters into groups by font type, say bold or italic. 
+ * + * In the arrays below, each subarray consists of three elements: + * * The CSS class of that group when in math mode. + * * The CSS class of that group when in text mode. + * * The font name, so that KaTeX can get font metrics. + */ + +const wideLatinLetterData: Array<[string, string, string]> = [ + ["mathbf", "textbf", "Main-Bold"], // A-Z bold upright + ["mathbf", "textbf", "Main-Bold"], // a-z bold upright + + ["mathit", "textit", "Math-Italic"], // A-Z italic + ["mathit", "textit", "Math-Italic"], // a-z italic + + ["boldsymbol", "boldsymbol", "Main-BoldItalic"], // A-Z bold italic + ["boldsymbol", "boldsymbol", "Main-BoldItalic"], // a-z bold italic + + // Map fancy A-Z letters to script, not calligraphic. + // This aligns with unicode-math and math fonts (except Cambria Math). + ["mathscr", "textscr", "Script-Regular"], // A-Z script + ["", "", ""], // a-z script. No font + + ["", "", ""], // A-Z bold script. No font + ["", "", ""], // a-z bold script. No font + + ["mathfrak", "textfrak", "Fraktur-Regular"], // A-Z Fraktur + ["mathfrak", "textfrak", "Fraktur-Regular"], // a-z Fraktur + + ["mathbb", "textbb", "AMS-Regular"], // A-Z double-struck + ["mathbb", "textbb", "AMS-Regular"], // k double-struck + + ["", "", ""], // A-Z bold Fraktur No font metrics + ["", "", ""], // a-z bold Fraktur. No font. + + ["mathsf", "textsf", "SansSerif-Regular"], // A-Z sans-serif + ["mathsf", "textsf", "SansSerif-Regular"], // a-z sans-serif + + ["mathboldsf", "textboldsf", "SansSerif-Bold"], // A-Z bold sans-serif + ["mathboldsf", "textboldsf", "SansSerif-Bold"], // a-z bold sans-serif + + ["mathitsf", "textitsf", "SansSerif-Italic"], // A-Z italic sans-serif + ["mathitsf", "textitsf", "SansSerif-Italic"], // a-z italic sans-serif + + ["", "", ""], // A-Z bold italic sans. No font + ["", "", ""], // a-z bold italic sans. 
No font + + ["mathtt", "texttt", "Typewriter-Regular"], // A-Z monospace + ["mathtt", "texttt", "Typewriter-Regular"], // a-z monospace +]; + +const wideNumeralData: Array<[string, string, string]> = [ + ["mathbf", "textbf", "Main-Bold"], // 0-9 bold + ["", "", ""], // 0-9 double-struck. No KaTeX font. + ["mathsf", "textsf", "SansSerif-Regular"], // 0-9 sans-serif + ["mathboldsf", "textboldsf", "SansSerif-Bold"], // 0-9 bold sans-serif + ["mathtt", "texttt", "Typewriter-Regular"], // 0-9 monospace +]; + +export const wideCharacterFont = function( + wideChar: string, + mode: Mode, +): [string, string] { + + // IE doesn't support codePointAt(). So work with the surrogate pair. + const H = wideChar.charCodeAt(0); // high surrogate + const L = wideChar.charCodeAt(1); // low surrogate + const codePoint = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000; + + const j = mode === "math" ? 0 : 1; // column index for CSS class. + + if (0x1D400 <= codePoint && codePoint < 0x1D6A4) { + // wideLatinLetterData contains exactly 26 chars on each row. + // So we can calculate the relevant row. No traverse necessary. + const i = Math.floor((codePoint - 0x1D400) / 26); + return [wideLatinLetterData[i][2], wideLatinLetterData[i][j]]; + + } else if (0x1D7CE <= codePoint && codePoint <= 0x1D7FF) { + // Numerals, ten per row. + const i = Math.floor((codePoint - 0x1D7CE) / 10); + return [wideNumeralData[i][2], wideNumeralData[i][j]]; + + } else if (codePoint === 0x1D6A4 || codePoint === 0x1D6A5) { + // dotless i (U+1D6A4) or dotless j (U+1D6A5) + return [wideLatinLetterData[0][2], wideLatinLetterData[0][j]]; + + } else if (0x1D6A5 < codePoint && codePoint < 0x1D7CE) { + // Greek letters. Not supported, yet. + return ["", ""]; + + } else { + // We don't support any wide characters outside 1D400–1D7FF. 
+ throw new ParseError("Unsupported character: " + wideChar); + } +}; diff --git a/test/katex-spec.js b/test/katex-spec.js index 2d2cb613..cae86e94 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -3006,7 +3006,7 @@ describe("Unicode", function() { expect("±×÷∓∔∧∨∩∪≀⊎⊓⊔⊕⊖⊗⊘⊙⊚⊛⊝⊞⊟⊠⊡⊺⊻⊼⋇⋉⋊⋋⋌⋎⋏⋒⋓⩞\u22C5").toParse(); }); - it("should parse delimeters", function() { + it("should build delimiters", function() { expect("\\left\u230A\\frac{a}{b}\\right\u230B").toBuild(); expect("\\left\u2308\\frac{a}{b}\\right\u2308").toBuild(); expect("\\left\u27ee\\frac{a}{b}\\right\u27ef").toBuild(); @@ -3014,6 +3014,41 @@ describe("Unicode", function() { expect("\\left\u23b0\\frac{a}{b}\\right\u23b1").toBuild(); expect("┌x┐ └x┘").toBuild(); }); + + it("should build some surrogate pairs", function() { + let wideCharStr = ""; + wideCharStr += String.fromCharCode(0xD835, 0xDC00); // bold A + wideCharStr += String.fromCharCode(0xD835, 0xDC68); // bold italic A + wideCharStr += String.fromCharCode(0xD835, 0xDD04); // Fraktur A + wideCharStr += String.fromCharCode(0xD835, 0xDD38); // double-struck + wideCharStr += String.fromCharCode(0xD835, 0xDC9C); // script A + wideCharStr += String.fromCharCode(0xD835, 0xDDA0); // sans serif A + wideCharStr += String.fromCharCode(0xD835, 0xDDD4); // bold sans A + wideCharStr += String.fromCharCode(0xD835, 0xDE08); // italic sans A + wideCharStr += String.fromCharCode(0xD835, 0xDE70); // monospace A + wideCharStr += String.fromCharCode(0xD835, 0xDFCE); // bold zero + wideCharStr += String.fromCharCode(0xD835, 0xDFE2); // sans serif zero + wideCharStr += String.fromCharCode(0xD835, 0xDFEC); // bold sans zero + wideCharStr += String.fromCharCode(0xD835, 0xDFF6); // monospace zero + expect(wideCharStr).toBuild(); + + let wideCharText = "\\text{"; + wideCharText += String.fromCharCode(0xD835, 0xDC00); // bold A + wideCharText += String.fromCharCode(0xD835, 0xDC68); // bold italic A + wideCharText += String.fromCharCode(0xD835, 0xDD04); // Fraktur A 
+ wideCharText += String.fromCharCode(0xD835, 0xDD38); // double-struck + wideCharText += String.fromCharCode(0xD835, 0xDC9C); // script A + wideCharText += String.fromCharCode(0xD835, 0xDDA0); // sans serif A + wideCharText += String.fromCharCode(0xD835, 0xDDD4); // bold sans A + wideCharText += String.fromCharCode(0xD835, 0xDE08); // italic sans A + wideCharText += String.fromCharCode(0xD835, 0xDE70); // monospace A + wideCharText += String.fromCharCode(0xD835, 0xDFCE); // bold zero + wideCharText += String.fromCharCode(0xD835, 0xDFE2); // sans serif zero + wideCharText += String.fromCharCode(0xD835, 0xDFEC); // bold sans zero + wideCharText += String.fromCharCode(0xD835, 0xDFF6); // monospace zero + wideCharText += "}"; + expect(wideCharText).toBuild(); + }); }); describe("The maxSize setting", function() { diff --git a/test/screenshotter/images/SurrogatePairs-chrome.png b/test/screenshotter/images/SurrogatePairs-chrome.png new file mode 100644 index 00000000..770b13b4 Binary files /dev/null and b/test/screenshotter/images/SurrogatePairs-chrome.png differ diff --git a/test/screenshotter/images/SurrogatePairs-firefox.png b/test/screenshotter/images/SurrogatePairs-firefox.png new file mode 100644 index 00000000..1d18ebb2 Binary files /dev/null and b/test/screenshotter/images/SurrogatePairs-firefox.png differ diff --git a/test/screenshotter/ss_data.yaml b/test/screenshotter/ss_data.yaml index 93f1cfff..1016651b 100644 --- a/test/screenshotter/ss_data.yaml +++ b/test/screenshotter/ss_data.yaml @@ -292,6 +292,12 @@ SupSubHorizSpacing: | SupSubLeftAlignReset: | \omega^8_{888} \quad \frac{1}{\hat{\omega}^{8}_{888}} \quad \displaystyle\sum_{\omega^{8}_{888}} SupSubOffsets: \displaystyle \int_{2+3}x f^{2+3}+3\lim_{2+3+4+5}f +SurrogatePairs: | + \begin{array}{l} + 𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜 \\ + \text{𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜} \\ + \mathrm{𝐀𝐚𝑨𝒂𝔅𝔞𝔸𝒜} + \end{array} SvgReset: # This math uses a lot of SVG glyphs tex: '\underbrace{\sqrt{\vec{x}} + \sqrt{\vec{\color{#f00}y}}}'