mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 19:28:39 +00:00
feat: Support Unicode (sub|super)script characters (#3633)
* feat: Support Unicode (sub|super)script characters
* Acquire tokens via repeated fetch()
* Match more Unicode (sub|super)script characters
* Update docs with new characters
* Add Greek characters to RegEx
* Pick up review comments

Co-authored-by: Erik Demaine <edemaine@mit.edu>
This commit is contained in:
@@ -190,6 +190,9 @@ $\allowbreak α β γ δ ϵ ζ η θ ι κ λ μ ν ξ o π \allowbreak ρ σ τ
|
|||||||
|
|
||||||
Direct Input: $∂ ∇ ℑ Ⅎ ℵ ℶ ℷ ℸ ⅁ ℏ ð − ∗$
|
Direct Input: $∂ ∇ ℑ Ⅎ ℵ ℶ ℷ ℸ ⅁ ℏ ð − ∗$
|
||||||
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÙÚÛÜÝÞßàáâãäåçèéêëìíîïðñòóôöùúûüýþÿ
|
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÙÚÛÜÝÞßàáâãäåçèéêëìíîïðñòóôöùúûüýþÿ
|
||||||
|
₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ⁺⁻⁼⁽⁾⁰¹²³⁴⁵⁶⁷⁸⁹ᵃᵇᶜᵈᵉᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘʷˣʸᶻᵛᵝᵞᵟᵠᵡ
|
||||||
|
|
||||||
|
Math-mode Unicode (sub|super)script characters will render as if you had written regular characters in a subscript or superscript. For instance, `A²⁺³` will render the same as `A^{2+3}`.
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div class="katex-cards" id="math-alpha">
|
<div class="katex-cards" id="math-alpha">
|
||||||
|
@@ -9,6 +9,7 @@ import ParseError from "./ParseError";
|
|||||||
import {combiningDiacriticalMarksEndRegex} from "./Lexer";
|
import {combiningDiacriticalMarksEndRegex} from "./Lexer";
|
||||||
import Settings from "./Settings";
|
import Settings from "./Settings";
|
||||||
import SourceLocation from "./SourceLocation";
|
import SourceLocation from "./SourceLocation";
|
||||||
|
import {uSubsAndSups, unicodeSubRegEx} from "./unicodeSupOrSub";
|
||||||
import {Token} from "./Token";
|
import {Token} from "./Token";
|
||||||
|
|
||||||
// Pre-evaluate both modules as unicodeSymbols require String.normalize()
|
// Pre-evaluate both modules as unicodeSymbols require String.normalize()
|
||||||
@@ -399,6 +400,29 @@ export default class Parser {
|
|||||||
}
|
}
|
||||||
// Put everything into an ordgroup as the superscript
|
// Put everything into an ordgroup as the superscript
|
||||||
superscript = {type: "ordgroup", mode: this.mode, body: primes};
|
superscript = {type: "ordgroup", mode: this.mode, body: primes};
|
||||||
|
} else if (uSubsAndSups[lex.text]) {
|
||||||
|
// A Unicode subscript or superscript character.
|
||||||
|
// We treat these similarly to the unicode-math package.
|
||||||
|
// So we render a string of Unicode (sub|super)scripts the
|
||||||
|
// same as a (sub|super)script of regular characters.
|
||||||
|
let str = uSubsAndSups[lex.text];
|
||||||
|
const isSub = unicodeSubRegEx.test(lex.text);
|
||||||
|
this.consume();
|
||||||
|
// Continue fetching tokens to fill out the string.
|
||||||
|
while (true) {
|
||||||
|
const token = this.fetch().text;
|
||||||
|
if (!(uSubsAndSups[token])) { break; }
|
||||||
|
if (unicodeSubRegEx.test(token) !== isSub) { break; }
|
||||||
|
this.consume();
|
||||||
|
str += uSubsAndSups[token];
|
||||||
|
}
|
||||||
|
// Now create a (sub|super)script.
|
||||||
|
const body = (new Parser(str, this.settings)).parse();
|
||||||
|
if (isSub) {
|
||||||
|
subscript = {type: "ordgroup", mode: "math", body};
|
||||||
|
} else {
|
||||||
|
superscript = {type: "ordgroup", mode: "math", body};
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// If it wasn't ^, _, or ', stop parsing super/subscripts
|
// If it wasn't ^, _, or ', stop parsing super/subscripts
|
||||||
break;
|
break;
|
||||||
|
108
src/unicodeSupOrSub.js
Normal file
108
src/unicodeSupOrSub.js
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
// Helpers for Parser.js handling of Unicode (sub|super)script characters.
|
||||||
|
|
||||||
|
// Matches a string whose first character is one of the Unicode subscript
// characters listed in the class below (signs, parentheses, digits, Latin
// letters and Greek letters). The pattern is anchored with `^` and has no
// `g`/`y` flag, so repeated `.test()` calls carry no `lastIndex` state.
// Parser.js calls `.test()` on a token's text to tell a subscript character
// apart from a superscript one.
export const unicodeSubRegEx = /^[₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ]/;
|
||||||
|
|
||||||
|
/**
 * Lookup table from a single Unicode subscript or superscript character to
 * the regular character it stands for (for example '²' -> '2', '₊' -> '+').
 *
 * Parser.js indexes this table with a token's text and treats a truthy
 * result as "this token is a Unicode (sub|super)script character". The table
 * is therefore built on a null prototype: a plain object literal would also
 * answer for inherited names such as `constructor` or `toString`, which are
 * truthy but are not entries of this table.
 *
 * The table is frozen, so it cannot be modified after module evaluation.
 */
export const uSubsAndSups = Object.freeze(Object.assign(Object.create(null), {
    // Subscript forms (the characters matched by unicodeSubRegEx).
    '₊': '+',
    '₋': '-',
    '₌': '=',
    '₍': '(',
    '₎': ')',
    '₀': '0',
    '₁': '1',
    '₂': '2',
    '₃': '3',
    '₄': '4',
    '₅': '5',
    '₆': '6',
    '₇': '7',
    '₈': '8',
    '₉': '9',
    '\u2090': 'a',
    '\u2091': 'e',
    '\u2095': 'h',
    '\u1D62': 'i',
    '\u2C7C': 'j',
    '\u2096': 'k',
    '\u2097': 'l',
    '\u2098': 'm',
    '\u2099': 'n',
    '\u2092': 'o',
    '\u209A': 'p',
    '\u1D63': 'r',
    '\u209B': 's',
    '\u209C': 't',
    '\u1D64': 'u',
    '\u1D65': 'v',
    '\u2093': 'x',
    '\u1D66': 'β',
    '\u1D67': 'γ',
    '\u1D68': 'ρ',
    '\u1D69': '\u03d5',
    '\u1D6A': 'χ',
    // Superscript forms (every remaining entry).
    '⁺': '+',
    '⁻': '-',
    '⁼': '=',
    '⁽': '(',
    '⁾': ')',
    '⁰': '0',
    '¹': '1',
    '²': '2',
    '³': '3',
    '⁴': '4',
    '⁵': '5',
    '⁶': '6',
    '⁷': '7',
    '⁸': '8',
    '⁹': '9',
    '\u1D2C': 'A',
    '\u1D2E': 'B',
    '\u1D30': 'D',
    '\u1D31': 'E',
    '\u1D33': 'G',
    '\u1D34': 'H',
    '\u1D35': 'I',
    '\u1D36': 'J',
    '\u1D37': 'K',
    '\u1D38': 'L',
    '\u1D39': 'M',
    '\u1D3A': 'N',
    '\u1D3C': 'O',
    '\u1D3E': 'P',
    '\u1D3F': 'R',
    '\u1D40': 'T',
    '\u1D41': 'U',
    '\u2C7D': 'V',
    '\u1D42': 'W',
    '\u1D43': 'a',
    '\u1D47': 'b',
    '\u1D9C': 'c',
    '\u1D48': 'd',
    '\u1D49': 'e',
    '\u1DA0': 'f',
    '\u1D4D': 'g',
    '\u02B0': 'h',
    '\u2071': 'i',
    '\u02B2': 'j',
    '\u1D4F': 'k',
    '\u02E1': 'l',
    '\u1D50': 'm',
    '\u207F': 'n',
    '\u1D52': 'o',
    '\u1D56': 'p',
    '\u02B3': 'r',
    '\u02E2': 's',
    '\u1D57': 't',
    '\u1D58': 'u',
    '\u1D5B': 'v',
    '\u02B7': 'w',
    '\u02E3': 'x',
    '\u02B8': 'y',
    '\u1DBB': 'z',
    '\u1D5D': 'β',
    '\u1D5E': 'γ',
    '\u1D5F': 'δ',
    '\u1D60': '\u03d5',
    '\u1D61': 'χ',
    '\u1DBF': 'θ',
}));
|
@@ -275,6 +275,10 @@ describe("A subscript and superscript parser", function() {
|
|||||||
expect`x_{x^x}`.toParse();
|
expect`x_{x^x}`.toParse();
|
||||||
expect`x_{x_x}`.toParse();
|
expect`x_{x_x}`.toParse();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should work with Unicode (sub|super)script characters", function() {
|
||||||
|
expect`A² + B²⁺³ + ¹²C + E₂³ + F₂₊₃`.toParseLike("A^{2} + B^{2+3} + ^{12}C + E_{2}^{3} + F_{2+3}");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("A subscript and superscript tree-builder", function() {
|
describe("A subscript and superscript tree-builder", function() {
|
||||||
|
Reference in New Issue
Block a user