mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 03:08:40 +00:00
feat: Support Unicode (sub|super)script characters (#3633)
* feat: Support Unicode (sub|super)script characters
* Acquire tokens via repeated fetch()
* Match more Unicode (sub|super)script characters
* Update docs with new characters
* Add Greek characters to RegEx
* Pick up review comments

Co-authored-by: Erik Demaine <edemaine@mit.edu>
This commit is contained in:
@@ -190,6 +190,9 @@ $\allowbreak α β γ δ ϵ ζ η θ ι κ λ μ ν ξ o π \allowbreak ρ σ τ
|
||||
|
||||
Direct Input: $∂ ∇ ℑ Ⅎ ℵ ℶ ℷ ℸ ⅁ ℏ ð − ∗$
|
||||
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÙÚÛÜÝÞßàáâãäåçèéêëìíîïðñòóôöùúûüýþÿ
|
||||
₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ⁺⁻⁼⁽⁾⁰¹²³⁴⁵⁶⁷⁸⁹ᵃᵇᶜᵈᵉᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘʷˣʸᶻᵛᵝᵞᵟᵠᵡ
|
||||
|
||||
Math-mode Unicode (sub|super)script characters will render as if you had written regular characters in a subscript or superscript. For instance, `A²⁺³` will render the same as `A^{2+3}`.
|
||||
|
||||
</div>
|
||||
<div class="katex-cards" id="math-alpha">
|
||||
|
@@ -9,6 +9,7 @@ import ParseError from "./ParseError";
|
||||
import {combiningDiacriticalMarksEndRegex} from "./Lexer";
|
||||
import Settings from "./Settings";
|
||||
import SourceLocation from "./SourceLocation";
|
||||
import {uSubsAndSups, unicodeSubRegEx} from "./unicodeSupOrSub";
|
||||
import {Token} from "./Token";
|
||||
|
||||
// Pre-evaluate both modules as unicodeSymbols require String.normalize()
|
||||
@@ -399,6 +400,29 @@ export default class Parser {
|
||||
}
|
||||
// Put everything into an ordgroup as the superscript
|
||||
superscript = {type: "ordgroup", mode: this.mode, body: primes};
|
||||
} else if (uSubsAndSups[lex.text]) {
|
||||
// A Unicode subscript or superscript character.
|
||||
// We treat these similarly to the unicode-math package.
|
||||
// So we render a string of Unicode (sub|super)scripts the
|
||||
// same as a (sub|super)script of regular characters.
|
||||
let str = uSubsAndSups[lex.text];
|
||||
const isSub = unicodeSubRegEx.test(lex.text);
|
||||
this.consume();
|
||||
// Continue fetching tokens to fill out the string.
|
||||
while (true) {
|
||||
const token = this.fetch().text;
|
||||
if (!(uSubsAndSups[token])) { break; }
|
||||
if (unicodeSubRegEx.test(token) !== isSub) { break; }
|
||||
this.consume();
|
||||
str += uSubsAndSups[token];
|
||||
}
|
||||
// Now create a (sub|super)script.
|
||||
const body = (new Parser(str, this.settings)).parse();
|
||||
if (isSub) {
|
||||
subscript = {type: "ordgroup", mode: "math", body};
|
||||
} else {
|
||||
superscript = {type: "ordgroup", mode: "math", body};
|
||||
}
|
||||
} else {
|
||||
// If it wasn't ^, _, or ', stop parsing super/subscripts
|
||||
break;
|
||||
|
108
src/unicodeSupOrSub.js
Normal file
108
src/unicodeSupOrSub.js
Normal file
@@ -0,0 +1,108 @@
|
||||
// Helpers for Parser.js handling of Unicode (sub|super)script characters.
|
||||
|
||||
// Matches a string whose first character is one of the Unicode *subscript*
// characters listed in the class below. The pattern is anchored at the start
// and has no `g`/`y` flag, so repeated `.test()` calls carry no `lastIndex`
// state. Parser.js calls `.test()` on a token's text to decide whether a
// Unicode script character is a subscript (match) or a superscript (no match).
// NOTE(review): the character class must be kept in sync by hand with the
// subscript keys of `uSubsAndSups`.
export const unicodeSubRegEx = /^[₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ]/;
|
||||
|
||||
/**
 * Lookup table from a Unicode subscript or superscript character to the
 * regular character it stands for (e.g. '²' -> '2', '\u2090' -> 'a').
 *
 * Parser.js tests `uSubsAndSups[token]` for truthiness to decide whether a
 * token is a Unicode (sub|super)script character, then concatenates the
 * mapped values into a string that is parsed as the script's body.
 *
 * The table is built on a null-prototype object so that only the keys listed
 * here can ever match: with an ordinary object literal, inherited names such
 * as "constructor" or "toString" would look up as truthy values. The result
 * is frozen, so it cannot be modified at runtime.
 */
export const uSubsAndSups = Object.freeze(Object.assign(Object.create(null), {
    // Subscripts: operators and parentheses
    '₊': '+',
    '₋': '-',
    '₌': '=',
    '₍': '(',
    '₎': ')',
    // Subscripts: digits
    '₀': '0',
    '₁': '1',
    '₂': '2',
    '₃': '3',
    '₄': '4',
    '₅': '5',
    '₆': '6',
    '₇': '7',
    '₈': '8',
    '₉': '9',
    // Subscripts: Latin letters
    '\u2090': 'a',
    '\u2091': 'e',
    '\u2095': 'h',
    '\u1D62': 'i',
    '\u2C7C': 'j',
    '\u2096': 'k',
    '\u2097': 'l',
    '\u2098': 'm',
    '\u2099': 'n',
    '\u2092': 'o',
    '\u209A': 'p',
    '\u1D63': 'r',
    '\u209B': 's',
    '\u209C': 't',
    '\u1D64': 'u',
    '\u1D65': 'v',
    '\u2093': 'x',
    // Subscripts: Greek letters
    '\u1D66': 'β',
    '\u1D67': 'γ',
    '\u1D68': 'ρ',
    '\u1D69': '\u03d5',
    '\u1D6A': 'χ',
    // Superscripts: operators and parentheses
    '⁺': '+',
    '⁻': '-',
    '⁼': '=',
    '⁽': '(',
    '⁾': ')',
    // Superscripts: digits
    '⁰': '0',
    '¹': '1',
    '²': '2',
    '³': '3',
    '⁴': '4',
    '⁵': '5',
    '⁶': '6',
    '⁷': '7',
    '⁸': '8',
    '⁹': '9',
    // Superscripts: uppercase Latin letters
    '\u1D2C': 'A',
    '\u1D2E': 'B',
    '\u1D30': 'D',
    '\u1D31': 'E',
    '\u1D33': 'G',
    '\u1D34': 'H',
    '\u1D35': 'I',
    '\u1D36': 'J',
    '\u1D37': 'K',
    '\u1D38': 'L',
    '\u1D39': 'M',
    '\u1D3A': 'N',
    '\u1D3C': 'O',
    '\u1D3E': 'P',
    '\u1D3F': 'R',
    '\u1D40': 'T',
    '\u1D41': 'U',
    '\u2C7D': 'V',
    '\u1D42': 'W',
    // Superscripts: lowercase Latin letters
    '\u1D43': 'a',
    '\u1D47': 'b',
    '\u1D9C': 'c',
    '\u1D48': 'd',
    '\u1D49': 'e',
    '\u1DA0': 'f',
    '\u1D4D': 'g',
    '\u02B0': 'h',
    '\u2071': 'i',
    '\u02B2': 'j',
    '\u1D4F': 'k',
    '\u02E1': 'l',
    '\u1D50': 'm',
    '\u207F': 'n',
    '\u1D52': 'o',
    '\u1D56': 'p',
    '\u02B3': 'r',
    '\u02E2': 's',
    '\u1D57': 't',
    '\u1D58': 'u',
    '\u1D5B': 'v',
    '\u02B7': 'w',
    '\u02E3': 'x',
    '\u02B8': 'y',
    '\u1DBB': 'z',
    // Superscripts: Greek letters
    '\u1D5D': 'β',
    '\u1D5E': 'γ',
    '\u1D5F': 'δ',
    '\u1D60': '\u03d5',
    '\u1D61': 'χ',
    '\u1DBF': 'θ',
}));
|
@@ -275,6 +275,10 @@ describe("A subscript and superscript parser", function() {
|
||||
expect`x_{x^x}`.toParse();
|
||||
expect`x_{x_x}`.toParse();
|
||||
});
|
||||
|
||||
it("should work with Unicode (sub|super)script characters", function() {
|
||||
expect`A² + B²⁺³ + ¹²C + E₂³ + F₂₊₃`.toParseLike("A^{2} + B^{2+3} + ^{12}C + E_{2}^{3} + F_{2+3}");
|
||||
});
|
||||
});
|
||||
|
||||
describe("A subscript and superscript tree-builder", function() {
|
||||
|
Reference in New Issue
Block a user