From ff1734f7c4882fb350cb0e1f366f04ce63675643 Mon Sep 17 00:00:00 2001 From: Erik Demaine Date: Sat, 28 Aug 2021 18:55:05 -0400 Subject: [PATCH] fix: \char support for >16-bit Unicode characters (#3006) * fix: \char support for >16-bit Unicode characters `String.fromCharCode` only supports Unicode characters up to 16-bit. `String.fromCodePoint` from ECMAScript 2015 supports all Unicode code points. Unfortunately, IE doesn't support the latter, so use the former as a fallback. Fixes #3004 Co-authored-by: ylemkimon --- src/functions/char.js | 15 +++++++++++++-- test/katex-spec.js | 6 ++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/functions/char.js b/src/functions/char.js index ed1e413b..1600ffbf 100644 --- a/src/functions/char.js +++ b/src/functions/char.js @@ -21,14 +21,25 @@ defineFunction({ const node = assertNodeType(group[i], "textord"); number += node.text; } - const code = parseInt(number); + let code = parseInt(number); + let text; if (isNaN(code)) { throw new ParseError(`\\@char has non-numeric argument ${number}`); + // If we drop IE support, the following code could be replaced with + // text = String.fromCodePoint(code) + } else if (code < 0 || code >= 0x10ffff) { + throw new ParseError(`\\@char with invalid code point ${number}`); + } else if (code <= 0xffff) { + text = String.fromCharCode(code); + } else { // Astral code point; split into surrogate halves + code -= 0x10000; + text = String.fromCharCode((code >> 10) + 0xd800, + (code & 0x3ff) + 0xdc00); } return { type: "textord", mode: parser.mode, - text: String.fromCharCode(code), + text: text, }; }, }); diff --git a/test/katex-spec.js b/test/katex-spec.js index c926229b..ed521d77 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -3313,6 +3313,12 @@ describe("A macro expander", function() { expect(parsedChar[0].type).toEqual("textord"); }); + it("\\char handles >16-bit characters", () => { + const parsed = getParsed('\\char"1d7d9'); 
expect(parsed[0].type).toEqual("textord"); + expect(parsed[0].text).toEqual("𝟙"); + }); + it("should build Unicode private area characters", function() { expect`\gvertneqq\lvertneqq\ngeqq\ngeqslant\nleqq`.toBuild(); expect`\nleqslant\nshortmid\nshortparallel\varsubsetneq`.toBuild();