From 01ae7f8eef09bcddc6e327c2cb5a3460800652d5 Mon Sep 17 00:00:00 2001
From: Erik Demaine
Date: Sat, 24 Apr 2021 15:33:01 -0400
Subject: [PATCH] fix: Respect catcode in macro expansion and set ~'s catcode correctly (#2949)

Support single characters having a catcode of 13 (active) or 12 (other),
and expand a macro defined for the character only if it's active.
This enables defining `~` as a macro, but avoiding expansion in URLs.

Fix #2924

Co-authored-by: ylemkimon
---
 src/Lexer.js         | 4 +++-
 src/MacroExpander.js | 8 ++++++++
 src/Parser.js        | 2 ++
 src/macros.js        | 2 ++
 src/symbols.js       | 2 --
 test/katex-spec.js   | 7 +++++++
 6 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/Lexer.js b/src/Lexer.js
index c9271c1f..7cf96edd 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -60,7 +60,8 @@ export default class Lexer implements LexerInterface {
     input: string;
     settings: Settings;
     tokenRegex: RegExp;
-    // category codes, only supports comment characters (14) for now
+    // Category codes. The lexer only supports comment characters (14) for now.
+    // MacroExpander additionally distinguishes active (13).
     catcodes: {[string]: number};
 
     constructor(input: string, settings: Settings) {
@@ -70,6 +71,7 @@ export default class Lexer implements LexerInterface {
         this.tokenRegex = new RegExp(tokenRegexString, 'g');
         this.catcodes = {
             "%": 14, // comment character
+            "~": 13, // active character
         };
     }
 
diff --git a/src/MacroExpander.js b/src/MacroExpander.js
index bcd5ec4f..94a809a8 100644
--- a/src/MacroExpander.js
+++ b/src/MacroExpander.js
@@ -395,6 +395,14 @@ export default class MacroExpander implements MacroContextInterface {
         if (definition == null) { // mainly checking for undefined here
             return definition;
         }
+        // If a single character has an associated catcode other than 13
+        // (active character), then don't expand it.
+        if (name.length === 1) {
+            const catcode = this.lexer.catcodes[name];
+            if (catcode != null && catcode !== 13) {
+                return;
+            }
+        }
         const expansion =
             typeof definition === "function" ? definition(this) : definition;
         if (typeof expansion === "string") {
diff --git a/src/Parser.js b/src/Parser.js
index c5448740..49ea4c5f 100644
--- a/src/Parser.js
+++ b/src/Parser.js
@@ -682,8 +682,10 @@ export default class Parser {
      */
     parseUrlGroup(optional: boolean): ?ParseNode<"url"> {
         this.gullet.lexer.setCatcode("%", 13); // active character
+        this.gullet.lexer.setCatcode("~", 12); // other character
         const res = this.parseStringGroup("url", optional);
         this.gullet.lexer.setCatcode("%", 14); // comment character
+        this.gullet.lexer.setCatcode("~", 13); // active character
         if (res == null) {
             return null;
         }
diff --git a/src/macros.js b/src/macros.js
index 986e31c5..0d8ec59c 100644
--- a/src/macros.js
+++ b/src/macros.js
@@ -327,10 +327,12 @@ defineMacro("\\bgroup", "{");
 defineMacro("\\egroup", "}");
 
 // Symbols from latex.ltx:
+// \def~{\nobreakspace{}}
 // \def\lq{`}
 // \def\rq{'}
 // \def \aa {\r a}
 // \def \AA {\r A}
+defineMacro("~", "\\nobreakspace");
 defineMacro("\\lq", "`");
 defineMacro("\\rq", "'");
 defineMacro("\\aa", "\\r a");
diff --git a/src/symbols.js b/src/symbols.js
index bd9f2544..ee71a65e 100644
--- a/src/symbols.js
+++ b/src/symbols.js
@@ -582,13 +582,11 @@ defineSymbol(math, main, rel, "\u2192", "\\to");
 defineSymbol(math, ams, rel, "\u2271", "\\ngeq", true);
 defineSymbol(math, ams, rel, "\u2270", "\\nleq", true);
 defineSymbol(math, main, spacing, "\u00a0", "\\ ");
-defineSymbol(math, main, spacing, "\u00a0", "~");
 defineSymbol(math, main, spacing, "\u00a0", "\\space");
 // Ref: LaTeX Source 2e: \DeclareRobustCommand{\nobreakspace}{%
 defineSymbol(math, main, spacing, "\u00a0", "\\nobreakspace");
 defineSymbol(text, main, spacing, "\u00a0", "\\ ");
 defineSymbol(text, main, spacing, "\u00a0", " ");
-defineSymbol(text, main, spacing, "\u00a0", "~");
 defineSymbol(text, main, spacing, "\u00a0", "\\space");
 defineSymbol(text, main, spacing, "\u00a0", "\\nobreakspace");
 defineSymbol(math, main, spacing, null, "\\nobreak");
diff --git a/test/katex-spec.js b/test/katex-spec.js
index d3df44f9..ba668201 100644
--- a/test/katex-spec.js
+++ b/test/katex-spec.js
@@ -3282,6 +3282,13 @@ describe("A macro expander", function() {
         expect('\\char"g').not.toParse();
     });
 
+    it("\\char escapes ~ correctly", () => {
+        const parsedBare = getParsed("~");
+        expect(parsedBare[0].type).toEqual("spacing");
+        const parsedChar = getParsed("\\char`\\~");
+        expect(parsedChar[0].type).toEqual("textord");
+    });
+
     it("should build Unicode private area characters", function() {
         expect`\gvertneqq\lvertneqq\ngeqq\ngeqslant\nleqq`.toBuild();
         expect`\nleqslant\nshortmid\nshortparallel\varsubsetneq`.toBuild();