fix: Respect catcode in macro expansion and set ~'s catcode correctly (#2949)

Support single characters having a catcode of 13 (active) or 12 (other),
and expand a macro defined for such a character only if it is active.
This allows `~` to be defined as a macro while preventing its expansion in URLs.

Fix #2924

Co-authored-by: ylemkimon <y@ylem.kim>
This commit is contained in:
Erik Demaine
2021-04-24 15:33:01 -04:00
committed by GitHub
parent 24332e053c
commit 01ae7f8eef
6 changed files with 22 additions and 3 deletions

View File

@@ -60,7 +60,8 @@ export default class Lexer implements LexerInterface {
input: string;
settings: Settings;
tokenRegex: RegExp;
// category codes, only supports comment characters (14) for now
// Category codes. The lexer only supports comment characters (14) for now.
// MacroExpander additionally distinguishes active (13).
catcodes: {[string]: number};
constructor(input: string, settings: Settings) {
@@ -70,6 +71,7 @@ export default class Lexer implements LexerInterface {
this.tokenRegex = new RegExp(tokenRegexString, 'g');
this.catcodes = {
"%": 14, // comment character
"~": 13, // active character
};
}

View File

@@ -395,6 +395,14 @@ export default class MacroExpander implements MacroContextInterface {
if (definition == null) { // mainly checking for undefined here
return definition;
}
// If a single character has an associated catcode other than 13
// (active character), then don't expand it.
if (name.length === 1) {
const catcode = this.lexer.catcodes[name];
if (catcode != null && catcode !== 13) {
return;
}
}
const expansion =
typeof definition === "function" ? definition(this) : definition;
if (typeof expansion === "string") {

View File

@@ -682,8 +682,10 @@ export default class Parser {
*/
parseUrlGroup(optional: boolean): ?ParseNode<"url"> {
this.gullet.lexer.setCatcode("%", 13); // active character
this.gullet.lexer.setCatcode("~", 12); // other character
const res = this.parseStringGroup("url", optional);
this.gullet.lexer.setCatcode("%", 14); // comment character
this.gullet.lexer.setCatcode("~", 13); // active character
if (res == null) {
return null;
}

View File

@@ -327,10 +327,12 @@ defineMacro("\\bgroup", "{");
defineMacro("\\egroup", "}");
// Symbols from latex.ltx:
// \def~{\nobreakspace{}}
// \def\lq{`}
// \def\rq{'}
// \def \aa {\r a}
// \def \AA {\r A}
// "~" is a single-character macro that expands to \nobreakspace.
// MacroExpander skips the expansion of a single-character name whose
// catcode in the lexer is set to something other than 13 (active), which
// is how parseUrlGroup keeps "~" literal (it sets the catcode to 12).
defineMacro("~", "\\nobreakspace");
// Left and right single quote characters.
defineMacro("\\lq", "`");
defineMacro("\\rq", "'");
// \aa is written in terms of the \r accent applied to "a".
defineMacro("\\aa", "\\r a");

View File

@@ -582,13 +582,11 @@ defineSymbol(math, main, rel, "\u2192", "\\to");
defineSymbol(math, ams, rel, "\u2271", "\\ngeq", true);
defineSymbol(math, ams, rel, "\u2270", "\\nleq", true);
defineSymbol(math, main, spacing, "\u00a0", "\\ ");
defineSymbol(math, main, spacing, "\u00a0", "~");
defineSymbol(math, main, spacing, "\u00a0", "\\space");
// Ref: LaTeX Source 2e: \DeclareRobustCommand{\nobreakspace}{%
defineSymbol(math, main, spacing, "\u00a0", "\\nobreakspace");
defineSymbol(text, main, spacing, "\u00a0", "\\ ");
defineSymbol(text, main, spacing, "\u00a0", " ");
defineSymbol(text, main, spacing, "\u00a0", "~");
defineSymbol(text, main, spacing, "\u00a0", "\\space");
defineSymbol(text, main, spacing, "\u00a0", "\\nobreakspace");
defineSymbol(math, main, spacing, null, "\\nobreak");

View File

@@ -3282,6 +3282,13 @@ describe("A macro expander", function() {
expect('\\char"g').not.toParse();
});
it("\\char escapes ~ correctly", () => {
    // Each entry pairs an input with the node type expected for its first
    // parsed node: a bare ~ yields a spacing node, whereas ~ written via
    // \char yields an ordinary text symbol (textord).
    const expectations = [
        ["~", "spacing"],
        ["\\char`\\~", "textord"],
    ];
    for (const [input, nodeType] of expectations) {
        expect(getParsed(input)[0].type).toEqual(nodeType);
    }
});
it("should build Unicode private area characters", function() {
expect`\gvertneqq\lvertneqq\ngeqq\ngeqslant\nleqq`.toBuild();
expect`\nleqslant\nshortmid\nshortparallel\varsubsetneq`.toBuild();