From dd0c14ac014c2dda63d1f3973b9b3c75048de02b Mon Sep 17 00:00:00 2001 From: Erik Demaine Date: Mon, 21 Aug 2017 22:05:13 -0400 Subject: [PATCH] Support CJK full-width punctuation + Unicode dots (#814) --- src/symbols.js | 10 +++++----- src/unicodeRegexes.js | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/symbols.js b/src/symbols.js index 0cda21ce..05302371 100644 --- a/src/symbols.js +++ b/src/symbols.js @@ -599,11 +599,11 @@ defineSymbol(math, main, op, "\u2a06", "\\bigsqcup"); defineSymbol(math, main, op, "\u222b", "\\smallint"); defineSymbol(text, main, inner, "\u2026", "\\textellipsis"); defineSymbol(math, main, inner, "\u2026", "\\mathellipsis"); -defineSymbol(text, main, inner, "\u2026", "\\ldots"); -defineSymbol(math, main, inner, "\u2026", "\\ldots"); -defineSymbol(math, main, inner, "\u22ef", "\\cdots"); -defineSymbol(math, main, inner, "\u22f1", "\\ddots"); -defineSymbol(math, main, textord, "\u22ee", "\\vdots"); +defineSymbol(text, main, inner, "\u2026", "\\ldots", true); +defineSymbol(math, main, inner, "\u2026", "\\ldots", true); +defineSymbol(math, main, inner, "\u22ef", "\\cdots", true); +defineSymbol(math, main, inner, "\u22f1", "\\ddots", true); +defineSymbol(math, main, textord, "\u22ee", "\\vdots", true); defineSymbol(math, main, accent, "\u00b4", "\\acute"); defineSymbol(math, main, accent, "\u0060", "\\grave"); defineSymbol(math, main, accent, "\u00a8", "\\ddot"); diff --git a/src/unicodeRegexes.js b/src/unicodeRegexes.js index 39e7ac0c..b5b1dd50 100644 --- a/src/unicodeRegexes.js +++ b/src/unicodeRegexes.js @@ -1,13 +1,15 @@ const hangulRegex = /[\uAC00-\uD7AF]/; // This regex combines +// - CJK symbols and punctuation: [\u3000-\u303F] // - Hiragana: [\u3040-\u309F] // - Katakana: [\u30A0-\u30FF] // - CJK ideograms: [\u4E00-\u9FAF] // - Hangul syllables: [\uAC00-\uD7AF] +// - Fullwidth punctuation: [\uFF00-\uFF60] // Notably missing are halfwidth Katakana and Romanji glyphs. const cjkRegex = - /[\u3040-\u309F]|[\u30A0-\u30FF]|[\u4E00-\u9FAF]|[\uAC00-\uD7AF]/; + /[\u3000-\u30FF\u4E00-\u9FAF\uAC00-\uD7AF\uFF00-\uFF60]/; module.exports = { cjkRegex: cjkRegex,