From 647c661db0578b6bfb53fc1351cc815e16754da5 Mon Sep 17 00:00:00 2001 From: Erik Demaine Date: Tue, 1 Jan 2019 11:55:44 -0500 Subject: [PATCH] Remove double encoding in MathML via Unicode MathML spaces (#1813) * Remove double encoding in MathML via Unicode MathML spaces We used to have a complex mechanic for escaping strings, marking them as `needsEscape = false`, and then not escaping those strings (for combining strings in `\operatorname`). But this doesn't really work with `render`ing directly to a node, as `document.createTextNode` can't be stopped from escaping. I've thus removed this mechanic, which required the following changes: * Switch MathML "smart space" encoding to use Unicode instead of `&LongNames;` (which weren't working with `render` for the same reason). * Hack our HTML/MathML serializer to not use `String.trim`, which wreaks havoc with emitted Unicode spaces. Now `toText()` doesn't escape, so strings concatenate in unescaped form, and `toMarkup()` only does the necessary escaping. Thus fix #1782. 
* Fix src/utils.js Co-Authored-By: edemaine * Fix src/mathMLTree.js documentation Co-Authored-By: edemaine * Remove trim hack thanks to diffable-html@4.0.0 * Switch back to jest-serializer-html * Update mathMLTree.js --- package.json | 2 +- src/functions/operatorname.js | 3 +- src/mathMLTree.js | 34 +++++++++---------- test/__snapshots__/mathml-spec.js.snap | 47 ++++++++++++++------------ test/setup.js | 2 +- yarn.lock | 18 +++++----- 6 files changed, 53 insertions(+), 53 deletions(-) diff --git a/package.json b/package.json index 2aeb9c09..7b87b86d 100644 --- a/package.json +++ b/package.json @@ -47,7 +47,7 @@ "istanbul-api": "^2.0.5", "istanbul-lib-coverage": "^2.0.1", "jest": "^23.5.0", - "jest-serializer-html": "^5.0.0", + "jest-serializer-html": "^6.0.0", "js-yaml": "^3.10.0", "json-stable-stringify": "^1.0.1", "jspngopt": "^0.2.0", diff --git a/src/functions/operatorname.js b/src/functions/operatorname.js index 57b8b948..238c96b8 100644 --- a/src/functions/operatorname.js +++ b/src/functions/operatorname.js @@ -101,8 +101,7 @@ defineFunction({ if (isAllString) { // Write a single TextNode instead of multiple nested tags. const word = expression.map(node => node.toText()).join(""); - // word has already been escaped by `node.toText()` - expression = [new mathMLTree.TextNode(word, false)]; + expression = [new mathMLTree.TextNode(word)]; } const identifier = new mathMLTree.MathNode("mi", expression); diff --git a/src/mathMLTree.js b/src/mathMLTree.js index 69ecc849..610fea61 100644 --- a/src/mathMLTree.js +++ b/src/mathMLTree.js @@ -126,18 +126,16 @@ export class MathNode implements MathDomNode { */ export class TextNode implements MathDomNode { text: string; - needsEscape: boolean; - constructor(text: string, needsEscape: boolean = true) { + constructor(text: string) { this.text = text; - this.needsEscape = needsEscape; } /** * Converts the text node into a DOM text node. 
*/ toNode(): Node { - return document.createTextNode(this.toText()); + return document.createTextNode(this.text); } /** @@ -145,15 +143,15 @@ export class TextNode implements MathDomNode { * (representing the text itself). */ toMarkup(): string { - return this.toText(); + return utils.escape(this.toText()); } /** - * Converts the text node into an escaped string + * Converts the text node into a string * (representing the text iteself). */ toText(): string { - return this.needsEscape ? utils.escape(this.text) : this.text; + return this.text; } } @@ -171,25 +169,25 @@ class SpaceNode implements MathDomNode { constructor(width: number) { this.width = width; // See https://www.w3.org/TR/2000/WD-MathML2-20000328/chapter6.html - // for a table of space-like characters. We consistently use the - // &LongNames; because Unicode does not have single characters for - //    (\u2005\u200a) and all negative spaces. + // for a table of space-like characters. We use Unicode + // representations instead of &LongNames; as it's not clear how to + // make the latter via document.createTextNode. 
if (width >= 0.05555 && width <= 0.05556) { - this.character = " "; // \u200a + this.character = "\u200a"; //   } else if (width >= 0.1666 && width <= 0.1667) { - this.character = " "; // \u2009 + this.character = "\u2009"; //   } else if (width >= 0.2222 && width <= 0.2223) { - this.character = " "; // \u2005 + this.character = "\u2005"; //   } else if (width >= 0.2777 && width <= 0.2778) { - this.character = "  "; // \u2005\u200a + this.character = "\u2005\u200a"; //    } else if (width >= -0.05556 && width <= -0.05555) { - this.character = "​"; + this.character = "\u200a\u2063"; // ​ } else if (width >= -0.1667 && width <= -0.1666) { - this.character = "​"; + this.character = "\u2009\u2063"; // ​ } else if (width >= -0.2223 && width <= -0.2222) { - this.character = "​"; + this.character = "\u205f\u2063"; // ​ } else if (width >= -0.2778 && width <= -0.2777) { - this.character = "​"; + this.character = "\u2005\u2063"; // ​ } else { this.character = null; } diff --git a/test/__snapshots__/mathml-spec.js.snap b/test/__snapshots__/mathml-spec.js.snap index 3ed5593b..42ac5c99 100644 --- a/test/__snapshots__/mathml-spec.js.snap +++ b/test/__snapshots__/mathml-spec.js.snap @@ -46,6 +46,7 @@ exports[`A MathML builder \\text fonts become mathvariant 1`] = ` bold +   italic @@ -68,6 +69,7 @@ exports[`A MathML builder \\text fonts become mathvariant 1`] = ` bold +   italic @@ -91,6 +93,7 @@ exports[`A MathML builder \\text fonts become mathvariant 1`] = ` bold +   italic @@ -300,7 +303,7 @@ exports[`A MathML builder should generate the right types of nodes 1`] = ` 1 -    +    a @@ -344,7 +347,7 @@ exports[`A MathML builder should output \\limsup_{x \\rightarrow \\infty} correc - lim sup + lim sup ⁡ @@ -379,7 +382,7 @@ exports[`A MathML builder should output \\limsup_{x \\rightarrow \\infty} in dis - lim sup + lim sup ⁡ @@ -702,61 +705,61 @@ exports[`A MathML builder special spaces render specially 1`] = ` -   +   -   +   -   +   -   +   -   +   -    +    -    +    - ​ +  ⁣ 
- ​ +  ⁣ - ​ +  ⁣ - ​ +  ⁣ -   +   -   +   -   +   -    +    - ​ +  ⁣ - ​ +  ⁣ - ​ +  ⁣ - ​ +  ⁣ diff --git a/test/setup.js b/test/setup.js index 83884b2e..90badbbe 100644 --- a/test/setup.js +++ b/test/setup.js @@ -8,7 +8,7 @@ import { expectKaTeX, expectEquivalent, } from "./helpers"; -// Serializer support +// JSON serializer const typeFirstCompare = (a, b) => { if (a.key === 'type') { diff --git a/yarn.lock b/yarn.lock index 9fc84869..444e7baa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2803,10 +2803,10 @@ diff@^3.2.0: resolved "https://registry.yarnpkg.com/diff/-/diff-3.5.0.tgz#800c0dd1e0a8bfbc95835c202ad220fe317e5a12" integrity sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA== -diffable-html@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/diffable-html/-/diffable-html-3.0.0.tgz#3766be8bcf6e90e061bdc321e04e14fc0630853f" - integrity sha512-lUxHiU00DexR/wKcY56OiJZmB0D66ghidYfU4VxUMG09TDx+1jjO7/dFrZKI2p9z00tWY/7ZeO9BBEi6n0jUYQ== +diffable-html@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/diffable-html/-/diffable-html-4.0.0.tgz#eb44a189785115bacd04829f31658fcd3c0ce590" + integrity sha512-keJdgy2qBkdrrnwP1YE6e834d4Y+mV0aRFzk8w7WzyAJVbQVfcJltSmUWB3r/NOoO/0jt7RdJlvy5ioyqvmQcw== dependencies: htmlparser2 "^3.9.2" @@ -5197,12 +5197,12 @@ jest-runtime@^23.5.0: write-file-atomic "^2.1.0" yargs "^11.0.0" -jest-serializer-html@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/jest-serializer-html/-/jest-serializer-html-5.0.0.tgz#6848c4d8fb0f6c6dcb1e26a05e46d66c7ff9edb9" - integrity sha512-WfBvUnFdcZNJZMHuOWQFry+5qScsLhoaPxUOzXTqxGIp7BzWMEgr2sG/GKdLTIIfyNviAVp3k4jcqHtBkG18Lg== +jest-serializer-html@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/jest-serializer-html/-/jest-serializer-html-6.0.0.tgz#39ce49a42b3937d1687f93923814d21b47ae9a85" + integrity sha512-S855oT9Yt1T07I45+uRaLsH22TR5lAvgqBxKreqDKs6QmcaxzapGgKamc4J2KxMrPc2uDdWcuOaprcjIuVUxvQ== 
dependencies: - diffable-html "^3.0.0" + diffable-html "^4.0.0" jest-serializer@^23.0.1: version "23.0.1"