Remove double encoding in MathML via Unicode MathML spaces (#1813)

* Remove double encoding in MathML via Unicode MathML spaces

We used to have a complex mechanic for escaping strings, marking them as
`needsEscape = false`, and then not escaping those strings (for
combining strings in `\operatorname`).  But this doesn't really work
with `render`ing directly to a node, as `document.createTextNode` can't
be stopped from escaping.

I've thus removed this mechanic, which required the following changes:

* Switch MathML "smart space" encoding to use Unicode instead of
  `&LongNames;` (which weren't working with `render` for the same reason).
* Hack our HTML/MathML serializer to not use `String.trim`, which wreaks
  havoc with emitted Unicode spaces.

Now `toText()` doesn't escape, so strings concatenate in unescaped form,
and `toHTML()` only does the necessary escaping.  This fixes #1782.

* Fix src/utils.js

Co-Authored-By: edemaine <edemaine@mit.edu>

* Fix src/mathMLTree.js documentation

Co-Authored-By: edemaine <edemaine@mit.edu>

* Remove trim hack thanks to diffable-html@4.0.0

* Switch back to jest-serializer-html

* Update mathMLTree.js
This commit is contained in:
Erik Demaine
2019-01-01 11:55:44 -05:00
committed by ylemkimon
parent f5234a2f01
commit 647c661db0
6 changed files with 53 additions and 53 deletions

View File

@@ -47,7 +47,7 @@
"istanbul-api": "^2.0.5",
"istanbul-lib-coverage": "^2.0.1",
"jest": "^23.5.0",
"jest-serializer-html": "^5.0.0",
"jest-serializer-html": "^6.0.0",
"js-yaml": "^3.10.0",
"json-stable-stringify": "^1.0.1",
"jspngopt": "^0.2.0",

View File

@@ -101,8 +101,7 @@ defineFunction({
if (isAllString) {
// Write a single TextNode instead of multiple nested tags.
const word = expression.map(node => node.toText()).join("");
// word has already been escaped by `node.toText()`
expression = [new mathMLTree.TextNode(word, false)];
expression = [new mathMLTree.TextNode(word)];
}
const identifier = new mathMLTree.MathNode("mi", expression);

View File

@@ -126,18 +126,16 @@ export class MathNode implements MathDomNode {
*/
export class TextNode implements MathDomNode {
text: string;
needsEscape: boolean;
constructor(text: string, needsEscape: boolean = true) {
constructor(text: string) {
this.text = text;
this.needsEscape = needsEscape;
}
/**
* Converts the text node into a DOM text node.
*/
toNode(): Node {
return document.createTextNode(this.toText());
return document.createTextNode(this.text);
}
/**
@@ -145,15 +143,15 @@ export class TextNode implements MathDomNode {
* (representing the text itself).
*/
toMarkup(): string {
return this.toText();
return utils.escape(this.toText());
}
/**
* Converts the text node into an escaped string
* Converts the text node into a string
* (representing the text itself).
*/
toText(): string {
return this.needsEscape ? utils.escape(this.text) : this.text;
return this.text;
}
}
@@ -171,25 +169,25 @@ class SpaceNode implements MathDomNode {
constructor(width: number) {
this.width = width;
// See https://www.w3.org/TR/2000/WD-MathML2-20000328/chapter6.html
// for a table of space-like characters. We consistently use the
// &LongNames; because Unicode does not have single characters for
// &ThickSpace; (\u2005\u200a) and all negative spaces.
// for a table of space-like characters. We use Unicode
// representations instead of &LongNames; as it's not clear how to
// make the latter via document.createTextNode.
if (width >= 0.05555 && width <= 0.05556) {
this.character = "&VeryThinSpace;"; // \u200a
this.character = "\u200a"; // &VeryThinSpace;
} else if (width >= 0.1666 && width <= 0.1667) {
this.character = "&ThinSpace;"; // \u2009
this.character = "\u2009"; // &ThinSpace;
} else if (width >= 0.2222 && width <= 0.2223) {
this.character = "&MediumSpace;"; // \u2005
this.character = "\u2005"; // &MediumSpace;
} else if (width >= 0.2777 && width <= 0.2778) {
this.character = "&ThickSpace;"; // \u2005\u200a
this.character = "\u2005\u200a"; // &ThickSpace;
} else if (width >= -0.05556 && width <= -0.05555) {
this.character = "&NegativeVeryThinSpace;";
this.character = "\u200a\u2063"; // &NegativeVeryThinSpace;
} else if (width >= -0.1667 && width <= -0.1666) {
this.character = "&NegativeThinSpace;";
this.character = "\u2009\u2063"; // &NegativeThinSpace;
} else if (width >= -0.2223 && width <= -0.2222) {
this.character = "&NegativeMediumSpace;";
this.character = "\u205f\u2063"; // &NegativeMediumSpace;
} else if (width >= -0.2778 && width <= -0.2777) {
this.character = "&NegativeThickSpace;";
this.character = "\u2005\u2063"; // &NegativeThickSpace;
} else {
this.character = null;
}

View File

@@ -46,6 +46,7 @@ exports[`A MathML builder \\text fonts become mathvariant 1`] = `
bold
</mtext>
<mtext>
 
</mtext>
<mtext mathvariant="bold-italic">
italic
@@ -68,6 +69,7 @@ exports[`A MathML builder \\text fonts become mathvariant 1`] = `
bold
</mtext>
<mtext>
 
</mtext>
<mtext mathvariant="sans-serif-bold-italic">
italic
@@ -91,6 +93,7 @@ exports[`A MathML builder \\text fonts become mathvariant 1`] = `
bold
</mtext>
<mtext>
 
</mtext>
<mtext mathvariant="monospace">
italic
@@ -300,7 +303,7 @@ exports[`A MathML builder should generate the right types of nodes 1`] = `
1
</mn>
<mtext>
&ThickSpace;
</mtext>
<mtext>
a
@@ -344,7 +347,7 @@ exports[`A MathML builder should output \\limsup_{x \\rightarrow \\infty} correc
<msub>
<mo>
<mi mathvariant="normal">
lim&ThinSpace;sup
limsup
</mi>
<mo>
@@ -379,7 +382,7 @@ exports[`A MathML builder should output \\limsup_{x \\rightarrow \\infty} in dis
<munder>
<mo>
<mi mathvariant="normal">
lim&ThinSpace;sup
limsup
</mi>
<mo>
@@ -702,61 +705,61 @@ exports[`A MathML builder special spaces render specially 1`] = `
<semantics>
<mrow>
<mtext>
&ThinSpace;
</mtext>
<mtext>
&ThinSpace;
</mtext>
<mtext>
&MediumSpace;
</mtext>
<mtext>
&MediumSpace;
</mtext>
<mtext>
&MediumSpace;
</mtext>
<mtext>
&ThickSpace;
</mtext>
<mtext>
&ThickSpace;
</mtext>
<mtext>
&NegativeThinSpace;
</mtext>
<mtext>
&NegativeThinSpace;
</mtext>
<mtext>
&NegativeMediumSpace;
</mtext>
<mtext>
&NegativeThickSpace;
</mtext>
<mtext>
&VeryThinSpace;
</mtext>
<mtext>
&ThinSpace;
</mtext>
<mtext>
&MediumSpace;
</mtext>
<mtext>
&ThickSpace;
</mtext>
<mtext>
&NegativeVeryThinSpace;
</mtext>
<mtext>
&NegativeThinSpace;
</mtext>
<mtext>
&NegativeMediumSpace;
</mtext>
<mtext>
&NegativeThickSpace;
</mtext>
</mrow>
<annotation encoding="application/x-tex">

View File

@@ -8,7 +8,7 @@ import {
expectKaTeX, expectEquivalent,
} from "./helpers";
// Serializer support
// JSON serializer
const typeFirstCompare = (a, b) => {
if (a.key === 'type') {

View File

@@ -2803,10 +2803,10 @@ diff@^3.2.0:
resolved "https://registry.yarnpkg.com/diff/-/diff-3.5.0.tgz#800c0dd1e0a8bfbc95835c202ad220fe317e5a12"
integrity sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==
diffable-html@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/diffable-html/-/diffable-html-3.0.0.tgz#3766be8bcf6e90e061bdc321e04e14fc0630853f"
integrity sha512-lUxHiU00DexR/wKcY56OiJZmB0D66ghidYfU4VxUMG09TDx+1jjO7/dFrZKI2p9z00tWY/7ZeO9BBEi6n0jUYQ==
diffable-html@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/diffable-html/-/diffable-html-4.0.0.tgz#eb44a189785115bacd04829f31658fcd3c0ce590"
integrity sha512-keJdgy2qBkdrrnwP1YE6e834d4Y+mV0aRFzk8w7WzyAJVbQVfcJltSmUWB3r/NOoO/0jt7RdJlvy5ioyqvmQcw==
dependencies:
htmlparser2 "^3.9.2"
@@ -5197,12 +5197,12 @@ jest-runtime@^23.5.0:
write-file-atomic "^2.1.0"
yargs "^11.0.0"
jest-serializer-html@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/jest-serializer-html/-/jest-serializer-html-5.0.0.tgz#6848c4d8fb0f6c6dcb1e26a05e46d66c7ff9edb9"
integrity sha512-WfBvUnFdcZNJZMHuOWQFry+5qScsLhoaPxUOzXTqxGIp7BzWMEgr2sG/GKdLTIIfyNviAVp3k4jcqHtBkG18Lg==
jest-serializer-html@^6.0.0:
version "6.0.0"
resolved "https://registry.yarnpkg.com/jest-serializer-html/-/jest-serializer-html-6.0.0.tgz#39ce49a42b3937d1687f93923814d21b47ae9a85"
integrity sha512-S855oT9Yt1T07I45+uRaLsH22TR5lAvgqBxKreqDKs6QmcaxzapGgKamc4J2KxMrPc2uDdWcuOaprcjIuVUxvQ==
dependencies:
diffable-html "^3.0.0"
diffable-html "^4.0.0"
jest-serializer@^23.0.1:
version "23.0.1"