fix: rewrite of splitAtDelimiters.js -- new fix for #2523 (#2679)

* rewrite of splitAtDelimiters.js -- new fix for #2523

* fix splitAtDelimiters test

* minor changes to splitAtDelimiters

* tweak the indentation a bit

Co-authored-by: Kevin Barabash <kevinb@khanacademy.org>
Co-authored-by: Kevin Barabash <kevinb7@gmail.com>
This commit is contained in:
Paul Zinn-Justin
2020-12-26 19:14:31 +00:00
committed by GitHub
parent 3c26b44810
commit b34175bd92
3 changed files with 126 additions and 132 deletions

View File

@@ -3,22 +3,11 @@
import katex from "katex";
import splitAtDelimiters from "./splitAtDelimiters";
/**
 * Split `text` on every delimiter pair in `delimiters`, one pair at a time.
 *
 * Starts from a single text segment holding the whole input and feeds the
 * running segment list through `splitAtDelimiters` once per delimiter, in
 * array order. The segment list returned by the last pass is the result.
 *
 * @param {string} text - The raw text to split.
 * @param {Array<{left: string, right: string, display: ?boolean}>} delimiters
 *     Delimiter pairs; a falsy `display` is passed on as `false`.
 * @return {Array<Object>} Whatever the final `splitAtDelimiters` pass
 *     returned (the initial one-element text list if there are no
 *     delimiters).
 */
const splitWithDelimiters = function(text, delimiters) {
    let segments = [{type: "text", data: text}];
    for (const {left, right, display} of delimiters) {
        const isDisplay = display || false;
        segments = splitAtDelimiters(segments, left, right, isDisplay);
    }
    return segments;
};
/* Note: optionsCopy is mutated by this method. If it is ever exposed in the
* API, we should copy it before mutating.
*/
const renderMathInText = function(text, optionsCopy) {
const data = splitWithDelimiters(text, optionsCopy.delimiters);
const data = splitAtDelimiters(text, optionsCopy.delimiters);
if (data.length === 1 && data[0].type === 'text') {
// There is no formula in the text.
// Let's return null which means there is no need to replace
@@ -48,7 +37,7 @@ const renderMathInText = function(text, optionsCopy) {
}
optionsCopy.errorCallback(
"KaTeX auto-render: Failed to parse `" + data[i].data +
"` with ",
"` with ",
e
);
fragment.appendChild(document.createTextNode(data[i].rawData));
@@ -76,8 +65,8 @@ const renderElem = function(elem, optionsCopy) {
const className = ' ' + childNode.className + ' ';
const shouldRender = optionsCopy.ignoredTags.indexOf(
childNode.nodeName.toLowerCase()) === -1 &&
optionsCopy.ignoredClasses.every(
x => className.indexOf(' ' + x + ' ') === -1);
optionsCopy.ignoredClasses.every(
x => className.indexOf(' ' + x + ' ') === -1);
if (shouldRender) {
renderElem(childNode, optionsCopy);
@@ -107,11 +96,7 @@ const renderMathInElement = function(elem, options) {
{left: "\\(", right: "\\)", display: false},
// LaTeX uses $…$, but it ruins the display of normal `$` in text:
// {left: "$", right: "$", display: false},
// \[…\] must come last in this array. Otherwise, renderMathInElement
// will search for \[ before it searches for $$ or \(
// That makes it susceptible to finding a \\[0.3em] row delimiter and
// treating it as if it were the start of a KaTeX math zone.
// $ must come after $$
{left: "\\[", right: "\\]", display: true},
];
optionsCopy.ignoredTags = optionsCopy.ignoredTags || [

View File

@@ -27,76 +27,53 @@ const findEndOfMath = function(delimiter, text, startIndex) {
return -1;
};
const splitAtDelimiters = function(startData, leftDelim, rightDelim, display) {
const finalData = [];
/**
 * Escape `string` so it can be embedded in a RegExp source and match
 * itself literally.
 *
 * @param {string} string - Text that may contain regex metacharacters.
 * @return {string} The text with each metacharacter preceded by a backslash.
 */
const escapeRegex = function(string) {
    // Characters that carry special meaning inside a regular expression.
    const metaChars = /[-/\\^$*+?.()|[\]{}]/g;
    // "$&" re-inserts the matched character after the added backslash.
    return string.replace(metaChars, "\\$&");
};
for (let i = 0; i < startData.length; i++) {
if (startData[i].type === "text") {
const text = startData[i].data;
/**
 * Split `text` into plain-text and math segments using all delimiter pairs
 * in a single left-to-right scan.
 *
 * The scan repeatedly looks for the earliest opening delimiter of any kind.
 * When several delimiters open at the same position, the one listed first in
 * `delimiters` wins (so `$$` must be listed before `$`). The matching closing
 * delimiter is located with `findEndOfMath`; delimiters that appear inside a
 * math zone are therefore never treated as openers themselves.
 *
 * If an opening delimiter has no closing partner, scanning stops and the
 * remainder (starting at that opener) is returned as a text segment.
 *
 * @param {string} text - The raw text to split.
 * @param {Array<{left: string, right: string, display: boolean}>} delimiters
 *     Delimiter pairs to recognise. An empty array means "no math".
 * @return {Array<Object>} Segments in source order, each either
 *     `{type: "text", data}` or
 *     `{type: "math", data, rawData, display}` where `data` is the content
 *     between the delimiters and `rawData` includes them. Empty text
 *     segments are omitted, so an empty input yields an empty array.
 */
const splitAtDelimiters = function(text, delimiters) {
    const data = [];
    // Work on a local cursor string instead of reassigning the parameter.
    let rest = text;

    // With no delimiters the alternation would be empty and match at
    // index 0, leaving no delimiter to look up; skip scanning entirely.
    if (delimiters.length > 0) {
        const regexLeft = new RegExp(
            delimiters.map((x) => escapeRegex(x.left)).join("|"));

        while (true) {
            const start = rest.search(regexLeft);
            if (start === -1) {
                break;
            }
            if (start > 0) {
                data.push({
                    type: "text",
                    data: rest.slice(0, start),
                });
                rest = rest.slice(start); // now rest starts with a delimiter
            }
            // ... so this always succeeds:
            const delim = delimiters.find((d) => rest.startsWith(d.left));
            const end = findEndOfMath(delim.right, rest, delim.left.length);
            if (end === -1) {
                // Unclosed math zone: leave the remainder as plain text.
                break;
            }
            data.push({
                type: "math",
                data: rest.slice(delim.left.length, end),
                rawData: rest.slice(0, end + delim.right.length),
                display: delim.display,
            });
            rest = rest.slice(end + delim.right.length);
        }
    }

    if (rest !== "") {
        data.push({
            type: "text",
            data: rest,
        });
    }

    return data;
};
export default splitAtDelimiters;

View File

@@ -6,17 +6,16 @@ beforeEach(function() {
toSplitInto: function(actual, left, right, result) {
const message = {
pass: true,
message: "'" + actual + "' split correctly",
message: () => "'" + actual + "' split correctly",
};
const startData = [{type: "text", data: actual}];
const split =
splitAtDelimiters(startData, left, right, false);
splitAtDelimiters(actual,
[{left: left, right: right, display: false}]);
if (split.length !== result.length) {
message.pass = false;
message.message = "Different number of splits: " +
message.message = () => "Different number of splits: " +
split.length + " vs. " + result.length + " (" +
JSON.stringify(split) + " vs. " +
JSON.stringify(result) + ")";
@@ -43,7 +42,7 @@ beforeEach(function() {
if (!good) {
message.pass = false;
message.message = "Difference at split " +
message.message = () => "Difference at split " +
(i + 1) + ": " + JSON.stringify(real) +
" vs. " + JSON.stringify(correct) +
" (" + diff + " differs)";
@@ -146,6 +145,19 @@ describe("A delimiter splitter", function() {
]);
});
it("correctly processes sequences of $..$", function() {
// Back-to-back inline zones with no text between them: each "$$" in the
// input is a closing "$" immediately followed by an opening "$", so the
// expected result is three separate inline math segments and no text.
expect("$hello$$world$$boo$").toSplitInto(
"$", "$",
[
{type: "math", data: "hello",
rawData: "$hello$", display: false},
{type: "math", data: "world",
rawData: "$world$", display: false},
{type: "math", data: "boo",
rawData: "$boo$", display: false},
]);
});
it("doesn't split at escaped delimiters", function() {
expect("hello ( world \\) ) boo").toSplitInto(
"(", ")",
@@ -157,14 +169,14 @@ describe("A delimiter splitter", function() {
]);
/* TODO(emily): make this work maybe?
expect("hello \\( ( world ) boo").toSplitInto(
"(", ")",
[
{type: "text", data: "hello \\( "},
{type: "math", data: " world ",
rawData: "( world )", display: false},
{type: "text", data: " boo"},
]);
expect("hello \\( ( world ) boo").toSplitInto(
"(", ")",
[
{type: "text", data: "hello \\( "},
{type: "math", data: " world ",
rawData: "( world )", display: false},
{type: "text", data: " boo"},
]);
*/
});
@@ -179,10 +191,20 @@ describe("A delimiter splitter", function() {
]);
});
it("remembers which delimiters are display-mode", function() {
const startData = [{type: "text", data: "hello ( world ) boo"}];
it("ignores \\$", function() {
// A backslash-escaped "\$" inside the zone must not be taken as the
// closing delimiter; it is kept verbatim in both data and rawData.
expect("$x = \\$5$").toSplitInto(
"$", "$",
[
{type: "math", data: "x = \\$5",
rawData: "$x = \\$5$", display: false},
]);
});
expect(splitAtDelimiters(startData, "(", ")", true)).toEqual(
it("remembers which delimiters are display-mode", function() {
const startData = "hello ( world ) boo";
expect(splitAtDelimiters(startData,
[{left:"(", right:")", display:true}])).toEqual(
[
{type: "text", data: "hello "},
{type: "math", data: " world ",
@@ -191,42 +213,52 @@ describe("A delimiter splitter", function() {
]);
});
it("works with more than one start datum", function() {
const startData = [
{type: "text", data: "hello ( world ) boo"},
{type: "math", data: "math", rawData: "(math)", display: true},
{type: "text", data: "hello ( world ) boo"},
];
expect(splitAtDelimiters(startData, "(", ")", false)).toEqual(
it("handles nested delimiters irrespective of order", function() {
expect(splitAtDelimiters("$\\fbox{\\(hi\\)}$",
[
{type: "text", data: "hello "},
{type: "math", data: " world ",
rawData: "( world )", display: false},
{type: "text", data: " boo"},
{type: "math", data: "math", rawData: "(math)", display: true},
{type: "text", data: "hello "},
{type: "math", data: " world ",
rawData: "( world )", display: false},
{type: "text", data: " boo"},
{left:"\\(", right:"\\)", display:false},
{left:"$", right:"$", display:false},
])).toEqual(
[
{type: "math", data: "\\fbox{\\(hi\\)}",
rawData: "$\\fbox{\\(hi\\)}$", display: false},
]);
expect(splitAtDelimiters("\\(\\fbox{$hi$}\\)",
[
{left:"\\(", right:"\\)", display:false},
{left:"$", right:"$", display:false},
])).toEqual(
[
{type: "math", data: "\\fbox{$hi$}",
rawData: "\\(\\fbox{$hi$}\\)", display: false},
]);
});
it("doesn't do splitting inside of math nodes", function() {
const startData = [
{type: "text", data: "hello ( world ) boo"},
{type: "math", data: "hello ( world ) boo",
rawData: "(hello ( world ) boo)", display: true},
];
expect(splitAtDelimiters(startData, "(", ")", false)).toEqual(
it("handles a mix of $ and $$", function() {
expect(splitAtDelimiters("$hello$world$$boo$$",
[
{type: "text", data: "hello "},
{type: "math", data: " world ",
rawData: "( world )", display: false},
{type: "text", data: " boo"},
{type: "math", data: "hello ( world ) boo",
rawData: "(hello ( world ) boo)", display: true},
{left:"$$", right:"$$", display:true},
{left:"$", right:"$", display:false},
])).toEqual(
[
{type: "math", data: "hello",
rawData: "$hello$", display: false},
{type: "text", data: "world"},
{type: "math", data: "boo",
rawData: "$$boo$$", display: true},
]);
expect(splitAtDelimiters("$hello$$world$$$boo$$",
[
{left:"$$", right:"$$", display:true},
{left:"$", right:"$", display:false},
])).toEqual(
[
{type: "math", data: "hello",
rawData: "$hello$", display: false},
{type: "math", data: "world",
rawData: "$world$", display: false},
{type: "math", data: "boo",
rawData: "$$boo$$", display: true},
]);
});
});