fix: rewrite of splitAtDelimiters.js -- new fix for #2523 (#2679)

* rewrite of splitAtDelimiters.js -- new fix for #2523
* fix splitAtDelimiters test
* minor changes to splitAtDelimiters
* tweak the indentation a bit

Co-authored-by: Kevin Barabash <kevinb@khanacademy.org>
Co-authored-by: Kevin Barabash <kevinb7@gmail.com>
2025-10-08 04:28:41 +00:00 · 2020-12-26 19:14:31 +00:00
parent 3c26b44810
commit b34175bd92
3 changed files with 126 additions and 132 deletions
--- a/contrib/auto-render/auto-render.js
+++ b/contrib/auto-render/auto-render.js
@@ -3,22 +3,11 @@
 import katex from "katex";
 import splitAtDelimiters from "./splitAtDelimiters";

-const splitWithDelimiters = function(text, delimiters) {
-    let data = [{type: "text", data: text}];
-    for (let i = 0; i < delimiters.length; i++) {
-        const delimiter = delimiters[i];
-        data = splitAtDelimiters(
-            data, delimiter.left, delimiter.right,
-            delimiter.display || false);
-    }
-    return data;
-};
-
 /* Note: optionsCopy is mutated by this method. If it is ever exposed in the
 * API, we should copy it before mutating.
 */
 const renderMathInText = function(text, optionsCopy) {
-    const data = splitWithDelimiters(text, optionsCopy.delimiters);
+    const data = splitAtDelimiters(text, optionsCopy.delimiters);
    if (data.length === 1 && data[0].type === 'text') {
        // There is no formula in the text.
        // Let's return null which means there is no need to replace
@@ -48,7 +37,7 @@ const renderMathInText = function(text, optionsCopy) {
                }
                optionsCopy.errorCallback(
                    "KaTeX auto-render: Failed to parse `" + data[i].data +
-                    "` with ",
+                        "` with ",
                    e
                );
                fragment.appendChild(document.createTextNode(data[i].rawData));
@@ -76,8 +65,8 @@ const renderElem = function(elem, optionsCopy) {
            const className = ' ' + childNode.className + ' ';
            const shouldRender = optionsCopy.ignoredTags.indexOf(
                childNode.nodeName.toLowerCase()) === -1 &&
-                    optionsCopy.ignoredClasses.every(
-                        x => className.indexOf(' ' + x + ' ') === -1);
+                  optionsCopy.ignoredClasses.every(
+                      x => className.indexOf(' ' + x + ' ') === -1);

            if (shouldRender) {
                renderElem(childNode, optionsCopy);
@@ -107,11 +96,7 @@ const renderMathInElement = function(elem, options) {
        {left: "\\(", right: "\\)", display: false},
        // LaTeX uses $…$, but it ruins the display of normal `$` in text:
        // {left: "$", right: "$", display: false},
-
-        //  \[…\] must come last in this array. Otherwise, renderMathInElement
-        //  will search for \[ before it searches for $$ or  \(
-        // That makes it susceptible to finding a \\[0.3em] row delimiter and
-        // treating it as if it were the start of a KaTeX math zone.
+        // $ must come after $$ (the first matching left delimiter wins)
        {left: "\\[", right: "\\]", display: true},
    ];
    optionsCopy.ignoredTags = optionsCopy.ignoredTags || [
--- a/contrib/auto-render/splitAtDelimiters.js
+++ b/contrib/auto-render/splitAtDelimiters.js
@@ -27,76 +27,53 @@ const findEndOfMath = function(delimiter, text, startIndex) {
    return -1;
 };

-const splitAtDelimiters = function(startData, leftDelim, rightDelim, display) {
-    const finalData = [];
+const escapeRegex = function(string) {
+    return string.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&");
+};

-    for (let i = 0; i < startData.length; i++) {
-        if (startData[i].type === "text") {
-            const text = startData[i].data;
+const splitAtDelimiters = function(text, delimiters) {
+    let index;
+    const data = [];

-            let lookingForLeft = true;
-            let currIndex = 0;
-            let nextIndex;
+    const regexLeft = new RegExp(
+        "(" + delimiters.map((x) => escapeRegex(x.left)).join("|") + ")"
+    );

-            nextIndex = text.indexOf(leftDelim);
-            if (nextIndex !== -1) {
-                currIndex = nextIndex;
-                finalData.push({
-                    type: "text",
-                    data: text.slice(0, currIndex),
-                });
-                lookingForLeft = false;
-            }
-
-            while (true) {
-                if (lookingForLeft) {
-                    nextIndex = text.indexOf(leftDelim, currIndex);
-                    if (nextIndex === -1) {
-                        break;
-                    }
-
-                    finalData.push({
-                        type: "text",
-                        data: text.slice(currIndex, nextIndex),
-                    });
-
-                    currIndex = nextIndex;
-                } else {
-                    nextIndex = findEndOfMath(
-                        rightDelim,
-                        text,
-                        currIndex + leftDelim.length);
-                    if (nextIndex === -1) {
-                        break;
-                    }
-
-                    finalData.push({
-                        type: "math",
-                        data: text.slice(
-                            currIndex + leftDelim.length,
-                            nextIndex),
-                        rawData: text.slice(
-                            currIndex,
-                            nextIndex + rightDelim.length),
-                        display: display,
-                    });
-
-                    currIndex = nextIndex + rightDelim.length;
-                }
-
-                lookingForLeft = !lookingForLeft;
-            }
-
-            finalData.push({
-                type: "text",
-                data: text.slice(currIndex),
-            });
-        } else {
-            finalData.push(startData[i]);
+    while (true) {
+        index = text.search(regexLeft);
+        if (index === -1) {
+            break;
        }
+        if (index > 0) {
+            data.push({
+                type: "text",
+                data: text.slice(0, index),
+            });
+            text = text.slice(index); // now text starts with delimiter
+        }
+        // ... so this always succeeds:
+        const i = delimiters.findIndex((delim) => text.startsWith(delim.left));
+        index = findEndOfMath(delimiters[i].right, text, delimiters[i].left.length);
+        if (index === -1) {
+            break;
+        }
+        data.push({
+            type: "math",
+            data: text.slice(delimiters[i].left.length, index),
+            rawData: text.slice(0, index + delimiters[i].right.length),
+            display: delimiters[i].display,
+        });
+        text = text.slice(index + delimiters[i].right.length);
    }

-    return finalData;
+    if (text !== "") {
+        data.push({
+            type: "text",
+            data: text,
+        });
+    }
+
+    return data;
 };

 export default splitAtDelimiters;
--- a/contrib/auto-render/test/auto-render-spec.js
+++ b/contrib/auto-render/test/auto-render-spec.js
@@ -6,17 +6,16 @@ beforeEach(function() {
        toSplitInto: function(actual, left, right, result) {
            const message = {
                pass: true,
-                message: "'" + actual + "' split correctly",
+                message: () => "'" + actual + "' split correctly",
            };

-            const startData = [{type: "text", data: actual}];
-
            const split =
-                splitAtDelimiters(startData, left, right, false);
+                  splitAtDelimiters(actual,
+                                    [{left: left, right: right, display: false}]);

            if (split.length !== result.length) {
                message.pass = false;
-                message.message = "Different number of splits: " +
+                message.message = () => "Different number of splits: " +
                    split.length + " vs. " + result.length + " (" +
                    JSON.stringify(split) + " vs. " +
                    JSON.stringify(result) + ")";
@@ -43,7 +42,7 @@ beforeEach(function() {

                if (!good) {
                    message.pass = false;
-                    message.message = "Difference at split " +
+                    message.message = () => "Difference at split " +
                        (i + 1) + ": " + JSON.stringify(real) +
                        " vs. " + JSON.stringify(correct) +
                        " (" + diff + " differs)";
@@ -146,6 +145,19 @@ describe("A delimiter splitter", function() {
            ]);
    });

+    it("correctly processes sequences of $..$", function() {
+        expect("$hello$$world$$boo$").toSplitInto(
+            "$", "$",
+            [
+                {type: "math", data: "hello",
+                    rawData: "$hello$", display: false},
+                {type: "math", data: "world",
+                    rawData: "$world$", display: false},
+                {type: "math", data: "boo",
+                    rawData: "$boo$", display: false},
+            ]);
+    });
+
    it("doesn't split at escaped delimiters", function() {
        expect("hello ( world \\) ) boo").toSplitInto(
            "(", ")",
@@ -157,14 +169,14 @@ describe("A delimiter splitter", function() {
            ]);

        /* TODO(emily): make this work maybe?
-        expect("hello \\( ( world ) boo").toSplitInto(
-            "(", ")",
-            [
-                {type: "text", data: "hello \\( "},
-                {type: "math", data: " world ",
-                    rawData: "( world )", display: false},
-                {type: "text", data: " boo"},
-            ]);
+           expect("hello \\( ( world ) boo").toSplitInto(
+           "(", ")",
+           [
+           {type: "text", data: "hello \\( "},
+           {type: "math", data: " world ",
+           rawData: "( world )", display: false},
+           {type: "text", data: " boo"},
+           ]);
        */
    });

@@ -179,10 +191,20 @@ describe("A delimiter splitter", function() {
            ]);
    });

-    it("remembers which delimiters are display-mode", function() {
-        const startData = [{type: "text", data: "hello ( world ) boo"}];
+    it("ignores \\$", function() {
+        expect("$x = \\$5$").toSplitInto(
+            "$", "$",
+            [
+                {type: "math", data: "x = \\$5",
+                    rawData: "$x = \\$5$", display: false},
+            ]);
+    });

-        expect(splitAtDelimiters(startData, "(", ")", true)).toEqual(
+    it("remembers which delimiters are display-mode", function() {
+        const startData = "hello ( world ) boo";
+
+        expect(splitAtDelimiters(startData,
+                                 [{left:"(", right:")", display:true}])).toEqual(
            [
                {type: "text", data: "hello "},
                {type: "math", data: " world ",
@@ -191,42 +213,52 @@ describe("A delimiter splitter", function() {
            ]);
    });

-    it("works with more than one start datum", function() {
-        const startData = [
-            {type: "text", data: "hello ( world ) boo"},
-            {type: "math", data: "math", rawData: "(math)", display: true},
-            {type: "text", data: "hello ( world ) boo"},
-        ];
-
-        expect(splitAtDelimiters(startData, "(", ")", false)).toEqual(
+    it("handles nested delimiters irrespective of order", function() {
+        expect(splitAtDelimiters("$\\fbox{\\(hi\\)}$",
            [
-                {type: "text", data: "hello "},
-                {type: "math", data: " world ",
-                    rawData: "( world )", display: false},
-                {type: "text", data: " boo"},
-                {type: "math", data: "math", rawData: "(math)", display: true},
-                {type: "text", data: "hello "},
-                {type: "math", data: " world ",
-                    rawData: "( world )", display: false},
-                {type: "text", data: " boo"},
+                                     {left:"\\(", right:"\\)", display:false},
+                                     {left:"$", right:"$", display:false},
+            ])).toEqual(
+            [
+                {type: "math", data: "\\fbox{\\(hi\\)}",
+                    rawData: "$\\fbox{\\(hi\\)}$", display: false},
+            ]);
+        expect(splitAtDelimiters("\\(\\fbox{$hi$}\\)",
+            [
+                {left:"\\(", right:"\\)", display:false},
+                {left:"$", right:"$", display:false},
+            ])).toEqual(
+            [
+                {type: "math", data: "\\fbox{$hi$}",
+                    rawData: "\\(\\fbox{$hi$}\\)", display: false},
            ]);
    });

-    it("doesn't do splitting inside of math nodes", function() {
-        const startData = [
-            {type: "text", data: "hello ( world ) boo"},
-            {type: "math", data: "hello ( world ) boo",
-                rawData: "(hello ( world ) boo)", display: true},
-        ];
-
-        expect(splitAtDelimiters(startData, "(", ")", false)).toEqual(
+    it("handles a mix of $ and $$", function() {
+        expect(splitAtDelimiters("$hello$world$$boo$$",
            [
-                {type: "text", data: "hello "},
-                {type: "math", data: " world ",
-                    rawData: "( world )", display: false},
-                {type: "text", data: " boo"},
-                {type: "math", data: "hello ( world ) boo",
-                    rawData: "(hello ( world ) boo)", display: true},
+                {left:"$$", right:"$$", display:true},
+                {left:"$", right:"$", display:false},
+            ])).toEqual(
+            [
+                {type: "math", data: "hello",
+                    rawData: "$hello$", display: false},
+                                         {type: "text", data: "world"},
+                {type: "math", data: "boo",
+                    rawData: "$$boo$$", display: true},
+            ]);
+        expect(splitAtDelimiters("$hello$$world$$$boo$$",
+            [
+                {left:"$$", right:"$$", display:true},
+                {left:"$", right:"$", display:false},
+            ])).toEqual(
+            [
+                {type: "math", data: "hello",
+                    rawData: "$hello$", display: false},
+                {type: "math", data: "world",
+                    rawData: "$world$", display: false},
+                {type: "math", data: "boo",
+                    rawData: "$$boo$$", display: true},
            ]);
    });
 });