mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-06 03:38:39 +00:00
Refactor buildExpression to depth-first traverse groups (nodes), fix spacings and \begingroup...\endgroup (#1706)
* Refactor `buildExpression` to depth-first traverse groups (nodes)
* Fix \begingroup...\endgroup
* Rename traverseNodes to traverseNonSpaceNodes, update comments and tests
* Update Parser.js
* Update katex-spec.js.snap
This commit is contained in:
committed by
Kevin Barabash
parent
5f3ab13d02
commit
a3215b284e
@@ -128,7 +128,13 @@ export default class Parser {
|
||||
return parse;
|
||||
}
|
||||
|
||||
static endOfExpression = ["}", "\\end", "\\right", "&"];
|
||||
static endOfExpression = ["}", "\\endgroup", "\\end", "\\right", "&"];
|
||||
|
||||
static endOfGroup = {
|
||||
"[": "]",
|
||||
"{": "}",
|
||||
"\\begingroup": "\\endgroup",
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an "expression", which is a list of atoms.
|
||||
@@ -770,28 +776,29 @@ export default class Parser {
|
||||
this.switchMode(mode);
|
||||
}
|
||||
|
||||
let groupEnd;
|
||||
let result;
|
||||
// Try to parse an open brace
|
||||
if (text === (optional ? "[" : "{")) {
|
||||
// Try to parse an open brace or \begingroup
|
||||
if (optional ? text === "[" : text === "{" || text === "\\begingroup") {
|
||||
groupEnd = Parser.endOfGroup[text];
|
||||
// Start a new group namespace
|
||||
this.gullet.beginGroup();
|
||||
// If we get a brace, parse an expression
|
||||
this.consume();
|
||||
const expression = this.parseExpression(false, optional ? "]" : "}");
|
||||
const expression = this.parseExpression(false, groupEnd);
|
||||
const lastToken = this.nextToken;
|
||||
// Switch mode back before consuming symbol after close brace
|
||||
if (mode) {
|
||||
this.switchMode(outerMode);
|
||||
}
|
||||
// End group namespace before consuming symbol after close brace
|
||||
this.gullet.endGroup();
|
||||
// Make sure we get a close brace
|
||||
this.expect(optional ? "]" : "}");
|
||||
return {
|
||||
result = {
|
||||
type: "ordgroup",
|
||||
mode: this.mode,
|
||||
loc: SourceLocation.range(firstToken, lastToken),
|
||||
body: expression,
|
||||
// A group formed by \begingroup...\endgroup is a semi-simple group
|
||||
// which doesn't affect spacing in math mode, i.e., is transparent.
|
||||
// https://tex.stackexchange.com/questions/1930/when-should-one-
|
||||
// use-begingroup-instead-of-bgroup
|
||||
semisimple: text === "\\begingroup" || undefined,
|
||||
};
|
||||
} else if (optional) {
|
||||
// Return nothing for an optional group
|
||||
@@ -815,6 +822,10 @@ export default class Parser {
|
||||
if (mode) {
|
||||
this.switchMode(outerMode);
|
||||
}
|
||||
// Make sure we got a close brace
|
||||
if (groupEnd) {
|
||||
this.expect(groupEnd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
249
src/buildHTML.js
249
src/buildHTML.js
@@ -10,7 +10,7 @@ import ParseError from "./ParseError";
|
||||
import Style from "./Style";
|
||||
import buildCommon from "./buildCommon";
|
||||
import {Anchor} from "./domTree";
|
||||
import utils, {assert} from "./utils";
|
||||
import utils from "./utils";
|
||||
import {checkNodeType} from "./parseNode";
|
||||
import {spacings, tightSpacings} from "./spacingData";
|
||||
import {_htmlGroupBuilders as groupBuilders} from "./defineFunction";
|
||||
@@ -25,32 +25,8 @@ const makeSpan = buildCommon.makeSpan;
|
||||
// Binary atoms (first class `mbin`) change into ordinary atoms (`mord`)
|
||||
// depending on their surroundings. See TeXbook pg. 442-446, Rules 5 and 6,
|
||||
// and the text before Rule 19.
|
||||
const isBinLeftCanceller = function(
|
||||
node: ?HtmlDomNode,
|
||||
isRealGroup: boolean,
|
||||
): boolean {
|
||||
// TODO: This code assumes that a node's math class is the first element
|
||||
// of its `classes` array. A later cleanup should ensure this, for
|
||||
// instance by changing the signature of `makeSpan`.
|
||||
if (node) {
|
||||
return utils.contains(["mbin", "mopen", "mrel", "mop", "mpunct"],
|
||||
getTypeOfDomTree(node, "right"));
|
||||
} else {
|
||||
return isRealGroup;
|
||||
}
|
||||
};
|
||||
|
||||
const isBinRightCanceller = function(
|
||||
node: ?HtmlDomNode,
|
||||
isRealGroup: boolean,
|
||||
): boolean {
|
||||
if (node) {
|
||||
return utils.contains(["mrel", "mclose", "mpunct"],
|
||||
getTypeOfDomTree(node, "left"));
|
||||
} else {
|
||||
return isRealGroup;
|
||||
}
|
||||
};
|
||||
const binLeftCanceller = ["leftmost", "mbin", "mopen", "mrel", "mop", "mpunct"];
|
||||
const binRightCanceller = ["rightmost", "mrel", "mclose", "mpunct"];
|
||||
|
||||
const styleMap = {
|
||||
"display": Style.DISPLAY,
|
||||
@@ -71,7 +47,7 @@ const DomEnum = {
|
||||
mpunct: "mpunct",
|
||||
minner: "minner",
|
||||
};
|
||||
export type DomType = $Keys<typeof DomEnum>;
|
||||
type DomType = $Keys<typeof DomEnum>;
|
||||
|
||||
/**
|
||||
* Take a list of nodes, build them in order, and return a list of the built
|
||||
@@ -88,116 +64,150 @@ export const buildExpression = function(
|
||||
surrounding: [?DomType, ?DomType] = [null, null],
|
||||
): HtmlDomNode[] {
|
||||
// Parse expressions into `groups`.
|
||||
const rawGroups: HtmlDomNode[] = [];
|
||||
const groups: HtmlDomNode[] = [];
|
||||
for (let i = 0; i < expression.length; i++) {
|
||||
const output = buildGroup(expression[i], options);
|
||||
if (output instanceof DocumentFragment) {
|
||||
const children: HtmlDomNode[] = output.children;
|
||||
rawGroups.push(...children);
|
||||
groups.push(...children);
|
||||
} else {
|
||||
rawGroups.push(output);
|
||||
groups.push(output);
|
||||
}
|
||||
}
|
||||
// At this point `rawGroups` consists entirely of `symbolNode`s and `span`s.
|
||||
|
||||
// Ignore explicit spaces (e.g., \;, \,) when determining what implicit
|
||||
// spacing should go between atoms of different classes, and add dummy
|
||||
// spans for determining spacings between surrounding atoms.
|
||||
const nonSpaces: (?HtmlDomNode)[] = [
|
||||
surrounding[0] ? makeSpan([surrounding[0]], [], options) : null,
|
||||
...rawGroups.filter(group => group && group.classes[0] !== "mspace"),
|
||||
surrounding[1] ? makeSpan([surrounding[1]], [], options) : null,
|
||||
];
|
||||
// If `expression` is a partial group, let the parent handle spacings
|
||||
// to avoid processing groups multiple times.
|
||||
if (!isRealGroup) {
|
||||
return groups;
|
||||
}
|
||||
|
||||
let glueOptions = options;
|
||||
if (expression.length === 1) {
|
||||
const node = checkNodeType(expression[0], "sizing") ||
|
||||
checkNodeType(expression[0], "styling");
|
||||
if (!node) {
|
||||
// No match.
|
||||
} else if (node.type === "sizing") {
|
||||
glueOptions = options.havingSize(node.size);
|
||||
} else if (node.type === "styling") {
|
||||
glueOptions = options.havingStyle(styleMap[node.style]);
|
||||
}
|
||||
}
|
||||
|
||||
// Dummy spans for determining spacings between surrounding atoms.
|
||||
// If `expression` has no atoms on the left or right, class "leftmost"
|
||||
// or "rightmost", respectively, is used to indicate it.
|
||||
const dummyPrev = makeSpan([surrounding[0] || "leftmost"], [], options);
|
||||
const dummyNext = makeSpan([surrounding[1] || "rightmost"], [], options);
|
||||
|
||||
// TODO: This code assumes that a node's math class is the first element
|
||||
// of its `classes` array. A later cleanup should ensure this, for
|
||||
// instance by changing the signature of `makeSpan`.
|
||||
|
||||
// Before determining what spaces to insert, perform bin cancellation.
|
||||
// Binary operators change to ordinary symbols in some contexts.
|
||||
for (let i = 1; i < nonSpaces.length - 1; i++) {
|
||||
const nonSpacesI: HtmlDomNode = assert(nonSpaces[i]);
|
||||
const left = getOutermostNode(nonSpacesI, "left");
|
||||
if (left.classes[0] === "mbin" &&
|
||||
isBinLeftCanceller(nonSpaces[i - 1], isRealGroup)) {
|
||||
left.classes[0] = "mord";
|
||||
traverseNonSpaceNodes(groups, (node, prev) => {
|
||||
const prevType = prev.classes[0];
|
||||
const type = node.classes[0];
|
||||
if (prevType === "mbin" && utils.contains(binRightCanceller, type)) {
|
||||
prev.classes[0] = "mord";
|
||||
} else if (type === "mbin" && utils.contains(binLeftCanceller, prevType)) {
|
||||
node.classes[0] = "mord";
|
||||
}
|
||||
}, {node: dummyPrev}, dummyNext);
|
||||
|
||||
const right = getOutermostNode(nonSpacesI, "right");
|
||||
if (right.classes[0] === "mbin" &&
|
||||
isBinRightCanceller(nonSpaces[i + 1], isRealGroup)) {
|
||||
right.classes[0] = "mord";
|
||||
traverseNonSpaceNodes(groups, (node, prev) => {
|
||||
const prevType = getTypeOfDomTree(prev);
|
||||
const type = getTypeOfDomTree(node);
|
||||
|
||||
// 'mtight' indicates that the node is script or scriptscript style.
|
||||
const space = prevType && type ? (node.hasClass("mtight")
|
||||
? tightSpacings[prevType][type]
|
||||
: spacings[prevType][type]) : null;
|
||||
if (space) { // Insert glue (spacing) after the `prev`.
|
||||
return buildCommon.makeGlue(space, glueOptions);
|
||||
}
|
||||
}
|
||||
|
||||
const groups = [];
|
||||
let j = 0;
|
||||
for (let i = 0; i < rawGroups.length; i++) {
|
||||
groups.push(rawGroups[i]);
|
||||
|
||||
// For any group that is not a space, get the next non-space. Then
|
||||
// lookup what implicit space should be placed between those atoms and
|
||||
// add it to groups.
|
||||
if (rawGroups[i].classes[0] !== "mspace" && j < nonSpaces.length - 1) {
|
||||
// if current non-space node is left dummy span, add a glue before
|
||||
// first real non-space node
|
||||
if (j === 0) {
|
||||
groups.pop();
|
||||
i--;
|
||||
}
|
||||
|
||||
// Get the type of the current non-space node. If it's a document
|
||||
// fragment, get the type of the rightmost node in the fragment.
|
||||
const left = getTypeOfDomTree(nonSpaces[j], "right");
|
||||
|
||||
// Get the type of the next non-space node. If it's a document
|
||||
// fragment, get the type of the leftmost node in the fragment.
|
||||
const right = getTypeOfDomTree(nonSpaces[j + 1], "left");
|
||||
|
||||
// We use buildExpression inside of sizingGroup, but it returns a
|
||||
// document fragment of elements. sizingGroup sets `isRealGroup`
|
||||
// to false to avoid processing spans multiple times.
|
||||
if (left && right && isRealGroup) {
|
||||
const nonSpacesJp1: HtmlDomNode = assert(nonSpaces[j + 1]);
|
||||
const space = isLeftTight(nonSpacesJp1)
|
||||
? tightSpacings[left][right]
|
||||
: spacings[left][right];
|
||||
|
||||
if (space) {
|
||||
let glueOptions = options;
|
||||
|
||||
if (expression.length === 1) {
|
||||
const node =
|
||||
checkNodeType(expression[0], "sizing") ||
|
||||
checkNodeType(expression[0], "styling");
|
||||
if (!node) {
|
||||
// No match.
|
||||
} else if (node.type === "sizing") {
|
||||
glueOptions = options.havingSize(node.size);
|
||||
} else if (node.type === "styling") {
|
||||
glueOptions = options.havingStyle(styleMap[node.style]);
|
||||
}
|
||||
}
|
||||
|
||||
groups.push(buildCommon.makeGlue(space, glueOptions));
|
||||
}
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
}, {node: dummyPrev}, dummyNext);
|
||||
|
||||
return groups;
|
||||
};
|
||||
|
||||
// Depth-first traverse non-space `nodes`, calling `callback` with the current and
|
||||
// previous node as arguments, optionally returning a node to insert after the
|
||||
// previous node. `prev` is an object with the previous node and an `insertAfter`
|
||||
// function to insert after it. `next` is a node that will be added to the right.
|
||||
// Used for bin cancellation and inserting spacings.
|
||||
const traverseNonSpaceNodes = function(
|
||||
nodes: HtmlDomNode[],
|
||||
callback: (HtmlDomNode, HtmlDomNode) => ?HtmlDomNode,
|
||||
prev: {|
|
||||
node: HtmlDomNode,
|
||||
insertAfter?: HtmlDomNode => void,
|
||||
|},
|
||||
next: ?HtmlDomNode,
|
||||
) {
|
||||
if (next) { // temporarily append the right node, if exists
|
||||
nodes.push(next);
|
||||
}
|
||||
let i = 0;
|
||||
for (; i < nodes.length; i++) {
|
||||
const node = nodes[i];
|
||||
const partialGroup = checkPartialGroup(node);
|
||||
if (partialGroup) { // Recursive DFS
|
||||
traverseNonSpaceNodes(partialGroup.children, callback, prev);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Ignore explicit spaces (e.g., \;, \,) when determining what implicit
|
||||
// spacing should go between atoms of different classes
|
||||
if (node.classes[0] === "mspace") {
|
||||
continue;
|
||||
}
|
||||
|
||||
const result = callback(node, prev.node);
|
||||
if (result) {
|
||||
if (prev.insertAfter) {
|
||||
prev.insertAfter(result);
|
||||
} else { // insert at front
|
||||
nodes.unshift(result);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
prev.node = node;
|
||||
prev.insertAfter = (index => n => {
|
||||
nodes.splice(index + 1, 0, n);
|
||||
i++;
|
||||
})(i);
|
||||
}
|
||||
if (next) {
|
||||
nodes.pop();
|
||||
}
|
||||
};
|
||||
|
||||
// Check if the given node is a partial group, i.e., one that does not affect the spacing around it.
|
||||
const checkPartialGroup = function(
|
||||
node: HtmlDomNode,
|
||||
): ?(DocumentFragment<HtmlDomNode> | Anchor) {
|
||||
if (node instanceof DocumentFragment || node instanceof Anchor) {
|
||||
return node;
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
// Return the outermost node of a domTree.
|
||||
const getOutermostNode = function(
|
||||
node: HtmlDomNode,
|
||||
side: Side,
|
||||
): HtmlDomNode {
|
||||
if (node instanceof DocumentFragment ||
|
||||
node instanceof Anchor) {
|
||||
const children = node.children;
|
||||
const partialGroup = checkPartialGroup(node);
|
||||
if (partialGroup) {
|
||||
const children = partialGroup.children;
|
||||
if (children.length) {
|
||||
if (side === "right") {
|
||||
return getOutermostNode(children[children.length - 1], "right");
|
||||
} else if (side === "left") {
|
||||
return getOutermostNode(children[0], "right");
|
||||
return getOutermostNode(children[0], "left");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -205,29 +215,22 @@ const getOutermostNode = function(
|
||||
};
|
||||
|
||||
// Return math atom class (mclass) of a domTree.
|
||||
// If `side` is given, it will get the type of the outermost node at the given side.
|
||||
export const getTypeOfDomTree = function(
|
||||
node: ?HtmlDomNode,
|
||||
side: Side,
|
||||
side: ?Side,
|
||||
): ?DomType {
|
||||
if (!node) {
|
||||
return null;
|
||||
}
|
||||
|
||||
node = getOutermostNode(node, side);
|
||||
if (side) {
|
||||
node = getOutermostNode(node, side);
|
||||
}
|
||||
// This makes a lot of assumptions as to where the type of atom
|
||||
// appears. We should do a better job of enforcing this.
|
||||
return DomEnum[node.classes[0]] || null;
|
||||
};
|
||||
|
||||
// If `node` is an atom return whether it's been assigned the mtight class.
|
||||
// If `node` is a document fragment, return the value of isLeftTight() for the
|
||||
// leftmost node in the fragment.
|
||||
// 'mtight' indicates that the node is script or scriptscript style.
|
||||
export const isLeftTight = function(node: HtmlDomNode): boolean {
|
||||
node = getOutermostNode(node, "left");
|
||||
return node.hasClass("mtight");
|
||||
};
|
||||
|
||||
export const makeNullDelimiter = function(
|
||||
options: Options,
|
||||
classes: string[],
|
||||
|
@@ -184,7 +184,7 @@ defineFunction({
|
||||
assertParsed(group);
|
||||
// Build the inner expression
|
||||
const inner = html.buildExpression(group.body, options, true,
|
||||
[null, "mclose"]);
|
||||
["mopen", "mclose"]);
|
||||
|
||||
let innerHeight = 0;
|
||||
let innerDepth = 0;
|
||||
|
@@ -8,6 +8,10 @@ import * as mml from "../buildMathML";
|
||||
defineFunctionBuilders({
|
||||
type: "ordgroup",
|
||||
htmlBuilder(group, options) {
|
||||
if (group.semisimple) {
|
||||
return buildCommon.makeFragment(
|
||||
html.buildExpression(group.body, options, false));
|
||||
}
|
||||
return buildCommon.makeSpan(
|
||||
["mord"], html.buildExpression(group.body, options, true), options);
|
||||
},
|
||||
|
@@ -298,8 +298,6 @@ defineMacro("\\providecommand", (context) => newcommand(context, true, true));
|
||||
// \let\bgroup={ \let\egroup=}
|
||||
defineMacro("\\bgroup", "{");
|
||||
defineMacro("\\egroup", "}");
|
||||
defineMacro("\\begingroup", "{");
|
||||
defineMacro("\\endgroup", "}");
|
||||
|
||||
// Symbols from latex.ltx:
|
||||
// \def\lq{`}
|
||||
|
@@ -79,6 +79,7 @@ type ParseNodeTypes = {
|
||||
mode: Mode,
|
||||
loc?: ?SourceLocation,
|
||||
body: AnyParseNode[],
|
||||
semisimple?: boolean,
|
||||
|},
|
||||
"raw": {|
|
||||
type: "raw",
|
||||
|
@@ -26,10 +26,9 @@ export type ArgType = "color" | "size" | "url" | "raw" | "original" | Mode;
|
||||
export type StyleStr = "text" | "display" | "script" | "scriptscript";
|
||||
|
||||
// Allowable token text for "break" arguments in parser.
|
||||
export type BreakToken = "]" | "}" | "$" | "\\)" | "\\cr";
|
||||
export type BreakToken = "]" | "}" | "\\endgroup" | "$" | "\\)" | "\\cr";
|
||||
|
||||
// Math font variants.
|
||||
export type FontVariant = "bold" | "bold-italic" | "bold-sans-serif" |
|
||||
"double-struck" | "fraktur" | "italic" | "monospace" | "normal" | "sans-serif" |
|
||||
"sans-serif-bold-italic" | "sans-serif-italic" | "script";
|
||||
|
||||
|
@@ -857,3 +857,96 @@ exports[`Extending katex by new fonts and symbols Add new font class to new exte
|
||||
</span>
|
||||
|
||||
`;
|
||||
|
||||
exports[`href and url commands should not affect spacing around 1`] = `
|
||||
[
|
||||
{
|
||||
"classes": [
|
||||
"mord",
|
||||
"mathdefault"
|
||||
],
|
||||
"depth": 0,
|
||||
"height": 0.43056,
|
||||
"italic": 0,
|
||||
"maxFontSize": 1,
|
||||
"skew": 0,
|
||||
"style": {
|
||||
},
|
||||
"text": "a",
|
||||
"width": 0.52859
|
||||
},
|
||||
{
|
||||
"attributes": {
|
||||
},
|
||||
"children": [
|
||||
],
|
||||
"classes": [
|
||||
"mspace"
|
||||
],
|
||||
"depth": 0,
|
||||
"height": 0,
|
||||
"maxFontSize": 0,
|
||||
"style": {
|
||||
"marginRight": "0.2222222222222222em"
|
||||
}
|
||||
},
|
||||
{
|
||||
"attributes": {
|
||||
"href": "http://example.com/"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"classes": [
|
||||
"mbin"
|
||||
],
|
||||
"depth": 0.08333,
|
||||
"height": 0.58333,
|
||||
"italic": 0,
|
||||
"maxFontSize": 1,
|
||||
"skew": 0,
|
||||
"style": {
|
||||
},
|
||||
"text": "+",
|
||||
"width": 0.77778
|
||||
},
|
||||
{
|
||||
"attributes": {
|
||||
},
|
||||
"children": [
|
||||
],
|
||||
"classes": [
|
||||
"mspace"
|
||||
],
|
||||
"depth": 0,
|
||||
"height": 0,
|
||||
"maxFontSize": 0,
|
||||
"style": {
|
||||
"marginRight": "0.2222222222222222em"
|
||||
}
|
||||
},
|
||||
{
|
||||
"classes": [
|
||||
"mord",
|
||||
"mathdefault"
|
||||
],
|
||||
"depth": 0,
|
||||
"height": 0.69444,
|
||||
"italic": 0,
|
||||
"maxFontSize": 1,
|
||||
"skew": 0,
|
||||
"style": {
|
||||
},
|
||||
"text": "b",
|
||||
"width": 0.42917
|
||||
}
|
||||
],
|
||||
"classes": [
|
||||
],
|
||||
"depth": 0.08333,
|
||||
"height": 0.69444,
|
||||
"maxFontSize": 1,
|
||||
"style": {
|
||||
}
|
||||
}
|
||||
]
|
||||
`;
|
||||
|
@@ -329,6 +329,33 @@ describe("A group parser", function() {
|
||||
});
|
||||
});
|
||||
|
||||
describe("A \\begingroup...\\endgroup parser", function() {
|
||||
it("should not fail", function() {
|
||||
expect`\begingroup xy \endgroup`.toParse();
|
||||
});
|
||||
|
||||
it("should fail when it is mismatched", function() {
|
||||
expect`\begingroup xy`.not.toParse();
|
||||
expect`\begingroup xy }`.not.toParse();
|
||||
});
|
||||
|
||||
it("should produce a semi-simple group", function() {
|
||||
const parse = getParsed`\begingroup xy \endgroup`;
|
||||
|
||||
expect(parse).toHaveLength(1);
|
||||
|
||||
const ord = parse[0];
|
||||
|
||||
expect(ord.type).toMatch("ord");
|
||||
expect(ord.body).toBeTruthy();
|
||||
expect(ord.semisimple).toBeTruthy();
|
||||
});
|
||||
|
||||
it("should not affect spacing in math mode", function() {
|
||||
expect`\begingroup x+ \endgroup y`.toBuildLike`x+y`;
|
||||
});
|
||||
});
|
||||
|
||||
describe("An implicit group parser", function() {
|
||||
it("should not fail", function() {
|
||||
expect`\Large x`.toParse();
|
||||
@@ -2613,6 +2640,11 @@ describe("href and url commands", function() {
|
||||
allowedProtocols: [],
|
||||
}));
|
||||
});
|
||||
|
||||
it("should not affect spacing around", function() {
|
||||
const built = getBuilt`a\href{http://example.com/}{+b}`;
|
||||
expect(built).toMatchSnapshot();
|
||||
});
|
||||
});
|
||||
|
||||
describe("A parser that does not throw on unsupported commands", function() {
|
||||
|
Reference in New Issue
Block a user