Refactor buildExpression to depth-first traverse groups (nodes), fix spacings and \begingroup...\endgroup (#1706)

* Refactor `buildExpression` to depth-first traverse groups (nodes)

* Fix \begingroup...\endgroup

* Rename traverseNodes to traverseNonSpaceNodes, update comments and tests

* Update Parser.js

* Update katex-spec.js.snap
This commit is contained in:
ylemkimon
2018-10-29 13:31:00 +09:00
committed by Kevin Barabash
parent 5f3ab13d02
commit a3215b284e
9 changed files with 280 additions and 139 deletions

View File

@@ -128,7 +128,13 @@ export default class Parser {
return parse;
}
static endOfExpression = ["}", "\\end", "\\right", "&"];
static endOfExpression = ["}", "\\endgroup", "\\end", "\\right", "&"];
static endOfGroup = {
"[": "]",
"{": "}",
"\\begingroup": "\\endgroup",
}
/**
* Parses an "expression", which is a list of atoms.
@@ -770,28 +776,29 @@ export default class Parser {
this.switchMode(mode);
}
let groupEnd;
let result;
// Try to parse an open brace
if (text === (optional ? "[" : "{")) {
// Try to parse an open brace or \begingroup
if (optional ? text === "[" : text === "{" || text === "\\begingroup") {
groupEnd = Parser.endOfGroup[text];
// Start a new group namespace
this.gullet.beginGroup();
// If we get a brace, parse an expression
this.consume();
const expression = this.parseExpression(false, optional ? "]" : "}");
const expression = this.parseExpression(false, groupEnd);
const lastToken = this.nextToken;
// Switch mode back before consuming symbol after close brace
if (mode) {
this.switchMode(outerMode);
}
// End group namespace before consuming symbol after close brace
this.gullet.endGroup();
// Make sure we get a close brace
this.expect(optional ? "]" : "}");
return {
result = {
type: "ordgroup",
mode: this.mode,
loc: SourceLocation.range(firstToken, lastToken),
body: expression,
// A group formed by \begingroup...\endgroup is a semi-simple group
// which doesn't affect spacing in math mode, i.e., is transparent.
// https://tex.stackexchange.com/questions/1930/when-should-one-
// use-begingroup-instead-of-bgroup
semisimple: text === "\\begingroup" || undefined,
};
} else if (optional) {
// Return nothing for an optional group
@@ -815,6 +822,10 @@ export default class Parser {
if (mode) {
this.switchMode(outerMode);
}
// Make sure we got a close brace
if (groupEnd) {
this.expect(groupEnd);
}
return result;
}

View File

@@ -10,7 +10,7 @@ import ParseError from "./ParseError";
import Style from "./Style";
import buildCommon from "./buildCommon";
import {Anchor} from "./domTree";
import utils, {assert} from "./utils";
import utils from "./utils";
import {checkNodeType} from "./parseNode";
import {spacings, tightSpacings} from "./spacingData";
import {_htmlGroupBuilders as groupBuilders} from "./defineFunction";
@@ -25,32 +25,8 @@ const makeSpan = buildCommon.makeSpan;
// Binary atoms (first class `mbin`) change into ordinary atoms (`mord`)
// depending on their surroundings. See TeXbook pg. 442-446, Rules 5 and 6,
// and the text before Rule 19.
const isBinLeftCanceller = function(
node: ?HtmlDomNode,
isRealGroup: boolean,
): boolean {
// TODO: This code assumes that a node's math class is the first element
// of its `classes` array. A later cleanup should ensure this, for
// instance by changing the signature of `makeSpan`.
if (node) {
return utils.contains(["mbin", "mopen", "mrel", "mop", "mpunct"],
getTypeOfDomTree(node, "right"));
} else {
return isRealGroup;
}
};
const isBinRightCanceller = function(
node: ?HtmlDomNode,
isRealGroup: boolean,
): boolean {
if (node) {
return utils.contains(["mrel", "mclose", "mpunct"],
getTypeOfDomTree(node, "left"));
} else {
return isRealGroup;
}
};
const binLeftCanceller = ["leftmost", "mbin", "mopen", "mrel", "mop", "mpunct"];
const binRightCanceller = ["rightmost", "mrel", "mclose", "mpunct"];
const styleMap = {
"display": Style.DISPLAY,
@@ -71,7 +47,7 @@ const DomEnum = {
mpunct: "mpunct",
minner: "minner",
};
export type DomType = $Keys<typeof DomEnum>;
type DomType = $Keys<typeof DomEnum>;
/**
* Take a list of nodes, build them in order, and return a list of the built
@@ -88,83 +64,26 @@ export const buildExpression = function(
surrounding: [?DomType, ?DomType] = [null, null],
): HtmlDomNode[] {
// Parse expressions into `groups`.
const rawGroups: HtmlDomNode[] = [];
const groups: HtmlDomNode[] = [];
for (let i = 0; i < expression.length; i++) {
const output = buildGroup(expression[i], options);
if (output instanceof DocumentFragment) {
const children: HtmlDomNode[] = output.children;
rawGroups.push(...children);
groups.push(...children);
} else {
rawGroups.push(output);
}
}
// At this point `rawGroups` consists entirely of `symbolNode`s and `span`s.
// Ignore explicit spaces (e.g., \;, \,) when determining what implicit
// spacing should go between atoms of different classes, and add dummy
// spans for determining spacings between surrounding atoms.
const nonSpaces: (?HtmlDomNode)[] = [
surrounding[0] ? makeSpan([surrounding[0]], [], options) : null,
...rawGroups.filter(group => group && group.classes[0] !== "mspace"),
surrounding[1] ? makeSpan([surrounding[1]], [], options) : null,
];
// Before determining what spaces to insert, perform bin cancellation.
// Binary operators change to ordinary symbols in some contexts.
for (let i = 1; i < nonSpaces.length - 1; i++) {
const nonSpacesI: HtmlDomNode = assert(nonSpaces[i]);
const left = getOutermostNode(nonSpacesI, "left");
if (left.classes[0] === "mbin" &&
isBinLeftCanceller(nonSpaces[i - 1], isRealGroup)) {
left.classes[0] = "mord";
}
const right = getOutermostNode(nonSpacesI, "right");
if (right.classes[0] === "mbin" &&
isBinRightCanceller(nonSpaces[i + 1], isRealGroup)) {
right.classes[0] = "mord";
groups.push(output);
}
}
const groups = [];
let j = 0;
for (let i = 0; i < rawGroups.length; i++) {
groups.push(rawGroups[i]);
// For any group that is not a space, get the next non-space. Then
// lookup what implicit space should be placed between those atoms and
// add it to groups.
if (rawGroups[i].classes[0] !== "mspace" && j < nonSpaces.length - 1) {
// if current non-space node is left dummy span, add a glue before
// first real non-space node
if (j === 0) {
groups.pop();
i--;
// If `expression` is a partial group, let the parent handle spacings
// to avoid processing groups multiple times.
if (!isRealGroup) {
return groups;
}
// Get the type of the current non-space node. If it's a document
// fragment, get the type of the rightmost node in the fragment.
const left = getTypeOfDomTree(nonSpaces[j], "right");
// Get the type of the next non-space node. If it's a document
// fragment, get the type of the leftmost node in the fragment.
const right = getTypeOfDomTree(nonSpaces[j + 1], "left");
// We use buildExpression inside of sizingGroup, but it returns a
// document fragment of elements. sizingGroup sets `isRealGroup`
// to false to avoid processing spans multiple times.
if (left && right && isRealGroup) {
const nonSpacesJp1: HtmlDomNode = assert(nonSpaces[j + 1]);
const space = isLeftTight(nonSpacesJp1)
? tightSpacings[left][right]
: spacings[left][right];
if (space) {
let glueOptions = options;
if (expression.length === 1) {
const node =
checkNodeType(expression[0], "sizing") ||
const node = checkNodeType(expression[0], "sizing") ||
checkNodeType(expression[0], "styling");
if (!node) {
// No match.
@@ -175,14 +94,105 @@ export const buildExpression = function(
}
}
groups.push(buildCommon.makeGlue(space, glueOptions));
// Dummy spans for determining spacings between surrounding atoms.
// If `expression` has no atoms on the left or right, class "leftmost"
// or "rightmost", respectively, is used to indicate it.
const dummyPrev = makeSpan([surrounding[0] || "leftmost"], [], options);
const dummyNext = makeSpan([surrounding[1] || "rightmost"], [], options);
// TODO: This code assumes that a node's math class is the first element
// of its `classes` array. A later cleanup should ensure this, for
// instance by changing the signature of `makeSpan`.
// Before determining what spaces to insert, perform bin cancellation.
// Binary operators change to ordinary symbols in some contexts.
traverseNonSpaceNodes(groups, (node, prev) => {
const prevType = prev.classes[0];
const type = node.classes[0];
if (prevType === "mbin" && utils.contains(binRightCanceller, type)) {
prev.classes[0] = "mord";
} else if (type === "mbin" && utils.contains(binLeftCanceller, prevType)) {
node.classes[0] = "mord";
}
}, {node: dummyPrev}, dummyNext);
traverseNonSpaceNodes(groups, (node, prev) => {
const prevType = getTypeOfDomTree(prev);
const type = getTypeOfDomTree(node);
// 'mtight' indicates that the node is script or scriptscript style.
const space = prevType && type ? (node.hasClass("mtight")
? tightSpacings[prevType][type]
: spacings[prevType][type]) : null;
if (space) { // Insert glue (spacing) after the `prev`.
return buildCommon.makeGlue(space, glueOptions);
}
j++;
}, {node: dummyPrev}, dummyNext);
return groups;
};
// Depth-first traverse the non-space `nodes`, calling `callback` with the
// current and previous node as arguments. If `callback` returns a node, it is
// inserted after the previous node (or at the front of `nodes` when `prev` has
// no `insertAfter`). `prev` is an object holding the previous node and an
// optional `insertAfter` function that inserts after it. `next`, if given, is
// temporarily appended to `nodes` as the rightmost node and removed again
// afterwards. Used for bin cancellation and inserting spacings.
const traverseNonSpaceNodes = function(
nodes: HtmlDomNode[],
callback: (HtmlDomNode, HtmlDomNode) => ?HtmlDomNode,
prev: {|
node: HtmlDomNode,
insertAfter?: HtmlDomNode => void,
|},
next: ?HtmlDomNode,
) {
if (next) { // temporarily append the right node, if exists
nodes.push(next);
}
let i = 0;
for (; i < nodes.length; i++) {
const node = nodes[i];
const partialGroup = checkPartialGroup(node);
if (partialGroup) { // Recursive DFS
traverseNonSpaceNodes(partialGroup.children, callback, prev);
continue;
}
// Ignore explicit spaces (e.g., \;, \,) when determining what implicit
// spacing should go between atoms of different classes
if (node.classes[0] === "mspace") {
continue;
}
const result = callback(node, prev.node);
if (result) {
if (prev.insertAfter) {
prev.insertAfter(result);
} else { // insert at front
nodes.unshift(result);
i++;
}
}
return groups;
prev.node = node;
prev.insertAfter = (index => n => {
nodes.splice(index + 1, 0, n);
i++;
})(i);
}
if (next) {
nodes.pop();
}
};
// Check if given node is a partial group, i.e., does not affect spacing around.
const checkPartialGroup = function(
node: HtmlDomNode,
): ?(DocumentFragment<HtmlDomNode> | Anchor) {
if (node instanceof DocumentFragment || node instanceof Anchor) {
return node;
}
return null;
};
// Return the outermost node of a domTree.
@@ -190,14 +200,14 @@ const getOutermostNode = function(
node: HtmlDomNode,
side: Side,
): HtmlDomNode {
if (node instanceof DocumentFragment ||
node instanceof Anchor) {
const children = node.children;
const partialGroup = checkPartialGroup(node);
if (partialGroup) {
const children = partialGroup.children;
if (children.length) {
if (side === "right") {
return getOutermostNode(children[children.length - 1], "right");
} else if (side === "left") {
return getOutermostNode(children[0], "right");
return getOutermostNode(children[0], "left");
}
}
}
@@ -205,29 +215,22 @@ const getOutermostNode = function(
};
// Return math atom class (mclass) of a domTree.
// If `side` is given, it will get the type of the outermost node at given side.
export const getTypeOfDomTree = function(
node: ?HtmlDomNode,
side: Side,
side: ?Side,
): ?DomType {
if (!node) {
return null;
}
if (side) {
node = getOutermostNode(node, side);
}
// This makes a lot of assumptions as to where the type of atom
// appears. We should do a better job of enforcing this.
return DomEnum[node.classes[0]] || null;
};
// If `node` is an atom return whether it's been assigned the mtight class.
// If `node` is a document fragment, return the value of isLeftTight() for the
// leftmost node in the fragment.
// 'mtight' indicates that the node is script or scriptscript style.
export const isLeftTight = function(node: HtmlDomNode): boolean {
node = getOutermostNode(node, "left");
return node.hasClass("mtight");
};
export const makeNullDelimiter = function(
options: Options,
classes: string[],

View File

@@ -184,7 +184,7 @@ defineFunction({
assertParsed(group);
// Build the inner expression
const inner = html.buildExpression(group.body, options, true,
[null, "mclose"]);
["mopen", "mclose"]);
let innerHeight = 0;
let innerDepth = 0;

View File

@@ -8,6 +8,10 @@ import * as mml from "../buildMathML";
defineFunctionBuilders({
type: "ordgroup",
htmlBuilder(group, options) {
if (group.semisimple) {
return buildCommon.makeFragment(
html.buildExpression(group.body, options, false));
}
return buildCommon.makeSpan(
["mord"], html.buildExpression(group.body, options, true), options);
},

View File

@@ -298,8 +298,6 @@ defineMacro("\\providecommand", (context) => newcommand(context, true, true));
// \let\bgroup={ \let\egroup=}
defineMacro("\\bgroup", "{");
defineMacro("\\egroup", "}");
defineMacro("\\begingroup", "{");
defineMacro("\\endgroup", "}");
// Symbols from latex.ltx:
// \def\lq{`}

View File

@@ -79,6 +79,7 @@ type ParseNodeTypes = {
mode: Mode,
loc?: ?SourceLocation,
body: AnyParseNode[],
semisimple?: boolean,
|},
"raw": {|
type: "raw",

View File

@@ -26,10 +26,9 @@ export type ArgType = "color" | "size" | "url" | "raw" | "original" | Mode;
export type StyleStr = "text" | "display" | "script" | "scriptscript";
// Allowable token text for "break" arguments in parser.
export type BreakToken = "]" | "}" | "$" | "\\)" | "\\cr";
export type BreakToken = "]" | "}" | "\\endgroup" | "$" | "\\)" | "\\cr";
// Math font variants.
export type FontVariant = "bold" | "bold-italic" | "bold-sans-serif" |
"double-struck" | "fraktur" | "italic" | "monospace" | "normal" | "sans-serif" |
"sans-serif-bold-italic" | "sans-serif-italic" | "script";

View File

@@ -857,3 +857,96 @@ exports[`Extending katex by new fonts and symbols Add new font class to new exte
</span>
`;
exports[`href and url commands should not affect spacing around 1`] = `
[
{
"classes": [
"mord",
"mathdefault"
],
"depth": 0,
"height": 0.43056,
"italic": 0,
"maxFontSize": 1,
"skew": 0,
"style": {
},
"text": "a",
"width": 0.52859
},
{
"attributes": {
},
"children": [
],
"classes": [
"mspace"
],
"depth": 0,
"height": 0,
"maxFontSize": 0,
"style": {
"marginRight": "0.2222222222222222em"
}
},
{
"attributes": {
"href": "http://example.com/"
},
"children": [
{
"classes": [
"mbin"
],
"depth": 0.08333,
"height": 0.58333,
"italic": 0,
"maxFontSize": 1,
"skew": 0,
"style": {
},
"text": "+",
"width": 0.77778
},
{
"attributes": {
},
"children": [
],
"classes": [
"mspace"
],
"depth": 0,
"height": 0,
"maxFontSize": 0,
"style": {
"marginRight": "0.2222222222222222em"
}
},
{
"classes": [
"mord",
"mathdefault"
],
"depth": 0,
"height": 0.69444,
"italic": 0,
"maxFontSize": 1,
"skew": 0,
"style": {
},
"text": "b",
"width": 0.42917
}
],
"classes": [
],
"depth": 0.08333,
"height": 0.69444,
"maxFontSize": 1,
"style": {
}
}
]
`;

View File

@@ -329,6 +329,33 @@ describe("A group parser", function() {
});
});
// \begingroup...\endgroup must parse as a matched pair, produce an ord group
// flagged as semi-simple, and leave math-mode spacing untouched.
describe("A \\begingroup...\\endgroup parser", () => {
    it("should not fail", () => {
        expect`\begingroup xy \endgroup`.toParse();
    });

    it("should fail when it is mismatched", () => {
        // Missing closer, then a closer of the wrong kind.
        expect`\begingroup xy`.not.toParse();
        expect`\begingroup xy }`.not.toParse();
    });

    it("should produce a semi-simple group", () => {
        const tree = getParsed`\begingroup xy \endgroup`;
        expect(tree).toHaveLength(1);

        const group = tree[0];
        expect(group.type).toMatch("ord");
        expect(group.body).toBeTruthy();
        expect(group.semisimple).toBeTruthy();
    });

    it("should not affect spacing in math mode", () => {
        // The group is transparent: it must build exactly like `x+y`.
        expect`\begingroup x+ \endgroup y`.toBuildLike`x+y`;
    });
});
describe("An implicit group parser", function() {
it("should not fail", function() {
expect`\Large x`.toParse();
@@ -2613,6 +2640,11 @@ describe("href and url commands", function() {
allowedProtocols: [],
}));
});
// Snapshot the built tree of `a\href{...}{+b}` to pin the spacing around the
// link.
it("should not affect spacing around", () => {
    expect(getBuilt`a\href{http://example.com/}{+b}`).toMatchSnapshot();
});
});
describe("A parser that does not throw on unsupported commands", function() {