Introduce "atom" parse node to coalesce various symbol nodes. (#1541)

This commit is contained in:
Ashish Myles
2018-08-02 07:46:40 -04:00
committed by ylemkimon
parent 30be53efe6
commit 7e97a382ec
8 changed files with 126 additions and 127 deletions

View File

@@ -3,7 +3,7 @@
import functions from "./functions"; import functions from "./functions";
import environments from "./environments"; import environments from "./environments";
import MacroExpander from "./MacroExpander"; import MacroExpander from "./MacroExpander";
import symbols, {extraLatin} from "./symbols"; import symbols, {ATOMS, extraLatin} from "./symbols";
import {validUnit} from "./units"; import {validUnit} from "./units";
import {supportedCodepoint} from "./unicodeScripts"; import {supportedCodepoint} from "./unicodeScripts";
import unicodeAccents from "./unicodeAccents"; import unicodeAccents from "./unicodeAccents";
@@ -16,6 +16,7 @@ import Settings from "./Settings";
import SourceLocation from "./SourceLocation"; import SourceLocation from "./SourceLocation";
import {Token} from "./Token"; import {Token} from "./Token";
import type {AnyParseNode, SymbolParseNode} from "./parseNode"; import type {AnyParseNode, SymbolParseNode} from "./parseNode";
import type {Atom, Group} from "./symbols";
import type {Mode, ArgType, BreakToken} from "./types"; import type {Mode, ArgType, BreakToken} from "./types";
import type {FunctionContext, FunctionSpec} from "./defineFunction"; import type {FunctionContext, FunctionSpec} from "./defineFunction";
import type {EnvSpec} from "./defineEnvironment"; import type {EnvSpec} from "./defineEnvironment";
@@ -1022,14 +1023,28 @@ export default class Parser {
`Latin-1/Unicode text character "${text[0]}" used in ` + `Latin-1/Unicode text character "${text[0]}" used in ` +
`math mode`, nucleus); `math mode`, nucleus);
} }
// TODO(#1492): Remove this override once this becomes an "atom" type. const group: Group = symbols[this.mode][text].group;
// $FlowFixMe const loc = SourceLocation.range(nucleus);
const s: SymbolParseNode = { let s: SymbolParseNode;
type: symbols[this.mode][text].group, if (ATOMS.hasOwnProperty(group)) {
mode: this.mode, // $FlowFixMe
loc: SourceLocation.range(nucleus), const family: Atom = group;
value: text, s = {
}; type: "atom",
mode: this.mode,
family,
loc,
value: text,
};
} else {
// $FlowFixMe
s = {
type: group,
mode: this.mode,
loc,
value: text,
};
}
symbol = s; symbol = s;
} else if (text.charCodeAt(0) >= 0x80) { // no symbol for e.g. ^ } else if (text.charCodeAt(0) >= 0x80) { // no symbol for e.g. ^
if (this.settings.strict) { if (this.settings.strict) {

View File

@@ -4,7 +4,7 @@ import buildCommon from "../buildCommon";
import delimiter from "../delimiter"; import delimiter from "../delimiter";
import mathMLTree from "../mathMLTree"; import mathMLTree from "../mathMLTree";
import Style from "../Style"; import Style from "../Style";
import {assertNodeType, checkNodeType} from "../parseNode"; import {assertNodeType, assertAtomFamily, checkNodeType} from "../parseNode";
import * as html from "../buildHTML"; import * as html from "../buildHTML";
import * as mml from "../buildMathML"; import * as mml from "../buildMathML";
@@ -370,17 +370,17 @@ defineFunction({
// Look into the parse nodes to get the desired delimiters. // Look into the parse nodes to get the desired delimiters.
let leftNode = checkNodeType(args[0], "ordgroup"); let leftNode = checkNodeType(args[0], "ordgroup");
if (leftNode) { if (leftNode) {
leftNode = assertNodeType(leftNode.value[0], "open"); leftNode = assertAtomFamily(leftNode.value[0], "open");
} else { } else {
leftNode = assertNodeType(args[0], "open"); leftNode = assertAtomFamily(args[0], "open");
} }
const leftDelim = delimFromValue(leftNode.value); const leftDelim = delimFromValue(leftNode.value);
let rightNode = checkNodeType(args[1], "ordgroup"); let rightNode = checkNodeType(args[1], "ordgroup");
if (rightNode) { if (rightNode) {
rightNode = assertNodeType(rightNode.value[0], "close"); rightNode = assertAtomFamily(rightNode.value[0], "close");
} else { } else {
rightNode = assertNodeType(args[1], "close"); rightNode = assertAtomFamily(args[1], "close");
} }
const rightDelim = delimFromValue(rightNode.value); const rightDelim = delimFromValue(rightNode.value);

View File

@@ -45,15 +45,14 @@ defineFunction({
mathmlBuilder, mathmlBuilder,
}); });
export const binrelClass = (arg: AnyParseNode) => { export const binrelClass = (arg: AnyParseNode): string => {
// \binrel@ spacing varies with (bin|rel|ord) of the atom in the argument. // \binrel@ spacing varies with (bin|rel|ord) of the atom in the argument.
// (by rendering separately and with {}s before and after, and measuring // (by rendering separately and with {}s before and after, and measuring
// the change in spacing). We'll do roughly the same by detecting the // the change in spacing). We'll do roughly the same by detecting the
// atom type directly. // atom type directly.
const atomType = (arg.type === "ordgroup" && const atom = (arg.type === "ordgroup" && arg.value.length ? arg.value[0] : arg);
arg.value.length ? arg.value[0].type : arg.type); if (atom.type === "atom" && (atom.family === "bin" || atom.family === "rel")) {
if (/^(bin|rel)$/.test(atomType)) { return "m" + atom.family;
return "m" + atomType;
} else { } else {
return "mord"; return "mord";
} }

View File

@@ -5,48 +5,26 @@ import mathMLTree from "../mathMLTree";
import * as mml from "../buildMathML"; import * as mml from "../buildMathML";
import type Options from "../Options";
import type {ParseNode} from "../parseNode";
import type {Group} from "../symbols";
// Operator ParseNodes created in Parser.js from symbol Groups in src/symbols.js. // Operator ParseNodes created in Parser.js from symbol Groups in src/symbols.js.
// NOTE: `NODETYPE` is constrained by `Group` instead of `NodeType`. This defineFunctionBuilders({
// guarantees that `group.value` is a string as required by buildCommon.mathsym. type: "atom",
function defineOpFunction<NODETYPE: Group>( htmlBuilder(group, options) {
type: NODETYPE, return buildCommon.mathsym(
mathmlNodePostProcessor?: ( group.value, group.mode, options, ["m" + group.family]);
mathMLTree.MathNode, },
ParseNode<NODETYPE>, mathmlBuilder(group, options) {
Options) => *, const node = new mathMLTree.MathNode(
) { "mo", [mml.makeText(group.value, group.mode)]);
defineFunctionBuilders({ if (group.family === "bin") {
type, const variant = mml.getVariant(group, options);
htmlBuilder(group: ParseNode<NODETYPE>, options) { if (variant === "bold-italic") {
const groupValue: string = group.value; node.setAttribute("mathvariant", variant);
return buildCommon.mathsym(
groupValue, group.mode, options, ["m" + type]);
},
mathmlBuilder(group: ParseNode<NODETYPE>, options) {
const node = new mathMLTree.MathNode(
"mo", [mml.makeText(group.value, group.mode)]);
if (mathmlNodePostProcessor) {
mathmlNodePostProcessor(node, group, options);
} }
return node; } else if (group.family === "punct") {
}, node.setAttribute("separator", "true");
}); }
} return node;
},
defineOpFunction("bin", (mathNode, group: ParseNode<"bin">, options) => {
const variant = mml.getVariant(group, options);
if (variant === "bold-italic") {
mathNode.setAttribute("mathvariant", variant);
}
}); });
defineOpFunction("rel");
defineOpFunction("open");
defineOpFunction("close");
defineOpFunction("inner");
defineOpFunction("punct", mathNode => mathNode.setAttribute("separator", "true"));

View File

@@ -1,7 +1,8 @@
// @flow // @flow
import {GROUPS} from "./symbols"; import {NON_ATOMS} from "./symbols";
import type SourceLocation from "./SourceLocation"; import type SourceLocation from "./SourceLocation";
import type {ArrayEnvNodeData} from "./environments/array"; import type {ArrayEnvNodeData} from "./environments/array";
import type {Atom} from "./symbols";
import type {Mode, StyleStr} from "./types"; import type {Mode, StyleStr} from "./types";
import type {Token} from "./Token"; import type {Token} from "./Token";
import type {Measurement} from "./units"; import type {Measurement} from "./units";
@@ -18,15 +19,10 @@ export type LeftRightDelimType = {|
// ParseNode's corresponding to Symbol `Group`s in symbols.js. // ParseNode's corresponding to Symbol `Group`s in symbols.js.
export type SymbolParseNode = export type SymbolParseNode =
ParseNode<"atom"> |
ParseNode<"accent-token"> | ParseNode<"accent-token"> |
ParseNode<"bin"> |
ParseNode<"close"> |
ParseNode<"inner"> |
ParseNode<"mathord"> | ParseNode<"mathord"> |
ParseNode<"op-token"> | ParseNode<"op-token"> |
ParseNode<"open"> |
ParseNode<"punct"> |
ParseNode<"rel"> |
ParseNode<"spacing"> | ParseNode<"spacing"> |
ParseNode<"textord">; ParseNode<"textord">;
@@ -162,26 +158,9 @@ type ParseNodeTypes = {
// From symbol groups, constructed in Parser.js via `symbols` lookup. // From symbol groups, constructed in Parser.js via `symbols` lookup.
// (Some of these have "-token" suffix to distinguish them from existing // (Some of these have "-token" suffix to distinguish them from existing
// `ParseNode` types.) // `ParseNode` types.)
"accent-token": {| "atom": {|
type: "accent-token", type: "atom",
mode: Mode, family: Atom,
loc?: ?SourceLocation,
value: string,
|},
"bin": {|
type: "bin",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"close": {|
type: "close",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"inner": {|
type: "inner",
mode: Mode, mode: Mode,
loc?: ?SourceLocation, loc?: ?SourceLocation,
value: string, value: string,
@@ -192,30 +171,6 @@ type ParseNodeTypes = {
loc?: ?SourceLocation, loc?: ?SourceLocation,
value: string, value: string,
|}, |},
"op-token": {|
type: "op-token",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"open": {|
type: "open",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"punct": {|
type: "punct",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"rel": {|
type: "rel",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"spacing": {| "spacing": {|
type: "spacing", type: "spacing",
mode: Mode, mode: Mode,
@@ -228,6 +183,19 @@ type ParseNodeTypes = {
loc?: ?SourceLocation, loc?: ?SourceLocation,
value: string, value: string,
|}, |},
// These "-token" types don't have corresponding HTML/MathML builders.
"accent-token": {|
type: "accent-token",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
"op-token": {|
type: "op-token",
mode: Mode,
loc?: ?SourceLocation,
value: string,
|},
// From functions.js and functions/*.js. See also "color", "op", "styling", // From functions.js and functions/*.js. See also "color", "op", "styling",
// and "text" above. // and "text" above.
"accent": {| "accent": {|
@@ -599,6 +567,40 @@ export function checkNodeType<NODETYPE: NodeType>(
return null; return null;
} }
/**
* Asserts that the node is of the given type and returns it with stricter
* typing. Throws if the node's type does not match.
*/
export function assertAtomFamily(
node: ?AnyParseNode,
family: Atom,
): ParseNode<"atom"> {
const typedNode = checkAtomFamily(node, family);
if (!typedNode) {
throw new Error(
`Expected node of type "atom" and family "${family}", but got ` +
(node ?
(node.type === "atom" ?
`atom of family ${node.family}` :
`node of type ${node.type}`) :
String(node)));
}
return typedNode;
}
/**
* Returns the node more strictly typed iff it is of the given type. Otherwise,
* returns null.
*/
export function checkAtomFamily(
node: ?AnyParseNode,
family: Atom,
): ?ParseNode<"atom"> {
return node && node.type === "atom" && node.family === family ?
node :
null;
}
/** /**
* Returns the node more strictly typed iff it is of the given type. Otherwise, * Returns the node more strictly typed iff it is of the given type. Otherwise,
* returns null. * returns null.
@@ -618,7 +620,7 @@ export function assertSymbolNodeType(node: ?AnyParseNode): SymbolParseNode {
* returns null. * returns null.
*/ */
export function checkSymbolNodeType(node: ?AnyParseNode): ?SymbolParseNode { export function checkSymbolNodeType(node: ?AnyParseNode): ?SymbolParseNode {
if (node && GROUPS.hasOwnProperty(node.type)) { if (node && (node.type === "atom" || NON_ATOMS.hasOwnProperty(node.type))) {
// $FlowFixMe // $FlowFixMe
return node; return node;
} }

View File

@@ -24,20 +24,25 @@ type Font = "main" | "ams";
// types for raw text tokens, and we want to avoid conflicts with higher-level // types for raw text tokens, and we want to avoid conflicts with higher-level
// `ParseNode` types. These `ParseNode`s are constructed within `Parser` by // `ParseNode` types. These `ParseNode`s are constructed within `Parser` by
// looking up the `symbols` map. // looking up the `symbols` map.
export const GROUPS = { // Set of all the groups. export const ATOMS = {
"accent-token": 1,
"bin": 1, "bin": 1,
"close": 1, "close": 1,
"inner": 1, "inner": 1,
"mathord": 1,
"op-token": 1,
"open": 1, "open": 1,
"punct": 1, "punct": 1,
"rel": 1, "rel": 1,
};
export const NON_ATOMS = {
"accent-token": 1,
"mathord": 1,
"op-token": 1,
"spacing": 1, "spacing": 1,
"textord": 1, "textord": 1,
}; };
export type Group = $Keys<typeof GROUPS>;
export type Atom = $Keys<typeof ATOMS>;
export type NonAtom = $Keys<typeof NON_ATOMS>
export type Group = Atom | NonAtom;
type CharInfoMap = {[string]: {font: Font, group: Group, replace: ?string}}; type CharInfoMap = {[string]: {font: Font, group: Group, replace: ?string}};
const symbols: {[Mode]: CharInfoMap} = { const symbols: {[Mode]: CharInfoMap} = {

View File

@@ -127,12 +127,7 @@ const isCharacterBox = function(group: AnyParseNode): boolean {
// These are all the types of groups which hold single characters // These are all the types of groups which hold single characters
return baseElem.type === "mathord" || return baseElem.type === "mathord" ||
baseElem.type === "textord" || baseElem.type === "textord" ||
baseElem.type === "bin" || baseElem.type === "atom";
baseElem.type === "rel" ||
baseElem.type === "inner" ||
baseElem.type === "open" ||
baseElem.type === "close" ||
baseElem.type === "punct";
}; };
export const assert = function<T>(value: ?T): T { export const assert = function<T>(value: ?T): T {

View File

@@ -71,7 +71,8 @@ describe("A bin parser", function() {
for (let i = 0; i < parse.length; i++) { for (let i = 0; i < parse.length; i++) {
const group = parse[i]; const group = parse[i];
expect(group.type).toEqual("bin"); expect(group.type).toEqual("atom");
expect(group.family).toEqual("bin");
} }
}); });
}); });
@@ -96,7 +97,8 @@ describe("A rel parser", function() {
if (group.type === "mclass") { if (group.type === "mclass") {
expect(group.value.mclass).toEqual("mrel"); expect(group.value.mclass).toEqual("mrel");
} else { } else {
expect(group.type).toEqual("rel"); expect(group.type).toEqual("atom");
expect(group.family).toEqual("rel");
} }
} }
}); });
@@ -114,7 +116,8 @@ describe("A punct parser", function() {
for (let i = 0; i < parse.length; i++) { for (let i = 0; i < parse.length; i++) {
const group = parse[i]; const group = parse[i];
expect(group.type).toEqual("punct"); expect(group.type).toEqual("atom");
expect(group.family).toEqual("punct");
} }
}); });
}); });
@@ -131,7 +134,8 @@ describe("An open parser", function() {
for (let i = 0; i < parse.length; i++) { for (let i = 0; i < parse.length; i++) {
const group = parse[i]; const group = parse[i];
expect(group.type).toEqual("open"); expect(group.type).toEqual("atom");
expect(group.family).toEqual("open");
} }
}); });
}); });
@@ -148,7 +152,8 @@ describe("A close parser", function() {
for (let i = 0; i < parse.length; i++) { for (let i = 0; i < parse.length; i++) {
const group = parse[i]; const group = parse[i];
expect(group.type).toEqual("close"); expect(group.type).toEqual("atom");
expect(group.family).toEqual("close");
} }
}); });
}); });