move metrics to katex-fonts submodule (#1134)

* move metrics to katex-fonts submodule
* update katex-fonts submodule
* update katex-fonts submodule after pulling down changes
2025-10-05 03:08:40 +00:00 · 2018-02-11 16:36:13 -05:00
parent 3f69de5036
commit 9b2101f6b4
10 changed files with 3 additions and 3898 deletions
--- a/metrics/README.md
+++ b/metrics/README.md
@@ -1,21 +0,0 @@
-### How to generate new metrics
-------------------------------
-
-There are several requirements for generating the metrics used by KaTeX.
-
-- You need to have an installation of TeX which supports kpathsea. You can check
-  this by running `tex --version`, and seeing if it has a line that looks like
-  > kpathsea version 6.2.0
-
-- You need the JSON module for Perl. You can install this either from CPAN
-  (possibly using the `cpan` command line tool) or with your package manager.
-
-- You need the Python module fonttools. You can install this either from PyPI
-  (using `easy_install` or `pip`) or with your package manager.
-
-Once you have these things, run
-
-    make metrics
-
-which should generate new metrics and place them into `fontMetricsData.json`.
-You're done!
--- a/metrics/extract_tfms.py
+++ b/metrics/extract_tfms.py
@@ -1,114 +0,0 @@
-#!/usr/bin/env python
-
-import collections
-import json
-import parse_tfm
-import subprocess
-import sys
-
-
-def find_font_path(font_name):
-    try:
-        font_path = subprocess.check_output(['kpsewhich', font_name])
-    except OSError:
-        raise RuntimeError("Couldn't find kpsewhich program, make sure you" +
-                           " have TeX installed")
-    except subprocess.CalledProcessError:
-        raise RuntimeError("Couldn't find font metrics: '%s'" % font_name)
-    return font_path.strip()
-
-
-def main():
-    mapping = json.load(sys.stdin)
-
-    fonts = [
-        'cmbsy10.tfm',
-        'cmbx10.tfm',
-        'cmbxti10.tfm',
-        'cmex10.tfm',
-        'cmmi10.tfm',
-        'cmmib10.tfm',
-        'cmr10.tfm',
-        'cmsy10.tfm',
-        'cmti10.tfm',
-        'msam10.tfm',
-        'msbm10.tfm',
-        'eufm10.tfm',
-        'cmtt10.tfm',
-        'rsfs10.tfm',
-        'cmss10.tfm',
-        'cmssbx10.tfm',
-        'cmssi10.tfm',
-    ]
-
-    # Extracted by running `\font\a=<font>` and then `\showthe\skewchar\a` in
-    # TeX, where `<font>` is the name of the font listed here. The skewchar
-    # will be printed out in the output. If it outputs `-1`, that means there
-    # is no skewchar, so we use `None` here.
-    font_skewchar = {
-        'cmbsy10': None,
-        'cmbx10': None,
-        'cmbxti10': None,
-        'cmex10': None,
-        'cmmi10': 127,
-        'cmmib10': None,
-        'cmr10': None,
-        'cmsy10': 48,
-        'cmti10': None,
-        'msam10': None,
-        'msbm10': None,
-        'eufm10': None,
-        'cmtt10': None,
-        'rsfs10': None,
-        'cmss10': None,
-        'cmssbx10': None,
-        'cmssi10': None,
-    }
-
-    font_name_to_tfm = {}
-
-    for font_name in fonts:
-        font_basename = font_name.split('.')[0]
-        font_path = find_font_path(font_name)
-        font_name_to_tfm[font_basename] = parse_tfm.read_tfm_file(font_path)
-
-    families = collections.defaultdict(dict)
-
-    for family, chars in mapping.iteritems():
-        for char, char_data in chars.iteritems():
-            char_num = int(char)
-
-            font = char_data['font']
-            tex_char_num = int(char_data['char'])
-            yshift = float(char_data['yshift'])
-
-            if family == "Script-Regular":
-                tfm_char = font_name_to_tfm[font].get_char_metrics(tex_char_num,
-                                                                   fix_rsfs=True)
-            else:
-                tfm_char = font_name_to_tfm[font].get_char_metrics(tex_char_num)
-
-            height = round(tfm_char.height + yshift / 1000.0, 5)
-            depth = round(tfm_char.depth - yshift / 1000.0, 5)
-            italic = round(tfm_char.italic_correction, 5)
-            width = round(tfm_char.width, 5)
-
-            skewkern = 0.0
-            if (font_skewchar[font] and
-                    font_skewchar[font] in tfm_char.kern_table):
-                skewkern = round(
-                    tfm_char.kern_table[font_skewchar[font]], 5)
-
-            families[family][char_num] = {
-                'height': height,
-                'depth': depth,
-                'italic': italic,
-                'skew': skewkern,
-                'width': width
-            }
-
-    sys.stdout.write(
-        json.dumps(families, separators=(',', ':'), sort_keys=True))
-
-if __name__ == '__main__':
-    main()
--- a/metrics/extract_ttfs.py
+++ b/metrics/extract_ttfs.py
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-
-from fontTools.ttLib import TTFont
-import sys
-import json
-
-# map of characters to extract
-metrics_to_extract = {
-    # Font name
-    "AMS-Regular": {
-        u"\u21e2": None,  # \dashrightarrow
-        u"\u21e0": None,  # \dashleftarrow
-    },
-    "Main-Regular": {
-        # Skew and italic metrics can't be easily parsed from the TTF. Instead,
-        # we map each character to a "base character", which is a character
-        # from the same font with correct italic and skew metrics. A character
-        # maps to None if it doesn't have a base.
-
-        u"\u2260": None,  # \neq
-        u"\u2245": None,  # \cong
-        u"\u0020": None,  # space
-        u"\u00a0": None,  # nbsp
-        u"\u2026": None,  # \ldots
-        u"\u22ef": None,  # \cdots
-        u"\u22f1": None,  # \ddots
-        u"\u22ee": None,  # \vdots
-        u"\u22ee": None,  # \vdots
-        u"\u22a8": None,  # \models
-        u"\u22c8": None,  # \bowtie
-        u"\u2250": None,  # \doteq
-        u"\u23b0": None,  # \lmoustache
-        u"\u23b1": None,  # \rmoustache
-        u"\u27ee": None,  # \lgroup
-        u"\u27ef": None,  # \rgroup
-        u"\u27f5": None,  # \longleftarrow
-        u"\u27f8": None,  # \Longleftarrow
-        u"\u27f6": None,  # \longrightarrow
-        u"\u27f9": None,  # \Longrightarrow
-        u"\u27f7": None,  # \longleftrightarrow
-        u"\u27fa": None,  # \Longleftrightarrow
-        u"\u21a6": None,  # \mapsto
-        u"\u27fc": None,  # \longmapsto
-        u"\u21a9": None,  # \hookleftarrow
-        u"\u21aa": None,  # \hookrightarrow
-        u"\u21cc": None,  # \rightleftharpoons
-    },
-    "Size1-Regular": {
-        u"\u222c": u"\u222b",  # \iint, based on \int
-        u"\u222d": u"\u222b",  # \iiint, based on \int
-    },
-    "Size2-Regular": {
-        u"\u222c": u"\u222b",  # \iint, based on \int
-        u"\u222d": u"\u222b",  # \iiint, based on \int
-    },
-}
-
-
-def main():
-    start_json = json.load(sys.stdin)
-
-    for font, chars in metrics_to_extract.iteritems():
-        fontInfo = TTFont("../submodules/katex-fonts/fonts/KaTeX_" + font + ".ttf")
-        glyf = fontInfo["glyf"]
-        unitsPerEm = float(fontInfo["head"].unitsPerEm)
-
-        # We keep ALL Unicode cmaps, not just fontInfo["cmap"].getcmap(3, 1).
-        # This is playing it extra safe, since it reports inconsistencies.
-        # Platform 0 is Unicode, platform 3 is Windows. For platform 3,
-        # encoding 1 is UCS-2 and encoding 10 is UCS-4.
-        cmap = [t.cmap for t in fontInfo["cmap"].tables
-                if (t.platformID == 0)
-                or (t.platformID == 3 and t.platEncID in (1, 10))]
-
-        for char, base_char in chars.iteritems():
-            code = ord(char)
-            names = set(t.get(code) for t in cmap)
-            if not names:
-                sys.stderr.write(
-                    "Codepoint {} of font {} maps to no name\n"
-                    .format(code, font))
-                continue
-            if len(names) != 1:
-                sys.stderr.write(
-                    "Codepoint {} of font {} maps to multiple names: {}\n"
-                    .format(code, font, ", ".join(sorted(names))))
-                continue
-            name = names.pop()
-
-            height = depth = italic = skew = width = 0
-            glyph = glyf[name]
-            if glyph.numberOfContours:
-                height = glyph.yMax
-                depth = -glyph.yMin
-                width = glyph.xMax - glyph.xMin
-            if base_char:
-                base_char_str = str(ord(base_char))
-                base_metrics = start_json[font][base_char_str]
-                italic = base_metrics["italic"]
-                skew = base_metrics["skew"]
-                width = base_metrics["width"]
-
-            start_json[font][str(code)] = {
-                "height": height / unitsPerEm,
-                "depth": depth / unitsPerEm,
-                "italic": italic,
-                "skew": skew,
-                "width": width
-            }
-
-    sys.stdout.write(
-        json.dumps(start_json, separators=(',', ':'), sort_keys=True))
-
-if __name__ == "__main__":
-    main()
--- a/metrics/format_json.py
+++ b/metrics/format_json.py
@@ -1,26 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import json
-
-props = ['depth', 'height', 'italic', 'skew']
-
-if len(sys.argv) > 1:
-    if sys.argv[1] == '--width':
-        props.append('width')
-
-data = json.load(sys.stdin)
-sep = "export default {\n    "
-for font in sorted(data):
-    sys.stdout.write(sep + json.dumps(font))
-    sep = ": {\n        "
-    for glyph in sorted(data[font], key=int):
-        sys.stdout.write(sep + json.dumps(glyph) + ": ")
-
-        values = [value if value != 0.0 else 0 for value in
-                  [data[font][glyph][key] for key in props]]
-
-        sys.stdout.write(json.dumps(values))
-        sep = ",\n        "
-    sep = ",\n    },\n    "
-sys.stdout.write(",\n    },\n};\n")
--- a/metrics/mapping.pl
+++ b/metrics/mapping.pl
--- a/metrics/parse_tfm.py
+++ b/metrics/parse_tfm.py
@@ -1,211 +0,0 @@
-class CharInfoWord(object):
-    def __init__(self, word):
-        b1, b2, b3, b4 = (word >> 24,
-                          (word & 0xff0000) >> 16,
-                          (word & 0xff00) >> 8,
-                          word & 0xff)
-
-        self.width_index = b1
-        self.height_index = b2 >> 4
-        self.depth_index = b2 & 0x0f
-        self.italic_index = (b3 & 0b11111100) >> 2
-        self.tag = b3 & 0b11
-        self.remainder = b4
-
-    def has_ligkern(self):
-        return self.tag == 1
-
-    def ligkern_start(self):
-        return self.remainder
-
-
-class LigKernProgram(object):
-    def __init__(self, program):
-        self.program = program
-
-    def execute(self, start, next_char):
-        curr_instruction = start
-        while True:
-            instruction = self.program[curr_instruction]
-            (skip, inst_next_char, op, remainder) = instruction
-
-            if inst_next_char == next_char:
-                if op < 128:
-                    # Don't worry about ligatures for now, we only need kerns
-                    return None
-                else:
-                    return 256 * (op - 128) + remainder
-            elif skip >= 128:
-                return None
-            else:
-                curr_instruction += 1 + skip
-
-
-class TfmCharMetrics(object):
-    def __init__(self, width, height, depth, italic, kern_table):
-        self.width = width
-        self.height = height
-        self.depth = depth
-        self.italic_correction = italic
-        self.kern_table = kern_table
-
-
-class TfmFile(object):
-    def __init__(self, start_char, end_char, char_info, width_table,
-                 height_table, depth_table, italic_table, ligkern_table,
-                 kern_table):
-        self.start_char = start_char
-        self.end_char = end_char
-        self.char_info = char_info
-        self.width_table = width_table
-        self.height_table = height_table
-        self.depth_table = depth_table
-        self.italic_table = italic_table
-        self.ligkern_program = LigKernProgram(ligkern_table)
-        self.kern_table = kern_table
-
-    def get_char_metrics(self, char_num, fix_rsfs=False):
-        """Return glyph metrics for a TeX character number in this font.
-
-        Arguments:
-            char_num: a TeX character number within [start_char, end_char]
-            fix_rsfs: adjust for rsfs10.tfm's different indexing system
-        """
-        if char_num < self.start_char or char_num > self.end_char:
-            raise RuntimeError("Invalid character number")
-
-        if fix_rsfs:
-            # all of the char_nums contained start from zero in rsfs10.tfm
-            info = self.char_info[char_num - self.start_char]
-        else:
-            info = self.char_info[char_num + self.start_char]
-
-        char_kern_table = {}
-        if info.has_ligkern():
-            for char in range(self.start_char, self.end_char + 1):
-                kern = self.ligkern_program.execute(info.ligkern_start(), char)
-                if kern:
-                    char_kern_table[char] = self.kern_table[kern]
-
-        return TfmCharMetrics(
-            self.width_table[info.width_index],
-            self.height_table[info.height_index],
-            self.depth_table[info.depth_index],
-            self.italic_table[info.italic_index],
-            char_kern_table)
-
-
-class TfmReader(object):
-    def __init__(self, f):
-        self.f = f
-
-    def read_byte(self):
-        return ord(self.f.read(1))
-
-    def read_halfword(self):
-        b1 = self.read_byte()
-        b2 = self.read_byte()
-        return (b1 << 8) | b2
-
-    def read_word(self):
-        b1 = self.read_byte()
-        b2 = self.read_byte()
-        b3 = self.read_byte()
-        b4 = self.read_byte()
-        return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4
-
-    def read_fixword(self):
-        word = self.read_word()
-
-        neg = False
-        if word & 0x80000000:
-            neg = True
-            word = (-word & 0xffffffff)
-
-        return (-1 if neg else 1) * word / float(1 << 20)
-
-    def read_bcpl(self, length):
-        str_length = self.read_byte()
-        data = self.f.read(length - 1)
-        return data[:str_length]
-
-
-def read_tfm_file(file_name):
-    with open(file_name, 'rb') as f:
-        reader = TfmReader(f)
-
-        # file_size
-        reader.read_halfword()
-        header_size = reader.read_halfword()
-
-        start_char = reader.read_halfword()
-        end_char = reader.read_halfword()
-
-        width_table_size = reader.read_halfword()
-        height_table_size = reader.read_halfword()
-        depth_table_size = reader.read_halfword()
-        italic_table_size = reader.read_halfword()
-
-        ligkern_table_size = reader.read_halfword()
-        kern_table_size = reader.read_halfword()
-
-        # extensible_table_size
-        reader.read_halfword()
-        # parameter_table_size
-        reader.read_halfword()
-
-        # checksum
-        reader.read_word()
-        # design_size
-        reader.read_fixword()
-
-        if header_size > 2:
-            # coding_scheme
-            reader.read_bcpl(40)
-
-        if header_size > 12:
-            # font_family
-            reader.read_bcpl(20)
-
-        for i in range(header_size - 17):
-            reader.read_word()
-
-        char_info = []
-        for i in range(start_char, end_char + 1):
-            char_info.append(CharInfoWord(reader.read_word()))
-
-        width_table = []
-        for i in range(width_table_size):
-            width_table.append(reader.read_fixword())
-
-        height_table = []
-        for i in range(height_table_size):
-            height_table.append(reader.read_fixword())
-
-        depth_table = []
-        for i in range(depth_table_size):
-            depth_table.append(reader.read_fixword())
-
-        italic_table = []
-        for i in range(italic_table_size):
-            italic_table.append(reader.read_fixword())
-
-        ligkern_table = []
-        for i in range(ligkern_table_size):
-            skip = reader.read_byte()
-            next_char = reader.read_byte()
-            op = reader.read_byte()
-            remainder = reader.read_byte()
-
-            ligkern_table.append((skip, next_char, op, remainder))
-
-        kern_table = []
-        for i in range(kern_table_size):
-            kern_table.append(reader.read_fixword())
-
-        # There is more information, like the extensible and param tables,
-        # but we don't need these for now
-
-        return TfmFile(start_char, end_char, char_info, width_table,
-                       height_table, depth_table, italic_table,
-                       ligkern_table, kern_table)
--- a/src/fontMetrics.js
+++ b/src/fontMetrics.js
@@ -88,7 +88,7 @@ const sigmasAndXis = {
 // metrics, including height, depth, italic correction, and skew (kern from the
 // character to the corresponding \skewchar)
 // This map is generated via `make metrics`. It should not be changed manually.
-import metricMap from "./fontMetricsData";
+import metricMap from "../submodules/katex-fonts/fontMetricsData";

 // These are very rough approximations.  We default to Times New Roman which
 // should have Latin-1 and Cyrillic characters, but may not depending on the
--- a/src/fontMetricsData.js
+++ b/src/fontMetricsData.js
--- a/src/macros.js
+++ b/src/macros.js
@@ -4,7 +4,7 @@
 * This can be used to define some commands in terms of others.
 */

-import fontMetricsData from "./fontMetricsData";
+import fontMetricsData from "../submodules/katex-fonts/fontMetricsData";
 import symbols from "./symbols";
 import utils from "./utils";
 import {Token} from "./Token";
--- a/submodules/katex-fonts
+++ b/submodules/katex-fonts