move metrics to katex-fonts submodule (#1134)

* move metrics to katex-fonts submodule

* update katex-fonts submodule

* update katex-fonts submodule after pulling down changes
This commit is contained in:
Kevin Barabash
2018-02-11 16:36:13 -05:00
committed by GitHub
parent 3f69de5036
commit 9b2101f6b4
10 changed files with 3 additions and 3898 deletions

View File

@@ -1,21 +0,0 @@
### How to generate new metrics
-------------------------------
There are several requirements for generating the metrics used by KaTeX.
- You need to have an installation of TeX which supports kpathsea. You can check
this by running `tex --version`, and seeing if it has a line that looks like
> kpathsea version 6.2.0
- You need the JSON module for Perl. You can install this either from CPAN
(possibly using the `cpan` command line tool) or with your package manager.
- You need the Python module fonttools. You can install this either from PyPI
(using `easy_install` or `pip`) or with your package manager.
Once you have these things, run
make metrics
which should generate new metrics and place them into `fontMetricsData.json`.
You're done!

View File

@@ -1,114 +0,0 @@
#!/usr/bin/env python
import collections
import json
import parse_tfm
import subprocess
import sys
def find_font_path(font_name):
    """Locate a TeX font file on disk by asking ``kpsewhich``.

    Arguments:
        font_name: file name to look up, e.g. ``'cmr10.tfm'``

    Returns the output of ``kpsewhich`` with surrounding whitespace stripped.

    Raises RuntimeError if ``kpsewhich`` cannot be started, or if it exits
    with a non-zero status.
    """
    command = ['kpsewhich', font_name]
    try:
        located = subprocess.check_output(command)
    except subprocess.CalledProcessError:
        # kpsewhich ran, but reported failure through its exit status.
        raise RuntimeError("Couldn't find font metrics: '%s'" % font_name)
    except OSError:
        # The kpsewhich executable itself could not be launched.
        raise RuntimeError("Couldn't find kpsewhich program, make sure you"
                           " have TeX installed")
    else:
        return located.strip()
def main():
    """Convert a character-to-TeX-font mapping into KaTeX font metrics.

    Reads a JSON object from stdin of the form
    ``{family: {char: {'font': ..., 'char': ..., 'yshift': ...}}}``, looks
    every entry up in the matching TeX ``.tfm`` file, and writes a compact
    JSON object ``{family: {char: {height, depth, italic, skew, width}}}``
    to stdout. All emitted numbers are rounded to 5 decimal places.

    Raises RuntimeError (from `find_font_path`) when a ``.tfm`` file cannot be
    located, and KeyError when the mapping names a font that is not listed
    below.
    """
    mapping = json.load(sys.stdin)

    fonts = [
        'cmbsy10.tfm',
        'cmbx10.tfm',
        'cmbxti10.tfm',
        'cmex10.tfm',
        'cmmi10.tfm',
        'cmmib10.tfm',
        'cmr10.tfm',
        'cmsy10.tfm',
        'cmti10.tfm',
        'msam10.tfm',
        'msbm10.tfm',
        'eufm10.tfm',
        'cmtt10.tfm',
        'rsfs10.tfm',
        'cmss10.tfm',
        'cmssbx10.tfm',
        'cmssi10.tfm',
    ]

    # Extracted by running `\font\a=<font>` and then `\showthe\skewchar\a` in
    # TeX, where `<font>` is the name of the font listed here. The skewchar
    # will be printed out in the output. If it outputs `-1`, that means there
    # is no skewchar, so we use `None` here.
    font_skewchar = {
        'cmbsy10': None,
        'cmbx10': None,
        'cmbxti10': None,
        'cmex10': None,
        'cmmi10': 127,
        'cmmib10': None,
        'cmr10': None,
        'cmsy10': 48,
        'cmti10': None,
        'msam10': None,
        'msbm10': None,
        'eufm10': None,
        'cmtt10': None,
        'rsfs10': None,
        'cmss10': None,
        'cmssbx10': None,
        'cmssi10': None,
    }

    # Parse every .tfm file once, keyed by the font name without extension.
    font_name_to_tfm = {}
    for font_name in fonts:
        font_basename = font_name.split('.')[0]
        font_path = find_font_path(font_name)
        font_name_to_tfm[font_basename] = parse_tfm.read_tfm_file(font_path)

    families = collections.defaultdict(dict)

    # `.items()` instead of `.iteritems()` so the script runs on both
    # Python 2 and Python 3.
    for family, chars in mapping.items():
        for char, char_data in chars.items():
            char_num = int(char)

            font = char_data['font']
            tex_char_num = int(char_data['char'])
            yshift = float(char_data['yshift'])

            tfm_file = font_name_to_tfm[font]
            if family == "Script-Regular":
                tfm_char = tfm_file.get_char_metrics(tex_char_num,
                                                     fix_rsfs=True)
            else:
                tfm_char = tfm_file.get_char_metrics(tex_char_num)

            # yshift is scaled by 1/1000 and moves height and depth in
            # opposite directions.
            # NOTE(review): presumably yshift is given in thousandths of the
            # unit used by the TFM metrics -- confirm against the mapping
            # generator.
            height = round(tfm_char.height + yshift / 1000.0, 5)
            depth = round(tfm_char.depth - yshift / 1000.0, 5)
            italic = round(tfm_char.italic_correction, 5)
            width = round(tfm_char.width, 5)

            # The skew is the kern between this character and the font's
            # skewchar, when the font has one. Compare against None
            # explicitly so that a skewchar of 0 would not be ignored.
            skewchar = font_skewchar[font]
            skewkern = 0.0
            if skewchar is not None and skewchar in tfm_char.kern_table:
                skewkern = round(tfm_char.kern_table[skewchar], 5)

            families[family][char_num] = {
                'height': height,
                'depth': depth,
                'italic': italic,
                'skew': skewkern,
                'width': width
            }

    sys.stdout.write(
        json.dumps(families, separators=(',', ':'), sort_keys=True))


if __name__ == '__main__':
    main()

View File

@@ -1,115 +0,0 @@
#!/usr/bin/env python
from fontTools.ttLib import TTFont
import sys
import json
# Map of characters whose metrics are extracted from the TTF files, keyed by
# font name and then by character.
#
# Skew and italic metrics can't be easily parsed from the TTF. Instead,
# we map each character to a "base character", which is a character
# from the same font with correct italic and skew metrics. A character
# maps to None if it doesn't have a base.
metrics_to_extract = {
    "AMS-Regular": {
        u"\u21e2": None,  # \dashrightarrow
        u"\u21e0": None,  # \dashleftarrow
    },
    "Main-Regular": {
        u"\u2260": None,  # \neq
        u"\u2245": None,  # \cong
        u"\u0020": None,  # space
        u"\u00a0": None,  # nbsp
        u"\u2026": None,  # \ldots
        u"\u22ef": None,  # \cdots
        u"\u22f1": None,  # \ddots
        u"\u22ee": None,  # \vdots
        u"\u22a8": None,  # \models
        u"\u22c8": None,  # \bowtie
        u"\u2250": None,  # \doteq
        u"\u23b0": None,  # \lmoustache
        u"\u23b1": None,  # \rmoustache
        u"\u27ee": None,  # \lgroup
        u"\u27ef": None,  # \rgroup
        u"\u27f5": None,  # \longleftarrow
        u"\u27f8": None,  # \Longleftarrow
        u"\u27f6": None,  # \longrightarrow
        u"\u27f9": None,  # \Longrightarrow
        u"\u27f7": None,  # \longleftrightarrow
        u"\u27fa": None,  # \Longleftrightarrow
        u"\u21a6": None,  # \mapsto
        u"\u27fc": None,  # \longmapsto
        u"\u21a9": None,  # \hookleftarrow
        u"\u21aa": None,  # \hookrightarrow
        u"\u21cc": None,  # \rightleftharpoons
    },
    "Size1-Regular": {
        u"\u222c": u"\u222b",  # \iint, based on \int
        u"\u222d": u"\u222b",  # \iiint, based on \int
    },
    "Size2-Regular": {
        u"\u222c": u"\u222b",  # \iint, based on \int
        u"\u222d": u"\u222b",  # \iiint, based on \int
    },
}
def main():
    """Add TTF-derived metrics for the characters in `metrics_to_extract`.

    Reads a metrics JSON object from stdin (font name -> code point as a
    decimal string -> metrics), adds or overwrites one entry for every
    character listed in `metrics_to_extract`, and writes the result to stdout
    as compact JSON with sorted keys.

    Height and depth come from the glyph's bounding box, divided by the
    font's units-per-em. Italic, skew and width are copied from the
    character's base character when it has one. Code points that cannot be
    resolved to exactly one glyph name are reported on stderr and skipped.

    The font files are opened through a path relative to the current working
    directory.
    """
    start_json = json.load(sys.stdin)

    # `.items()` instead of `.iteritems()` so the script runs on both
    # Python 2 and Python 3.
    for font, chars in metrics_to_extract.items():
        fontInfo = TTFont(
            "../submodules/katex-fonts/fonts/KaTeX_" + font + ".ttf")
        glyf = fontInfo["glyf"]
        unitsPerEm = float(fontInfo["head"].unitsPerEm)

        # We keep ALL Unicode cmaps, not just fontInfo["cmap"].getcmap(3, 1).
        # This is playing it extra safe, since it reports inconsistencies.
        # Platform 0 is Unicode, platform 3 is Windows. For platform 3,
        # encoding 1 is UCS-2 and encoding 10 is UCS-4.
        cmap = [t.cmap for t in fontInfo["cmap"].tables
                if (t.platformID == 0)
                or (t.platformID == 3 and t.platEncID in (1, 10))]

        for char, base_char in chars.items():
            code = ord(char)
            # A table that lacks the code point contributes None.
            names = set(t.get(code) for t in cmap)

            # No table at all, or no table that knows this code point.
            if not names - set([None]):
                sys.stderr.write(
                    "Codepoint {} of font {} maps to no name\n"
                    .format(code, font))
                continue

            # Tables disagree (this includes "present in one table, missing
            # from another"). str() keeps a None entry from breaking the
            # sort and the join.
            if len(names) != 1:
                sys.stderr.write(
                    "Codepoint {} of font {} maps to multiple names: {}\n"
                    .format(code, font,
                            ", ".join(sorted(str(n) for n in names))))
                continue

            name = names.pop()

            height = depth = italic = skew = width = 0
            glyph = glyf[name]
            # Glyphs without contours keep all-zero metrics.
            if glyph.numberOfContours:
                height = glyph.yMax
                depth = -glyph.yMin
                width = glyph.xMax - glyph.xMin
            if base_char:
                base_char_str = str(ord(base_char))
                base_metrics = start_json[font][base_char_str]
                italic = base_metrics["italic"]
                skew = base_metrics["skew"]
                width = base_metrics["width"]

            # NOTE(review): height and depth are divided by unitsPerEm but
            # width is not, so a width taken from the glyph's bounding box
            # is in font units while a width taken from the base character
            # is whatever the input JSON used. Confirm whether this is
            # intended before relying on "width".
            start_json[font][str(code)] = {
                "height": height / unitsPerEm,
                "depth": depth / unitsPerEm,
                "italic": italic,
                "skew": skew,
                "width": width
            }

    sys.stdout.write(
        json.dumps(start_json, separators=(',', ':'), sort_keys=True))


if __name__ == "__main__":
    main()

View File

@@ -1,26 +0,0 @@
#!/usr/bin/env python
import sys
import json
# Metric fields written for every glyph, in output order.
props = ['depth', 'height', 'italic', 'skew']
# Widths are opt-in: they are only emitted when the first command line
# argument is `--width`.
if sys.argv[1:2] == ['--width']:
    props.append('width')

data = json.load(sys.stdin)
write = sys.stdout.write

# `sep` always holds the text that has to be written before the next item;
# it changes depending on whether that item opens the file, opens a font or
# follows another glyph.
sep = "export default {\n "
for font in sorted(data):
    write(sep + json.dumps(font))
    sep = ": {\n "
    glyphs = data[font]
    # Glyph keys are decimal strings, so order them numerically.
    for glyph in sorted(glyphs, key=int):
        write(sep + json.dumps(glyph) + ": ")
        metrics = glyphs[glyph]
        values = []
        for key in props:
            value = metrics[key]
            # Emit a plain `0` for zero values.
            values.append(0 if value == 0.0 else value)
        write(json.dumps(values))
        sep = ",\n "
    sep = ",\n },\n "
write(",\n },\n};\n")

File diff suppressed because it is too large Load Diff

View File

@@ -1,211 +0,0 @@
class CharInfoWord(object):
    """Decoded fields of one packed 32-bit character info word.

    From the most significant bits down, the word holds: the width index
    (everything above bit 23), a 4-bit height index, a 4-bit depth index, a
    6-bit italic index, a 2-bit tag and an 8-bit remainder.
    """

    def __init__(self, word):
        # Peel the word apart byte by byte. The top part is a plain shift
        # with no mask applied.
        top = word >> 24
        second = (word >> 16) & 0xff
        third = (word >> 8) & 0xff
        low = word & 0xff

        self.width_index = top
        self.height_index, self.depth_index = second >> 4, second & 0x0f
        self.italic_index, self.tag = third >> 2, third & 0b11
        self.remainder = low

    def has_ligkern(self):
        """Return True when the tag field equals 1."""
        return 1 == self.tag

    def ligkern_start(self):
        """Return the remainder field, used as the lig/kern start index."""
        return self.remainder
class LigKernProgram(object):
    """Interpreter for the lig/kern instructions of a TFM file.

    `program` is an indexable sequence of `(skip, next_char, op, remainder)`
    tuples, one per instruction.
    """

    def __init__(self, program):
        self.program = program

    def execute(self, start, next_char):
        """Look up the kern between a character and `next_char`.

        Arguments:
            start: index of the first instruction of the character's program
            next_char: number of the character that follows

        Returns the index into the kern table of the matching kern
        instruction, or None when the program has no instruction for
        `next_char` or the matching instruction is a ligature.

        Raises IndexError if the program runs past the end of the table.
        """
        curr_instruction = start

        # TFM files use an indirection for large lig/kern tables: when the
        # very first instruction has a skip byte above 128, the program
        # really begins at 256 * op + remainder.
        skip, _, op, remainder = self.program[curr_instruction]
        if skip > 128:
            curr_instruction = 256 * op + remainder

        while True:
            skip, inst_next_char, op, remainder = (
                self.program[curr_instruction])

            if inst_next_char == next_char:
                if op < 128:
                    # Don't worry about ligatures for now, we only need kerns
                    return None
                return 256 * (op - 128) + remainder

            # A skip byte of 128 or more marks the last instruction.
            if skip >= 128:
                return None

            curr_instruction += 1 + skip
class TfmCharMetrics(object):
    """Metrics of a single character, as looked up from a TFM file.

    Attributes:
        width, height, depth: entries taken from the file's metric tables
        italic_correction: entry taken from the file's italic table
        kern_table: dict mapping a following character number to the kern
            applied between this character and that one
    """

    def __init__(self, width, height, depth, italic, kern_table):
        self.width = width
        self.height = height
        self.depth = depth
        self.italic_correction = italic
        self.kern_table = kern_table


class TfmFile(object):
    """In-memory form of a TFM file: character infos plus lookup tables.

    `char_info` holds one entry per character from `start_char` to
    `end_char` inclusive, so the entry for character `c` lives at index
    `c - start_char`.
    """

    def __init__(self, start_char, end_char, char_info, width_table,
                 height_table, depth_table, italic_table, ligkern_table,
                 kern_table):
        self.start_char = start_char
        self.end_char = end_char
        self.char_info = char_info
        self.width_table = width_table
        self.height_table = height_table
        self.depth_table = depth_table
        self.italic_table = italic_table
        self.ligkern_program = LigKernProgram(ligkern_table)
        self.kern_table = kern_table

    def get_char_metrics(self, char_num, fix_rsfs=False):
        """Return the `TfmCharMetrics` of one character of this font.

        Arguments:
            char_num: the character's number within the font; it must lie
                between `start_char` and `end_char` inclusive
            fix_rsfs: accepted for backward compatibility and no longer
                needed. It used to select the `char_num - start_char`
                indexing that rsfs10.tfm requires; that indexing is now
                used for every font.

        Raises RuntimeError when `char_num` is outside the font's range.
        """
        if char_num < self.start_char or char_num > self.end_char:
            raise RuntimeError("Invalid character number")

        # char_info starts at start_char, so subtract it for every font.
        # Adding it instead only worked for fonts whose first character is 0.
        info = self.char_info[char_num - self.start_char]

        char_kern_table = {}
        if info.has_ligkern():
            ligkern_start = info.ligkern_start()
            for char in range(self.start_char, self.end_char + 1):
                kern = self.ligkern_program.execute(ligkern_start, char)
                # `kern` is an index into the kern table and 0 is a valid
                # index, so only None means "no kern".
                if kern is not None:
                    char_kern_table[char] = self.kern_table[kern]

        return TfmCharMetrics(
            self.width_table[info.width_index],
            self.height_table[info.height_index],
            self.depth_table[info.depth_index],
            self.italic_table[info.italic_index],
            char_kern_table)
class TfmReader(object):
    """Reads the big-endian primitive values of a TFM file from `f`.

    `f` is a file-like object opened for binary reading.
    """

    def __init__(self, f):
        self.f = f

    def read_byte(self):
        """Return the next byte as an integer.

        Raises EOFError when the input is exhausted, so that a truncated
        file is reported as such and not as a TypeError from `ord`.
        """
        data = self.f.read(1)
        if len(data) != 1:
            raise EOFError("Unexpected end of TFM data")
        return ord(data)

    def read_halfword(self):
        """Return the next two bytes as an unsigned 16-bit integer."""
        high = self.read_byte()
        low = self.read_byte()
        return (high << 8) | low

    def read_word(self):
        """Return the next four bytes as an unsigned 32-bit integer."""
        high = self.read_halfword()
        low = self.read_halfword()
        return (high << 16) | low

    def read_fixword(self):
        """Return the next four bytes as a signed fixed-point number.

        The word is read as a 32-bit two's complement integer and divided
        by 2**20.
        """
        word = self.read_word()
        if word & 0x80000000:
            # Sign bit set: reinterpret as a negative two's complement value.
            word -= 1 << 32
        return word / float(1 << 20)

    def read_bcpl(self, length):
        """Read a length-prefixed string stored in a `length`-byte field.

        The first byte gives the string length; the remaining `length - 1`
        bytes are consumed and the string is cut to that length. A short
        read of the remaining bytes is not reported here.
        """
        str_length = self.read_byte()
        data = self.f.read(length - 1)
        return data[:str_length]
def read_tfm_file(file_name):
    """Parse the TFM file at `file_name` and return it as a `TfmFile`.

    Only the character infos and the width, height, depth, italic, lig/kern
    and kern tables are kept. The header is skipped.

    Raises whatever `open` raises when the file cannot be opened, and
    whatever the reader raises when the file ends early.
    """
    with open(file_name, 'rb') as f:
        reader = TfmReader(f)

        # The file opens with twelve 16-bit values; several are not needed.
        reader.read_halfword()  # file_size
        header_size = reader.read_halfword()
        start_char = reader.read_halfword()
        end_char = reader.read_halfword()
        width_table_size = reader.read_halfword()
        height_table_size = reader.read_halfword()
        depth_table_size = reader.read_halfword()
        italic_table_size = reader.read_halfword()
        ligkern_table_size = reader.read_halfword()
        kern_table_size = reader.read_halfword()
        reader.read_halfword()  # extensible_table_size
        reader.read_halfword()  # parameter_table_size

        # Header: `header_size` words, starting with the checksum and the
        # design size. Nothing in it is used, so the rest (coding scheme,
        # font family, ...) is skipped word by word. Counting words handles
        # headers of any length, including ones that stop part-way through
        # the optional strings.
        reader.read_word()  # checksum
        reader.read_fixword()  # design_size
        for _ in range(header_size - 2):
            reader.read_word()

        # One info word per character from start_char to end_char inclusive.
        char_info = [CharInfoWord(reader.read_word())
                     for _ in range(start_char, end_char + 1)]

        width_table = [reader.read_fixword()
                       for _ in range(width_table_size)]
        height_table = [reader.read_fixword()
                        for _ in range(height_table_size)]
        depth_table = [reader.read_fixword()
                       for _ in range(depth_table_size)]
        italic_table = [reader.read_fixword()
                        for _ in range(italic_table_size)]

        ligkern_table = []
        for _ in range(ligkern_table_size):
            skip = reader.read_byte()
            next_char = reader.read_byte()
            op = reader.read_byte()
            remainder = reader.read_byte()
            ligkern_table.append((skip, next_char, op, remainder))

        kern_table = [reader.read_fixword()
                      for _ in range(kern_table_size)]

        # The extensible and parameter tables that follow are not needed,
        # so they are left unread.

        return TfmFile(start_char, end_char, char_info, width_table,
                       height_table, depth_table, italic_table,
                       ligkern_table, kern_table)

View File

@@ -88,7 +88,7 @@ const sigmasAndXis = {
// metrics, including height, depth, italic correction, and skew (kern from the
// character to the corresponding \skewchar)
// This map is generated via `make metrics`. It should not be changed manually.
import metricMap from "./fontMetricsData";
import metricMap from "../submodules/katex-fonts/fontMetricsData";
// These are very rough approximations. We default to Times New Roman which
// should have Latin-1 and Cyrillic characters, but may not depending on the

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@
* This can be used to define some commands in terms of others.
*/
import fontMetricsData from "./fontMetricsData";
import fontMetricsData from "../submodules/katex-fonts/fontMetricsData";
import symbols from "./symbols";
import utils from "./utils";
import {Token} from "./Token";