Unicode accents (#992)

* Unicode accents * Lexer now looks for combining diacritical marks and adds them to the same character * Parser's `parseSymbol` now recognizes both combined and uncombined forms of Unicode accents, and builds accent objects just like the accent functions * Added CJK support to math mode (not just text mode) * Add invalid combining character test * Add MathML test * Add weak support for other Latin-1 characters This maintains backwards compatibility, but it uses the wrong font. There's a TODO to fix this later. Also refactor symbol code to use for..of * Update Unicode screenshot * Remove dot from accented i and j (in math mode) Also add dotless Unicode characters to support some accented i's and j's * Fix \imath, \jmath, \pounds, and more tests * Switch from for..of to .split().forEach() Save around 800 bytes in minified code * Fix split * normalize() detection * Convert back to vanilla for loops * Fix merge * Move normalize dependency to unicodeMake.js * Make unicodeSymbols into a lookup table instead of macros This is important for multi-accented characters. * Add comments about when to run * Move symbols definition into unicodeMake/Symbols.js * Remove CJK support in text mode * Add missing semicolon * Refactor unicodeAccents to its own file * Dotless i/j support in text mode * Remove excess character mappings * Fix Åå in math mode (still via Times) * Update to support #1030 * Add accented Greek letter support (for supported Greek symbols) * Update screenshot * remove Æ, æ, Ø, ø, and ß from math mode test
2025-10-05 11:18:39 +00:00 · 2017-12-28 22:32:45 -08:00
parent d822f04b9b
commit 484d44ee70
17 changed files with 628 additions and 104 deletions
--- a/4
+++ b/4
@@ -123,6 +123,10 @@ PYTHON=$(shell python2 --version >/dev/null 2>&1 && echo python2 || echo python)
 metrics:
 	cd metrics && $(PERL) ./mapping.pl | $(PYTHON) ./extract_tfms.py | $(PYTHON) ./extract_ttfs.py | $(PYTHON) ./format_json.py --width > ../src/fontMetricsData.js

+unicode:
+	cd src && $(NODE) unicodeMake.js >unicodeSymbols.js
+src/unicodeSymbols.js: unicode
+
 clean:
 	rm -rf build/* $(NIS)

--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -36,11 +36,16 @@ import {LexerInterface, Token} from "./Token";
 const commentRegexString = "%[^\n]*[\n]";
 const controlWordRegexString = "\\\\[a-zA-Z@]+";
 const controlSymbolRegexString = "\\\\[^\uD800-\uDFFF]";
+const combiningDiacriticalMarkString = "[\u0300-\u036f]";
+export const combiningDiacriticalMarksEndRegex =
+    new RegExp(`${combiningDiacriticalMarkString}+$`);
 const tokenRegex = new RegExp(
    "([ \r\n\t]+)|" +                                 // whitespace
-    `(${commentRegexString}|` +                       // comments
-    "[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +   // single codepoint
+    `(${commentRegexString}` +                        // comments
+    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
+    `${combiningDiacriticalMarkString}*` +            // ...plus accents
    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
+    `${combiningDiacriticalMarkString}*` +            // ...plus accents
    "|\\\\verb\\*([^]).*?\\3" +                       // \verb*
    "|\\\\verb([^*a-zA-Z]).*?\\4" +                   // \verb unstarred
    `|${controlWordRegexString}` +                    // \macroName
@@ -60,6 +65,7 @@ export default class Lexer implements LexerInterface {
    pos: number;

    constructor(input: string) {
+        // Keep input as-is; tokenRegex groups accents with their base character
        this.input = input;
        this.pos = 0;
    }
@@ -76,7 +82,7 @@ export default class Lexer implements LexerInterface {
        const match = matchAt(tokenRegex, input, pos);
        if (match === null) {
            throw new ParseError(
-                "Unexpected character: '" + input[pos] + "'",
+                `Unexpected character: '${input[pos]}'`,
                new Token(input[pos], new SourceLocation(this, pos, pos + 1)));
        }
        const text = match[2] || " ";
--- a/src/Parser.js
+++ b/src/Parser.js
@@ -1,4 +1,5 @@
 /* eslint no-constant-condition:0 */
+/* eslint no-console:0 */
 import functions from "./functions";
 import environments from "./environments";
 import MacroExpander from "./MacroExpander";
@@ -6,8 +7,11 @@ import symbols from "./symbols";
 import utils from "./utils";
 import { validUnit } from "./units";
 import { cjkRegex } from "./unicodeRegexes";
+import unicodeAccents from "./unicodeAccents";
+import unicodeSymbols from "./unicodeSymbols";
 import ParseNode from "./ParseNode";
 import ParseError from "./ParseError";
+import { combiningDiacriticalMarksEndRegex } from "./Lexer.js";

 /**
 * This file contains the parser used to parse out a TeX expression from the
@@ -1042,30 +1046,16 @@ export default class Parser {
     */
    parseSymbol() {
        const nucleus = this.nextToken;
+        let text = nucleus.text;

-        if (functions[nucleus.text]) {
+        if (functions[text]) {
            this.consume();
            // If there exists a function with this name, we return the function and
            // say that it is a function.
            return newFunction(nucleus);
-        } else if (symbols[this.mode][nucleus.text]) {
+        } else if (/^\\verb[^a-zA-Z]/.test(text)) {
            this.consume();
-            // Otherwise if this is a no-argument function, find the type it
-            // corresponds to in the symbols map
-            return newArgument(
-                new ParseNode(symbols[this.mode][nucleus.text].group,
-                            nucleus.text, this.mode, nucleus),
-                nucleus);
-        } else if (this.mode === "text" && cjkRegex.test(nucleus.text)) {
-            this.consume();
-            return newArgument(
-                new ParseNode("textord", nucleus.text, this.mode, nucleus),
-                nucleus);
-        } else if (nucleus.text === "$") {
-            return newDollar(nucleus);
-        } else if (/^\\verb[^a-zA-Z]/.test(nucleus.text)) {
-            this.consume();
-            let arg = nucleus.text.slice(5);
+            let arg = text.slice(5);
            const star = (arg.charAt(0) === "*");
            if (star) {
                arg = arg.slice(1);
@@ -1082,8 +1072,58 @@ export default class Parser {
                    body: arg,
                    star: star,
                }, "text"), nucleus);
+        } else if (text === "$") {
+            return newDollar(nucleus);
+        }
+        // At this point, we should have a symbol, possibly with accents.
+        // First expand any accented base symbol according to unicodeSymbols.
+        if (unicodeSymbols.hasOwnProperty(text[0]) &&
+            !symbols[this.mode][text[0]]) {
+            text = unicodeSymbols[text[0]] + text.substr(1);
+        }
+        // Strip off any combining characters
+        const match = combiningDiacriticalMarksEndRegex.exec(text);
+        if (match) {
+            text = text.substring(0, match.index);
+            if (text === 'i') {
+                text = '\u0131';  // dotless i, in math and text mode
+            } else if (text === 'j') {
+                text = '\u0237';  // dotless j, in math and text mode
+            }
+        }
+        // Recognize base symbol
+        let symbol = null;
+        if (symbols[this.mode][text]) {
+            symbol = new ParseNode(symbols[this.mode][text].group,
+                            text, this.mode, nucleus);
+        } else if (this.mode === "text" && cjkRegex.test(text)) {
+            symbol = new ParseNode("textord", text, this.mode, nucleus);
        } else {
-            return null;
+            return null;  // EOF, ^, _, {, }, etc.
+        }
+        this.consume();
+        // Transform combining characters into accents
+        if (match) {
+            for (let i = 0; i < match[0].length; i++) {
+                const accent = match[0][i];
+                if (!unicodeAccents[accent]) {
+                    throw new ParseError(`Unknown accent ' ${accent}'`, nucleus);
+                }
+                const command = unicodeAccents[accent][this.mode];
+                if (!command) {
+                    throw new ParseError(
+                        `Accent ${accent} unsupported in ${this.mode} mode`,
+                        nucleus);
+                }
+                symbol = new ParseNode("accent", {
+                    type: "accent",
+                    label: command,
+                    isStretchy: false,
+                    isShifty: true,
+                    base: symbol,
+                }, this.mode, nucleus);
            }
        }
+        return newArgument(symbol, nucleus);
+    }
 }
--- a/src/buildCommon.js
+++ b/src/buildCommon.js
@@ -19,9 +19,9 @@ import type {DomChildNode, CombinableDomNode} from "./domTree";

 // The following have to be loaded from Main-Italic font, using class mainit
 const mainitLetters = [
-    "\\imath",   // dotless i
-    "\\jmath",   // dotless j
-    "\\pounds",  // pounds symbol
+    "\\imath", "ı",       // dotless i
+    "\\jmath", "ȷ",       // dotless j
+    "\\pounds", "\\mathsterling", "\\textsterling", "£",   // pounds symbol
 ];

 /**
--- a/src/fontMetrics.js
+++ b/src/fontMetrics.js
@@ -97,63 +97,14 @@ import metricMap from "./fontMetricsData";
 // TODO(kevinb) allow union of multiple glyph metrics for better accuracy.
 const extraCharacterMap = {
    // Latin-1
-    'À': 'A',
-    'Á': 'A',
-    'Â': 'A',
-    'Ã': 'A',
-    'Ä': 'A',
    'Å': 'A',
    'Ç': 'C',
-    'È': 'E',
-    'É': 'E',
-    'Ê': 'E',
-    'Ë': 'E',
-    'Ì': 'I',
-    'Í': 'I',
-    'Î': 'I',
-    'Ï': 'I',
    'Ð': 'D',
-    'Ñ': 'N',
-    'Ò': 'O',
-    'Ó': 'O',
-    'Ô': 'O',
-    'Õ': 'O',
-    'Ö': 'O',
-    'Ù': 'U',
-    'Ú': 'U',
-    'Û': 'U',
-    'Ü': 'U',
-    'Ý': 'Y',
    'Þ': 'o',
-    'à': 'a',
-    'á': 'a',
-    'â': 'a',
-    'ã': 'a',
-    'ä': 'a',
    'å': 'a',
    'ç': 'c',
-    'è': 'e',
-    'é': 'e',
-    'ê': 'e',
-    'ë': 'e',
-    'ì': 'i',
-    'í': 'i',
-    'î': 'i',
-    'ï': 'i',
    'ð': 'd',
-    'ñ': 'n',
-    'ò': 'o',
-    'ó': 'o',
-    'ô': 'o',
-    'õ': 'o',
-    'ö': 'o',
-    'ù': 'u',
-    'ú': 'u',
-    'û': 'u',
-    'ü': 'u',
-    'ý': 'y',
    'þ': 'o',
-    'ÿ': 'y',

    // Cyrillic
    'А': 'A',
--- a/src/macros.js
+++ b/src/macros.js
@@ -45,7 +45,7 @@ const builtinMacros: MacroMap = {};
 export default builtinMacros;

 // This function might one day accept an additional argument and do more things.
-function defineMacro(name: string, body: MacroDefinition) {
+export function defineMacro(name: string, body: MacroDefinition) {
    builtinMacros[name] = body;
 }

--- a/src/symbols.js
+++ b/src/symbols.js
@@ -644,10 +644,10 @@ defineSymbol(math, main, accent, "\u02c7", "\\check");
 defineSymbol(math, main, accent, "\u005e", "\\hat");
 defineSymbol(math, main, accent, "\u20d7", "\\vec");
 defineSymbol(math, main, accent, "\u02d9", "\\dot");
-defineSymbol(math, main, mathord, "\u0131", "\\imath");
-defineSymbol(math, main, mathord, "\u0237", "\\jmath");
-defineSymbol(text, main, textord, "\u0131", "\\i");
-defineSymbol(text, main, textord, "\u0237", "\\j");
+defineSymbol(math, main, mathord, "\u0131", "\\imath", true);
+defineSymbol(math, main, mathord, "\u0237", "\\jmath", true);
+defineSymbol(text, main, textord, "\u0131", "\\i", true);
+defineSymbol(text, main, textord, "\u0237", "\\j", true);
 defineSymbol(text, main, textord, "\u00df", "\\ss", true);
 defineSymbol(text, main, textord, "\u00e6", "\\ae", true);
 defineSymbol(text, main, textord, "\u00e6", "\\ae", true);
@@ -687,7 +687,7 @@ defineSymbol(text, main, textord, "\u00b0", "\\degree");
 defineSymbol(math, main, mathord, "\u00a3", "\\pounds");
 defineSymbol(math, main, mathord, "\u00a3", "\\mathsterling", true);
 defineSymbol(text, main, mathord, "\u00a3", "\\pounds");
-defineSymbol(text, main, mathord, "\u00a3", "\\textsterling");
+defineSymbol(text, main, mathord, "\u00a3", "\\textsterling", true);
 defineSymbol(math, ams, textord, "\u2720", "\\maltese");
 defineSymbol(text, ams, textord, "\u2720", "\\maltese");

@@ -719,23 +719,20 @@ for (let i = 0; i < letters.length; i++) {
    defineSymbol(text, main, textord, ch, ch);
 }

-// Latin-1 letters
-for (let i = 0x00C0; i <= 0x00D6; i++) {
-    const ch = String.fromCharCode(i);
+// We add these Latin-1 letters as symbols for backwards-compatibility,
+// but they are not actually in the font, nor are they supported by the
+// Unicode accent mechanism, so they fall back to Times font and look ugly.
+// TODO(edemaine): Fix this.
+const extraLatin = "ÇÐÞçðþ";
+for (let i = 0; i < extraLatin.length; i++) {
+    const ch = extraLatin.charAt(i);
    defineSymbol(math, main, mathord, ch, ch);
    defineSymbol(text, main, textord, ch, ch);
 }
-
-for (let i = 0x00D8; i <= 0x00F6; i++) {
-    const ch = String.fromCharCode(i);
+const extraLatinMath = "Åå";
+for (let i = 0; i < extraLatinMath.length; i++) {
+    const ch = extraLatinMath.charAt(i);
    defineSymbol(math, main, mathord, ch, ch);
-    defineSymbol(text, main, textord, ch, ch);
-}
-
-for (let i = 0x00F8; i <= 0x00FF; i++) {
-    const ch = String.fromCharCode(i);
-    defineSymbol(math, main, mathord, ch, ch);
-    defineSymbol(text, main, textord, ch, ch);
 }

 // Cyrillic
--- a/src/unicodeAccents.js
+++ b/src/unicodeAccents.js
@@ -0,0 +1,18 @@
+// Mapping of Unicode accent characters to their LaTeX equivalent in text and
+// math mode (when they exist).
+
+// NOTE: This module needs to be written with Node-style modules (not
+// ES6 modules) so that unicodeMake.js (a Node application) can import it.
+module.exports = {
+    '\u0301': {text: "\\'", math: '\\acute'},
+    '\u0300': {text: '\\`', math: '\\grave'},
+    '\u0308': {text: '\\"', math: '\\ddot'},
+    '\u0303': {text: '\\~', math: '\\tilde'},
+    '\u0304': {text: '\\=', math: '\\bar'},
+    '\u0306': {text: '\\u', math: '\\breve'},
+    '\u030c': {text: '\\v', math: '\\check'},
+    '\u0302': {text: '\\^', math: '\\hat'},
+    '\u0307': {text: '\\.', math: '\\dot'},
+    '\u030a': {text: '\\r'},
+    '\u030b': {text: '\\H'},
+};
--- a/src/unicodeMake.js
+++ b/src/unicodeMake.js
@@ -0,0 +1,59 @@
+/* eslint no-console:0 */
+
+// This is an internal Node tool, not part of the KaTeX distribution,
+// whose purpose is to generate unicodeSymbols.js in this directory.
+// In this way, only this tool, and not the distribution/browser,
+// needs String's normalize function.
+//
+// This tool should be run (via `node unicodeMake.js` or `make unicode`)
+// whenever KaTeX adds support for new accents, and whenever
+// the Unicode spec adds new symbols that should be supported.
+
+const accents = require('./unicodeAccents');
+
+console.log("// This file is GENERATED by unicodeMake.js. DO NOT MODIFY.");
+console.log("");
+
+const encode = function(string) {
+    let output = '"';
+    for (let i = 0; i < string.length; i++) {
+        let hex = string.charCodeAt(i).toString(16);
+        while (hex.length < 4) {
+            hex = `0${hex}`;
+        }
+        output += `\\u${hex}`;
+    }
+    output = `${output}"`;
+    return output;
+};
+
+console.log("export default {");
+
+const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +
+    "αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψωΓΔΘΛΞΠΣΥΦΨΩ";
+for (const letter of letters) {
+    for (const accent of Object.getOwnPropertyNames(accents)) {
+        const combined = letter + accent;
+        const normalized = combined.normalize('NFC');
+        if (normalized.length === 1) {
+            console.log(
+                `    ${encode(normalized)}: ${encode(combined)},`
+                + `  // ${normalized} = ${accents[accent].text}{${letter}}`);
+        }
+        for (const accent2 of Object.getOwnPropertyNames(accents)) {
+            if (accent === accent2) {
+                continue;
+            }
+            const combined2 = combined + accent2;
+            const normalized2 = combined2.normalize('NFC');
+            if (normalized2.length === 1) {
+                console.log(
+                    `    ${encode(normalized2)}: ${encode(combined2)},`
+                    + `  // ${normalized2} = ${accents[accent].text}`
+                    + `${accents[accent2].text}{${letter}}`);
+            }
+        }
+    }
+}
+
+console.log("};");
--- a/src/unicodeSymbols.js
+++ b/src/unicodeSymbols.js
@@ -0,0 +1,322 @@
+// This file is GENERATED by unicodeMake.js. DO NOT MODIFY.
+
+export default {
+    "\u00e1": "\u0061\u0301",  // á = \'{a}
+    "\u00e0": "\u0061\u0300",  // à = \`{a}
+    "\u00e4": "\u0061\u0308",  // ä = \"{a}
+    "\u01df": "\u0061\u0308\u0304",  // ǟ = \"\={a}
+    "\u00e3": "\u0061\u0303",  // ã = \~{a}
+    "\u0101": "\u0061\u0304",  // ā = \={a}
+    "\u0103": "\u0061\u0306",  // ă = \u{a}
+    "\u1eaf": "\u0061\u0306\u0301",  // ắ = \u\'{a}
+    "\u1eb1": "\u0061\u0306\u0300",  // ằ = \u\`{a}
+    "\u1eb5": "\u0061\u0306\u0303",  // ẵ = \u\~{a}
+    "\u01ce": "\u0061\u030c",  // ǎ = \v{a}
+    "\u00e2": "\u0061\u0302",  // â = \^{a}
+    "\u1ea5": "\u0061\u0302\u0301",  // ấ = \^\'{a}
+    "\u1ea7": "\u0061\u0302\u0300",  // ầ = \^\`{a}
+    "\u1eab": "\u0061\u0302\u0303",  // ẫ = \^\~{a}
+    "\u0227": "\u0061\u0307",  // ȧ = \.{a}
+    "\u01e1": "\u0061\u0307\u0304",  // ǡ = \.\={a}
+    "\u00e5": "\u0061\u030a",  // å = \r{a}
+    "\u01fb": "\u0061\u030a\u0301",  // ǻ = \r\'{a}
+    "\u1e03": "\u0062\u0307",  // ḃ = \.{b}
+    "\u0107": "\u0063\u0301",  // ć = \'{c}
+    "\u010d": "\u0063\u030c",  // č = \v{c}
+    "\u0109": "\u0063\u0302",  // ĉ = \^{c}
+    "\u010b": "\u0063\u0307",  // ċ = \.{c}
+    "\u010f": "\u0064\u030c",  // ď = \v{d}
+    "\u1e0b": "\u0064\u0307",  // ḋ = \.{d}
+    "\u00e9": "\u0065\u0301",  // é = \'{e}
+    "\u00e8": "\u0065\u0300",  // è = \`{e}
+    "\u00eb": "\u0065\u0308",  // ë = \"{e}
+    "\u1ebd": "\u0065\u0303",  // ẽ = \~{e}
+    "\u0113": "\u0065\u0304",  // ē = \={e}
+    "\u1e17": "\u0065\u0304\u0301",  // ḗ = \=\'{e}
+    "\u1e15": "\u0065\u0304\u0300",  // ḕ = \=\`{e}
+    "\u0115": "\u0065\u0306",  // ĕ = \u{e}
+    "\u011b": "\u0065\u030c",  // ě = \v{e}
+    "\u00ea": "\u0065\u0302",  // ê = \^{e}
+    "\u1ebf": "\u0065\u0302\u0301",  // ế = \^\'{e}
+    "\u1ec1": "\u0065\u0302\u0300",  // ề = \^\`{e}
+    "\u1ec5": "\u0065\u0302\u0303",  // ễ = \^\~{e}
+    "\u0117": "\u0065\u0307",  // ė = \.{e}
+    "\u1e1f": "\u0066\u0307",  // ḟ = \.{f}
+    "\u01f5": "\u0067\u0301",  // ǵ = \'{g}
+    "\u1e21": "\u0067\u0304",  // ḡ = \={g}
+    "\u011f": "\u0067\u0306",  // ğ = \u{g}
+    "\u01e7": "\u0067\u030c",  // ǧ = \v{g}
+    "\u011d": "\u0067\u0302",  // ĝ = \^{g}
+    "\u0121": "\u0067\u0307",  // ġ = \.{g}
+    "\u1e27": "\u0068\u0308",  // ḧ = \"{h}
+    "\u021f": "\u0068\u030c",  // ȟ = \v{h}
+    "\u0125": "\u0068\u0302",  // ĥ = \^{h}
+    "\u1e23": "\u0068\u0307",  // ḣ = \.{h}
+    "\u00ed": "\u0069\u0301",  // í = \'{i}
+    "\u00ec": "\u0069\u0300",  // ì = \`{i}
+    "\u00ef": "\u0069\u0308",  // ï = \"{i}
+    "\u1e2f": "\u0069\u0308\u0301",  // ḯ = \"\'{i}
+    "\u0129": "\u0069\u0303",  // ĩ = \~{i}
+    "\u012b": "\u0069\u0304",  // ī = \={i}
+    "\u012d": "\u0069\u0306",  // ĭ = \u{i}
+    "\u01d0": "\u0069\u030c",  // ǐ = \v{i}
+    "\u00ee": "\u0069\u0302",  // î = \^{i}
+    "\u01f0": "\u006a\u030c",  // ǰ = \v{j}
+    "\u0135": "\u006a\u0302",  // ĵ = \^{j}
+    "\u1e31": "\u006b\u0301",  // ḱ = \'{k}
+    "\u01e9": "\u006b\u030c",  // ǩ = \v{k}
+    "\u013a": "\u006c\u0301",  // ĺ = \'{l}
+    "\u013e": "\u006c\u030c",  // ľ = \v{l}
+    "\u1e3f": "\u006d\u0301",  // ḿ = \'{m}
+    "\u1e41": "\u006d\u0307",  // ṁ = \.{m}
+    "\u0144": "\u006e\u0301",  // ń = \'{n}
+    "\u01f9": "\u006e\u0300",  // ǹ = \`{n}
+    "\u00f1": "\u006e\u0303",  // ñ = \~{n}
+    "\u0148": "\u006e\u030c",  // ň = \v{n}
+    "\u1e45": "\u006e\u0307",  // ṅ = \.{n}
+    "\u00f3": "\u006f\u0301",  // ó = \'{o}
+    "\u00f2": "\u006f\u0300",  // ò = \`{o}
+    "\u00f6": "\u006f\u0308",  // ö = \"{o}
+    "\u022b": "\u006f\u0308\u0304",  // ȫ = \"\={o}
+    "\u00f5": "\u006f\u0303",  // õ = \~{o}
+    "\u1e4d": "\u006f\u0303\u0301",  // ṍ = \~\'{o}
+    "\u1e4f": "\u006f\u0303\u0308",  // ṏ = \~\"{o}
+    "\u022d": "\u006f\u0303\u0304",  // ȭ = \~\={o}
+    "\u014d": "\u006f\u0304",  // ō = \={o}
+    "\u1e53": "\u006f\u0304\u0301",  // ṓ = \=\'{o}
+    "\u1e51": "\u006f\u0304\u0300",  // ṑ = \=\`{o}
+    "\u014f": "\u006f\u0306",  // ŏ = \u{o}
+    "\u01d2": "\u006f\u030c",  // ǒ = \v{o}
+    "\u00f4": "\u006f\u0302",  // ô = \^{o}
+    "\u1ed1": "\u006f\u0302\u0301",  // ố = \^\'{o}
+    "\u1ed3": "\u006f\u0302\u0300",  // ồ = \^\`{o}
+    "\u1ed7": "\u006f\u0302\u0303",  // ỗ = \^\~{o}
+    "\u022f": "\u006f\u0307",  // ȯ = \.{o}
+    "\u0231": "\u006f\u0307\u0304",  // ȱ = \.\={o}
+    "\u0151": "\u006f\u030b",  // ő = \H{o}
+    "\u1e55": "\u0070\u0301",  // ṕ = \'{p}
+    "\u1e57": "\u0070\u0307",  // ṗ = \.{p}
+    "\u0155": "\u0072\u0301",  // ŕ = \'{r}
+    "\u0159": "\u0072\u030c",  // ř = \v{r}
+    "\u1e59": "\u0072\u0307",  // ṙ = \.{r}
+    "\u015b": "\u0073\u0301",  // ś = \'{s}
+    "\u1e65": "\u0073\u0301\u0307",  // ṥ = \'\.{s}
+    "\u0161": "\u0073\u030c",  // š = \v{s}
+    "\u1e67": "\u0073\u030c\u0307",  // ṧ = \v\.{s}
+    "\u015d": "\u0073\u0302",  // ŝ = \^{s}
+    "\u1e61": "\u0073\u0307",  // ṡ = \.{s}
+    "\u1e97": "\u0074\u0308",  // ẗ = \"{t}
+    "\u0165": "\u0074\u030c",  // ť = \v{t}
+    "\u1e6b": "\u0074\u0307",  // ṫ = \.{t}
+    "\u00fa": "\u0075\u0301",  // ú = \'{u}
+    "\u00f9": "\u0075\u0300",  // ù = \`{u}
+    "\u00fc": "\u0075\u0308",  // ü = \"{u}
+    "\u01d8": "\u0075\u0308\u0301",  // ǘ = \"\'{u}
+    "\u01dc": "\u0075\u0308\u0300",  // ǜ = \"\`{u}
+    "\u01d6": "\u0075\u0308\u0304",  // ǖ = \"\={u}
+    "\u01da": "\u0075\u0308\u030c",  // ǚ = \"\v{u}
+    "\u0169": "\u0075\u0303",  // ũ = \~{u}
+    "\u1e79": "\u0075\u0303\u0301",  // ṹ = \~\'{u}
+    "\u016b": "\u0075\u0304",  // ū = \={u}
+    "\u1e7b": "\u0075\u0304\u0308",  // ṻ = \=\"{u}
+    "\u016d": "\u0075\u0306",  // ŭ = \u{u}
+    "\u01d4": "\u0075\u030c",  // ǔ = \v{u}
+    "\u00fb": "\u0075\u0302",  // û = \^{u}
+    "\u016f": "\u0075\u030a",  // ů = \r{u}
+    "\u0171": "\u0075\u030b",  // ű = \H{u}
+    "\u1e7d": "\u0076\u0303",  // ṽ = \~{v}
+    "\u1e83": "\u0077\u0301",  // ẃ = \'{w}
+    "\u1e81": "\u0077\u0300",  // ẁ = \`{w}
+    "\u1e85": "\u0077\u0308",  // ẅ = \"{w}
+    "\u0175": "\u0077\u0302",  // ŵ = \^{w}
+    "\u1e87": "\u0077\u0307",  // ẇ = \.{w}
+    "\u1e98": "\u0077\u030a",  // ẘ = \r{w}
+    "\u1e8d": "\u0078\u0308",  // ẍ = \"{x}
+    "\u1e8b": "\u0078\u0307",  // ẋ = \.{x}
+    "\u00fd": "\u0079\u0301",  // ý = \'{y}
+    "\u1ef3": "\u0079\u0300",  // ỳ = \`{y}
+    "\u00ff": "\u0079\u0308",  // ÿ = \"{y}
+    "\u1ef9": "\u0079\u0303",  // ỹ = \~{y}
+    "\u0233": "\u0079\u0304",  // ȳ = \={y}
+    "\u0177": "\u0079\u0302",  // ŷ = \^{y}
+    "\u1e8f": "\u0079\u0307",  // ẏ = \.{y}
+    "\u1e99": "\u0079\u030a",  // ẙ = \r{y}
+    "\u017a": "\u007a\u0301",  // ź = \'{z}
+    "\u017e": "\u007a\u030c",  // ž = \v{z}
+    "\u1e91": "\u007a\u0302",  // ẑ = \^{z}
+    "\u017c": "\u007a\u0307",  // ż = \.{z}
+    "\u00c1": "\u0041\u0301",  // Á = \'{A}
+    "\u00c0": "\u0041\u0300",  // À = \`{A}
+    "\u00c4": "\u0041\u0308",  // Ä = \"{A}
+    "\u01de": "\u0041\u0308\u0304",  // Ǟ = \"\={A}
+    "\u00c3": "\u0041\u0303",  // Ã = \~{A}
+    "\u0100": "\u0041\u0304",  // Ā = \={A}
+    "\u0102": "\u0041\u0306",  // Ă = \u{A}
+    "\u1eae": "\u0041\u0306\u0301",  // Ắ = \u\'{A}
+    "\u1eb0": "\u0041\u0306\u0300",  // Ằ = \u\`{A}
+    "\u1eb4": "\u0041\u0306\u0303",  // Ẵ = \u\~{A}
+    "\u01cd": "\u0041\u030c",  // Ǎ = \v{A}
+    "\u00c2": "\u0041\u0302",  // Â = \^{A}
+    "\u1ea4": "\u0041\u0302\u0301",  // Ấ = \^\'{A}
+    "\u1ea6": "\u0041\u0302\u0300",  // Ầ = \^\`{A}
+    "\u1eaa": "\u0041\u0302\u0303",  // Ẫ = \^\~{A}
+    "\u0226": "\u0041\u0307",  // Ȧ = \.{A}
+    "\u01e0": "\u0041\u0307\u0304",  // Ǡ = \.\={A}
+    "\u00c5": "\u0041\u030a",  // Å = \r{A}
+    "\u01fa": "\u0041\u030a\u0301",  // Ǻ = \r\'{A}
+    "\u1e02": "\u0042\u0307",  // Ḃ = \.{B}
+    "\u0106": "\u0043\u0301",  // Ć = \'{C}
+    "\u010c": "\u0043\u030c",  // Č = \v{C}
+    "\u0108": "\u0043\u0302",  // Ĉ = \^{C}
+    "\u010a": "\u0043\u0307",  // Ċ = \.{C}
+    "\u010e": "\u0044\u030c",  // Ď = \v{D}
+    "\u1e0a": "\u0044\u0307",  // Ḋ = \.{D}
+    "\u00c9": "\u0045\u0301",  // É = \'{E}
+    "\u00c8": "\u0045\u0300",  // È = \`{E}
+    "\u00cb": "\u0045\u0308",  // Ë = \"{E}
+    "\u1ebc": "\u0045\u0303",  // Ẽ = \~{E}
+    "\u0112": "\u0045\u0304",  // Ē = \={E}
+    "\u1e16": "\u0045\u0304\u0301",  // Ḗ = \=\'{E}
+    "\u1e14": "\u0045\u0304\u0300",  // Ḕ = \=\`{E}
+    "\u0114": "\u0045\u0306",  // Ĕ = \u{E}
+    "\u011a": "\u0045\u030c",  // Ě = \v{E}
+    "\u00ca": "\u0045\u0302",  // Ê = \^{E}
+    "\u1ebe": "\u0045\u0302\u0301",  // Ế = \^\'{E}
+    "\u1ec0": "\u0045\u0302\u0300",  // Ề = \^\`{E}
+    "\u1ec4": "\u0045\u0302\u0303",  // Ễ = \^\~{E}
+    "\u0116": "\u0045\u0307",  // Ė = \.{E}
+    "\u1e1e": "\u0046\u0307",  // Ḟ = \.{F}
+    "\u01f4": "\u0047\u0301",  // Ǵ = \'{G}
+    "\u1e20": "\u0047\u0304",  // Ḡ = \={G}
+    "\u011e": "\u0047\u0306",  // Ğ = \u{G}
+    "\u01e6": "\u0047\u030c",  // Ǧ = \v{G}
+    "\u011c": "\u0047\u0302",  // Ĝ = \^{G}
+    "\u0120": "\u0047\u0307",  // Ġ = \.{G}
+    "\u1e26": "\u0048\u0308",  // Ḧ = \"{H}
+    "\u021e": "\u0048\u030c",  // Ȟ = \v{H}
+    "\u0124": "\u0048\u0302",  // Ĥ = \^{H}
+    "\u1e22": "\u0048\u0307",  // Ḣ = \.{H}
+    "\u00cd": "\u0049\u0301",  // Í = \'{I}
+    "\u00cc": "\u0049\u0300",  // Ì = \`{I}
+    "\u00cf": "\u0049\u0308",  // Ï = \"{I}
+    "\u1e2e": "\u0049\u0308\u0301",  // Ḯ = \"\'{I}
+    "\u0128": "\u0049\u0303",  // Ĩ = \~{I}
+    "\u012a": "\u0049\u0304",  // Ī = \={I}
+    "\u012c": "\u0049\u0306",  // Ĭ = \u{I}
+    "\u01cf": "\u0049\u030c",  // Ǐ = \v{I}
+    "\u00ce": "\u0049\u0302",  // Î = \^{I}
+    "\u0130": "\u0049\u0307",  // İ = \.{I}
+    "\u0134": "\u004a\u0302",  // Ĵ = \^{J}
+    "\u1e30": "\u004b\u0301",  // Ḱ = \'{K}
+    "\u01e8": "\u004b\u030c",  // Ǩ = \v{K}
+    "\u0139": "\u004c\u0301",  // Ĺ = \'{L}
+    "\u013d": "\u004c\u030c",  // Ľ = \v{L}
+    "\u1e3e": "\u004d\u0301",  // Ḿ = \'{M}
+    "\u1e40": "\u004d\u0307",  // Ṁ = \.{M}
+    "\u0143": "\u004e\u0301",  // Ń = \'{N}
+    "\u01f8": "\u004e\u0300",  // Ǹ = \`{N}
+    "\u00d1": "\u004e\u0303",  // Ñ = \~{N}
+    "\u0147": "\u004e\u030c",  // Ň = \v{N}
+    "\u1e44": "\u004e\u0307",  // Ṅ = \.{N}
+    "\u00d3": "\u004f\u0301",  // Ó = \'{O}
+    "\u00d2": "\u004f\u0300",  // Ò = \`{O}
+    "\u00d6": "\u004f\u0308",  // Ö = \"{O}
+    "\u022a": "\u004f\u0308\u0304",  // Ȫ = \"\={O}
+    "\u00d5": "\u004f\u0303",  // Õ = \~{O}
+    "\u1e4c": "\u004f\u0303\u0301",  // Ṍ = \~\'{O}
+    "\u1e4e": "\u004f\u0303\u0308",  // Ṏ = \~\"{O}
+    "\u022c": "\u004f\u0303\u0304",  // Ȭ = \~\={O}
+    "\u014c": "\u004f\u0304",  // Ō = \={O}
+    "\u1e52": "\u004f\u0304\u0301",  // Ṓ = \=\'{O}
+    "\u1e50": "\u004f\u0304\u0300",  // Ṑ = \=\`{O}
+    "\u014e": "\u004f\u0306",  // Ŏ = \u{O}
+    "\u01d1": "\u004f\u030c",  // Ǒ = \v{O}
+    "\u00d4": "\u004f\u0302",  // Ô = \^{O}
+    "\u1ed0": "\u004f\u0302\u0301",  // Ố = \^\'{O}
+    "\u1ed2": "\u004f\u0302\u0300",  // Ồ = \^\`{O}
+    "\u1ed6": "\u004f\u0302\u0303",  // Ỗ = \^\~{O}
+    "\u022e": "\u004f\u0307",  // Ȯ = \.{O}
+    "\u0230": "\u004f\u0307\u0304",  // Ȱ = \.\={O}
+    "\u0150": "\u004f\u030b",  // Ő = \H{O}
+    "\u1e54": "\u0050\u0301",  // Ṕ = \'{P}
+    "\u1e56": "\u0050\u0307",  // Ṗ = \.{P}
+    "\u0154": "\u0052\u0301",  // Ŕ = \'{R}
+    "\u0158": "\u0052\u030c",  // Ř = \v{R}
+    "\u1e58": "\u0052\u0307",  // Ṙ = \.{R}
+    "\u015a": "\u0053\u0301",  // Ś = \'{S}
+    "\u1e64": "\u0053\u0301\u0307",  // Ṥ = \'\.{S}
+    "\u0160": "\u0053\u030c",  // Š = \v{S}
+    "\u1e66": "\u0053\u030c\u0307",  // Ṧ = \v\.{S}
+    "\u015c": "\u0053\u0302",  // Ŝ = \^{S}
+    "\u1e60": "\u0053\u0307",  // Ṡ = \.{S}
+    "\u0164": "\u0054\u030c",  // Ť = \v{T}
+    "\u1e6a": "\u0054\u0307",  // Ṫ = \.{T}
+    "\u00da": "\u0055\u0301",  // Ú = \'{U}
+    "\u00d9": "\u0055\u0300",  // Ù = \`{U}
+    "\u00dc": "\u0055\u0308",  // Ü = \"{U}
+    "\u01d7": "\u0055\u0308\u0301",  // Ǘ = \"\'{U}
+    "\u01db": "\u0055\u0308\u0300",  // Ǜ = \"\`{U}
+    "\u01d5": "\u0055\u0308\u0304",  // Ǖ = \"\={U}
+    "\u01d9": "\u0055\u0308\u030c",  // Ǚ = \"\v{U}
+    "\u0168": "\u0055\u0303",  // Ũ = \~{U}
+    "\u1e78": "\u0055\u0303\u0301",  // Ṹ = \~\'{U}
+    "\u016a": "\u0055\u0304",  // Ū = \={U}
+    "\u1e7a": "\u0055\u0304\u0308",  // Ṻ = \=\"{U}
+    "\u016c": "\u0055\u0306",  // Ŭ = \u{U}
+    "\u01d3": "\u0055\u030c",  // Ǔ = \v{U}
+    "\u00db": "\u0055\u0302",  // Û = \^{U}
+    "\u016e": "\u0055\u030a",  // Ů = \r{U}
+    "\u0170": "\u0055\u030b",  // Ű = \H{U}
+    "\u1e7c": "\u0056\u0303",  // Ṽ = \~{V}
+    "\u1e82": "\u0057\u0301",  // Ẃ = \'{W}
+    "\u1e80": "\u0057\u0300",  // Ẁ = \`{W}
+    "\u1e84": "\u0057\u0308",  // Ẅ = \"{W}
+    "\u0174": "\u0057\u0302",  // Ŵ = \^{W}
+    "\u1e86": "\u0057\u0307",  // Ẇ = \.{W}
+    "\u1e8c": "\u0058\u0308",  // Ẍ = \"{X}
+    "\u1e8a": "\u0058\u0307",  // Ẋ = \.{X}
+    "\u00dd": "\u0059\u0301",  // Ý = \'{Y}
+    "\u1ef2": "\u0059\u0300",  // Ỳ = \`{Y}
+    "\u0178": "\u0059\u0308",  // Ÿ = \"{Y}
+    "\u1ef8": "\u0059\u0303",  // Ỹ = \~{Y}
+    "\u0232": "\u0059\u0304",  // Ȳ = \={Y}
+    "\u0176": "\u0059\u0302",  // Ŷ = \^{Y}
+    "\u1e8e": "\u0059\u0307",  // Ẏ = \.{Y}
+    "\u0179": "\u005a\u0301",  // Ź = \'{Z}
+    "\u017d": "\u005a\u030c",  // Ž = \v{Z}
+    "\u1e90": "\u005a\u0302",  // Ẑ = \^{Z}
+    "\u017b": "\u005a\u0307",  // Ż = \.{Z}
+    "\u03ac": "\u03b1\u0301",  // ά = \'{α}
+    "\u1f70": "\u03b1\u0300",  // ὰ = \`{α}
+    "\u1fb1": "\u03b1\u0304",  // ᾱ = \={α}
+    "\u1fb0": "\u03b1\u0306",  // ᾰ = \u{α}
+    "\u03ad": "\u03b5\u0301",  // έ = \'{ε}
+    "\u1f72": "\u03b5\u0300",  // ὲ = \`{ε}
+    "\u03ae": "\u03b7\u0301",  // ή = \'{η}
+    "\u1f74": "\u03b7\u0300",  // ὴ = \`{η}
+    "\u03af": "\u03b9\u0301",  // ί = \'{ι}
+    "\u1f76": "\u03b9\u0300",  // ὶ = \`{ι}
+    "\u03ca": "\u03b9\u0308",  // ϊ = \"{ι}
+    "\u0390": "\u03b9\u0308\u0301",  // ΐ = \"\'{ι}
+    "\u1fd2": "\u03b9\u0308\u0300",  // ῒ = \"\`{ι}
+    "\u1fd1": "\u03b9\u0304",  // ῑ = \={ι}
+    "\u1fd0": "\u03b9\u0306",  // ῐ = \u{ι}
+    "\u03cc": "\u03bf\u0301",  // ό = \'{ο}
+    "\u1f78": "\u03bf\u0300",  // ὸ = \`{ο}
+    "\u03cd": "\u03c5\u0301",  // ύ = \'{υ}
+    "\u1f7a": "\u03c5\u0300",  // ὺ = \`{υ}
+    "\u03cb": "\u03c5\u0308",  // ϋ = \"{υ}
+    "\u03b0": "\u03c5\u0308\u0301",  // ΰ = \"\'{υ}
+    "\u1fe2": "\u03c5\u0308\u0300",  // ῢ = \"\`{υ}
+    "\u1fe1": "\u03c5\u0304",  // ῡ = \={υ}
+    "\u1fe0": "\u03c5\u0306",  // ῠ = \u{υ}
+    "\u03ce": "\u03c9\u0301",  // ώ = \'{ω}
+    "\u1f7c": "\u03c9\u0300",  // ὼ = \`{ω}
+    "\u038e": "\u03a5\u0301",  // Ύ = \'{Υ}
+    "\u1fea": "\u03a5\u0300",  // Ὺ = \`{Υ}
+    "\u03ab": "\u03a5\u0308",  // Ϋ = \"{Υ}
+    "\u1fe9": "\u03a5\u0304",  // Ῡ = \={Υ}
+    "\u1fe8": "\u03a5\u0306",  // Ῠ = \u{Υ}
+    "\u038f": "\u03a9\u0301",  // Ώ = \'{Ω}
+    "\u1ffa": "\u03a9\u0300",  // Ὼ = \`{Ω}
+};
--- a/test/snapshots/mathml-spec.js.snap
+++ b/test/snapshots/mathml-spec.js.snap
@@ -1,5 +1,62 @@
 // Jest Snapshot v1, https://goo.gl/fbAQLP

+exports[`A MathML builder accents turn into <mover accent="true"> in MathML 1`] = `
+
+<math>
+  <semantics>
+    <mrow>
+      <mover accent="true">
+        <mi>
+          u
+        </mi>
+        <mo>
+          ¨
+        </mo>
+      </mover>
+      <mi>
+        b
+      </mi>
+      <mi>
+        e
+      </mi>
+      <mi>
+        r
+      </mi>
+      <mi>
+        f
+      </mi>
+      <mi>
+        i
+      </mi>
+      <mi>
+        a
+      </mi>
+      <mi>
+        n
+      </mi>
+      <mi>
+        c
+      </mi>
+      <mover accent="true">
+        <mi>
+          e
+        </mi>
+        <mo>
+          ´
+        </mo>
+      </mover>
+      <mi>
+        e
+      </mi>
+    </mrow>
+    <annotation encoding="application/x-tex">
+      über fiancée
+    </annotation>
+  </semantics>
+</math>
+
+`;
+
 exports[`A MathML builder should generate <mphantom> nodes for \\phantom 1`] = `

 <math>
--- a/test/errors-spec.js
+++ b/test/errors-spec.js
@@ -375,3 +375,10 @@ describe("Lexer:", function() {
    });

 });
+
+describe("Unicode accents", function() {
+    it("should return error for invalid combining characters", function() {
+        expect("A\u0328").toFailWithParseError(
+            "Unknown accent ' ̨' at position 1: Ą̲̲");
+    });
+});
--- a/test/katex-spec.js
+++ b/test/katex-spec.js
@@ -2757,15 +2757,64 @@ describe("A parser taking String objects", function() {
    });
 });

+describe("Unicode accents", function() {
+    it("should parse Latin-1 letters in math mode", function() {
+        // TODO(edemaine): Unsupported Latin-1 letters in math: ÅåÇÐÞçðþ
+        expect("ÀÁÂÃÄÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäèéêëìíîïñòóôõöùúûüýÿ")
+        .toParseLike(
+            "\\grave A\\acute A\\hat A\\tilde A\\ddot A" +
+            "\\grave E\\acute E\\hat E\\ddot E" +
+            "\\grave I\\acute I\\hat I\\ddot I" +
+            "\\tilde N" +
+            "\\grave O\\acute O\\hat O\\tilde O\\ddot O" +
+            "\\grave U\\acute U\\hat U\\ddot U" +
+            "\\acute Y" +
+            "\\grave a\\acute a\\hat a\\tilde a\\ddot a" +
+            "\\grave e\\acute e\\hat e\\ddot e" +
+            "\\grave ı\\acute ı\\hat ı\\ddot ı" +
+            "\\tilde n" +
+            "\\grave o\\acute o\\hat o\\tilde o\\ddot o" +
+            "\\grave u\\acute u\\hat u\\ddot u" +
+            "\\acute y\\ddot y");
+    });
+
+    it("should parse Latin-1 letters in text mode", function() {
+        // TODO(edemaine): Unsupported Latin-1 letters in text: ÇÐÞçðþ
+        expect("\\text{ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ}")
+        .toParseLike(
+            "\\text{\\`A\\'A\\^A\\~A\\\"A\\r A" +
+            "\\`E\\'E\\^E\\\"E" +
+            "\\`I\\'I\\^I\\\"I" +
+            "\\~N" +
+            "\\`O\\'O\\^O\\~O\\\"O" +
+            "\\`U\\'U\\^U\\\"U" +
+            "\\'Y" +
+            "\\`a\\'a\\^a\\~a\\\"a\\r a" +
+            "\\`e\\'e\\^e\\\"e" +
+            "\\`ı\\'ı\\^ı\\\"ı" +
+            "\\~n" +
+            "\\`o\\'o\\^o\\~o\\\"o" +
+            "\\`u\\'u\\^u\\\"u" +
+            "\\'y\\\"y}");
+    });
+
+    it("should parse combining characters", function() {
+        expect("A\u0301C\u0301").toParseLike("Á\\acute C");
+        expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}");
+    });
+
+    it("should parse multi-accented characters", function() {
+        expect("ấā́ắ\\text{ấā́ắ}").toParse();
+        // Checked with toParse() only: this doesn't parse quite the same as
+        // "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups.
+    });
+
+    it("should parse accented i's and j's", function() {
+        expect("íȷ́").toParseLike("\\acute ı\\acute ȷ");
+    });
+});
+
 describe("Unicode", function() {
-    it("should parse all lower case Greek letters", function() {
-        expect("αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψω").toParse();
-    });
-
-    it("should parse 'ΓΔΘΞΠΣΦΨΩ'", function() {
-        expect("ΓΔΘΞΠΣΦΨΩ").toParse();
-    });
-
    it("should parse negated relations", function() {
        expect("∉∤∦≁≆≠≨≩≮≯≰≱⊀⊁⊈⊉⊊⊋⊬⊭⊮⊯⋠⋡⋦⋧⋨⋩⋬⋭⪇⪈⪉⪊⪵⪶⪹⪺⫋⫌").toParse();
    });
--- a/test/mathml-spec.js
+++ b/test/mathml-spec.js
@@ -93,4 +93,8 @@ describe("A MathML builder", function() {
        expect(getMathML(`\\boldsymbol{Ax2k\\omega\\Omega\\imath+}`))
            .toMatchSnapshot();
    });
+
+    it('accents turn into <mover accent="true"> in MathML', function() {
+        expect(getMathML("über fiancée")).toMatchSnapshot();
+    });
 });
--- a/test/screenshotter/images/Unicode-chrome.png
+++ b/test/screenshotter/images/Unicode-chrome.png
--- a/test/screenshotter/images/Unicode-firefox.png
+++ b/test/screenshotter/images/Unicode-firefox.png
--- a/test/unicode-spec.js
+++ b/test/unicode-spec.js
@@ -67,11 +67,21 @@ describe("unicode", function() {
    });

    it("should parse Latin-1 inside \\text{}", function() {
-        expect('\\text{ÀàÇçÉéÏïÖöÛû}').toParse();
+        expect('\\text{ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
+            'ÆÇÐØÞßæçðøþ}').toParse();
    });

    it("should parse Latin-1 outside \\text{}", function() {
-        expect('ÀàÇçÉéÏïÖöÛû').toParse();
+        expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
+            'ÇÐÞçðþ').toParse();
+    });
+
+    it("should parse all lower case Greek letters", function() {
+        expect("αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψω").toParse();
+    });
+
+    it("should parse math upper case Greek letters", function() {
+        expect("ΓΔΘΛΞΠΣΥΦΨΩ").toParse();
    });

    it("should parse Cyrillic inside \\text{}", function() {