mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-05 03:08:40 +00:00
Unicode accents (#992)
* Unicode accents * Lexer now looks for combining diacritical marks and adds them to the same character * Parser's `parseSymbol` now recognizes both combined and uncombined forms of Unicode accents, and builds accent objects just like the accent functions * Added CJK support to math mode (not just text mode) * Add invalid combining character test * Add MathML test * Add weak support for other Latin-1 characters This maintains backwards compatibility, but it uses the wrong font. There's a TODO to fix this later. Also refactor symbol code to use for..of * Update Unicode screenshot * Remove dot from accented i and j (in math mode) Also add dotless Unicode characters to support some accented i's and j's * Fix \imath, \jmath, \pounds, and more tests * Switch from for..of to .split().forEach() Save around 800 bytes in minified code * Fix split * normalize() detection * Convert back to vanilla for loops * Fix merge * Move normalize dependency to unicodeMake.js * Make unicodeSymbols into a lookup table instead of macros This is important for multi-accented characters. * Add comments about when to run * Move symbols definition into unicodeMake/Symbols.js * Remove CJK support in text mode * Add missing semicolon * Refactor unicodeAccents to its own file * Dotless i/j support in text mode * Remove excess character mappings * Fix Åå in math mode (still via Times) * Update to support #1030 * Add accented Greek letter support (for supported Greek symbols) * Update screenshot * remove Æ, æ, Ø, ø, and ß from math mode test
This commit is contained in:
committed by
Kevin Barabash
parent
d822f04b9b
commit
484d44ee70
@@ -1,5 +1,62 @@
|
||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`A MathML builder accents turn into <mover accent="true"> in MathML 1`] = `
|
||||
|
||||
<math>
|
||||
<semantics>
|
||||
<mrow>
|
||||
<mover accent="true">
|
||||
<mi>
|
||||
u
|
||||
</mi>
|
||||
<mo>
|
||||
¨
|
||||
</mo>
|
||||
</mover>
|
||||
<mi>
|
||||
b
|
||||
</mi>
|
||||
<mi>
|
||||
e
|
||||
</mi>
|
||||
<mi>
|
||||
r
|
||||
</mi>
|
||||
<mi>
|
||||
f
|
||||
</mi>
|
||||
<mi>
|
||||
i
|
||||
</mi>
|
||||
<mi>
|
||||
a
|
||||
</mi>
|
||||
<mi>
|
||||
n
|
||||
</mi>
|
||||
<mi>
|
||||
c
|
||||
</mi>
|
||||
<mover accent="true">
|
||||
<mi>
|
||||
e
|
||||
</mi>
|
||||
<mo>
|
||||
´
|
||||
</mo>
|
||||
</mover>
|
||||
<mi>
|
||||
e
|
||||
</mi>
|
||||
</mrow>
|
||||
<annotation encoding="application/x-tex">
|
||||
über fiancée
|
||||
</annotation>
|
||||
</semantics>
|
||||
</math>
|
||||
|
||||
`;
|
||||
|
||||
exports[`A MathML builder should generate <mphantom> nodes for \\phantom 1`] = `
|
||||
|
||||
<math>
|
||||
|
@@ -375,3 +375,10 @@ describe("Lexer:", function() {
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
describe("Unicode accents", function() {
|
||||
it("should return error for invalid combining characters", function() {
|
||||
expect("A\u0328").toFailWithParseError(
|
||||
"Unknown accent ' ̨' at position 1: Ą̲̲");
|
||||
});
|
||||
});
|
||||
|
@@ -2757,15 +2757,64 @@ describe("A parser taking String objects", function() {
|
||||
});
|
||||
});
|
||||
|
||||
describe("Unicode accents", function() {
|
||||
it("should parse Latin-1 letters in math mode", function() {
|
||||
// TODO(edemaine): Unsupported Latin-1 letters in math: ÅåÇÐÞçðþ
|
||||
expect("ÀÁÂÃÄÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäèéêëìíîïñòóôõöùúûüýÿ")
|
||||
.toParseLike(
|
||||
"\\grave A\\acute A\\hat A\\tilde A\\ddot A" +
|
||||
"\\grave E\\acute E\\hat E\\ddot E" +
|
||||
"\\grave I\\acute I\\hat I\\ddot I" +
|
||||
"\\tilde N" +
|
||||
"\\grave O\\acute O\\hat O\\tilde O\\ddot O" +
|
||||
"\\grave U\\acute U\\hat U\\ddot U" +
|
||||
"\\acute Y" +
|
||||
"\\grave a\\acute a\\hat a\\tilde a\\ddot a" +
|
||||
"\\grave e\\acute e\\hat e\\ddot e" +
|
||||
"\\grave ı\\acute ı\\hat ı\\ddot ı" +
|
||||
"\\tilde n" +
|
||||
"\\grave o\\acute o\\hat o\\tilde o\\ddot o" +
|
||||
"\\grave u\\acute u\\hat u\\ddot u" +
|
||||
"\\acute y\\ddot y");
|
||||
});
|
||||
|
||||
it("should parse Latin-1 letters in text mode", function() {
|
||||
// TODO(edemaine): Unsupported Latin-1 letters in text: ÇÐÞçðþ
|
||||
expect("\\text{ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ}")
|
||||
.toParseLike(
|
||||
"\\text{\\`A\\'A\\^A\\~A\\\"A\\r A" +
|
||||
"\\`E\\'E\\^E\\\"E" +
|
||||
"\\`I\\'I\\^I\\\"I" +
|
||||
"\\~N" +
|
||||
"\\`O\\'O\\^O\\~O\\\"O" +
|
||||
"\\`U\\'U\\^U\\\"U" +
|
||||
"\\'Y" +
|
||||
"\\`a\\'a\\^a\\~a\\\"a\\r a" +
|
||||
"\\`e\\'e\\^e\\\"e" +
|
||||
"\\`ı\\'ı\\^ı\\\"ı" +
|
||||
"\\~n" +
|
||||
"\\`o\\'o\\^o\\~o\\\"o" +
|
||||
"\\`u\\'u\\^u\\\"u" +
|
||||
"\\'y\\\"y}");
|
||||
});
|
||||
|
||||
it("should parse combining characters", function() {
|
||||
expect("A\u0301C\u0301").toParseLike("Á\\acute C");
|
||||
expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}");
|
||||
});
|
||||
|
||||
it("should parse multi-accented characters", function() {
|
||||
expect("ấā́ắ\\text{ấā́ắ}").toParse();
|
||||
// Doesn't parse quite the same as
|
||||
// "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups.
|
||||
});
|
||||
|
||||
it("should parse accented i's and j's", function() {
|
||||
expect("íȷ́").toParseLike("\\acute ı\\acute ȷ");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Unicode", function() {
|
||||
it("should parse all lower case Greek letters", function() {
|
||||
expect("αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψω").toParse();
|
||||
});
|
||||
|
||||
it("should parse 'ΓΔΘΞΠΣΦΨΩ'", function() {
|
||||
expect("ΓΔΘΞΠΣΦΨΩ").toParse();
|
||||
});
|
||||
|
||||
it("should parse negated relations", function() {
|
||||
expect("∉∤∦≁≆≠≨≩≮≯≰≱⊀⊁⊈⊉⊊⊋⊬⊭⊮⊯⋠⋡⋦⋧⋨⋩⋬⋭⪇⪈⪉⪊⪵⪶⪹⪺⫋⫌").toParse();
|
||||
});
|
||||
|
@@ -93,4 +93,8 @@ describe("A MathML builder", function() {
|
||||
expect(getMathML(`\\boldsymbol{Ax2k\\omega\\Omega\\imath+}`))
|
||||
.toMatchSnapshot();
|
||||
});
|
||||
|
||||
it('accents turn into <mover accent="true"> in MathML', function() {
|
||||
expect(getMathML("über fiancée")).toMatchSnapshot();
|
||||
});
|
||||
});
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 25 KiB |
Binary file not shown.
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
@@ -67,11 +67,21 @@ describe("unicode", function() {
|
||||
});
|
||||
|
||||
it("should parse Latin-1 inside \\text{}", function() {
|
||||
expect('\\text{ÀàÇçÉéÏïÖöÛû}').toParse();
|
||||
expect('\\text{ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
|
||||
'ÆÇÐØÞßæçðøþ}').toParse();
|
||||
});
|
||||
|
||||
it("should parse Latin-1 outside \\text{}", function() {
|
||||
expect('ÀàÇçÉéÏïÖöÛû').toParse();
|
||||
expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' +
|
||||
'ÇÐÞçðþ').toParse();
|
||||
});
|
||||
|
||||
it("should parse all lower case Greek letters", function() {
|
||||
expect("αβγδεϵζηθϑικλμνξοπϖρϱςστυφϕχψω").toParse();
|
||||
});
|
||||
|
||||
it("should parse math upper case Greek letters", function() {
|
||||
expect("ΓΔΘΛΞΠΣΥΦΨΩ").toParse();
|
||||
});
|
||||
|
||||
it("should parse Cyrillic inside \\text{}", function() {
|
||||
|
Reference in New Issue
Block a user