mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-07 12:18:39 +00:00
Rewrote lexer, avoiding some mode-specific distinctions
There are two main motivations for this commit. One is Unicode input, which requires Unicode characters to get past the lexer. See discussion in #261. The second is in preparation for #266, where we'd deal with one token of look-ahead but might be lexing that token in an unknown mode in some cases. The unit test shipped with this commit addresses the latter concern, since it checks that a math-mode-only token may immediately follow some text mode content group. In this new implementation, all the various things that could get matched have been collected into a single regular expression. The hope is that this will be beneficial for performance and keep the code simpler. The code was written with Unicode input in mind, including non-BMP codepoints. The role of the lexer as a gatekeeper, keeping out invalid TeX syntax, has been abandoned. That role is still fulfilled by the symbols and functions tables, though, since any input which is neither a symbol nor a command is still considered invalid input, even though it lexes successfully.
This commit is contained in:
@@ -665,6 +665,7 @@ describe("A text parser", function() {
|
||||
var leadingSpaceTextExpression = "\\text {moo}";
|
||||
var badTextExpression = "\\text{a b%}";
|
||||
var badFunctionExpression = "\\text{\\sqrt{x}}";
|
||||
var mathTokenAfterText = "\\text{sin}^2";
|
||||
|
||||
it("should not fail", function() {
|
||||
expect(textExpression).toParse();
|
||||
@@ -710,6 +711,10 @@ describe("A text parser", function() {
|
||||
expect(group[3].type).toMatch("spacing");
|
||||
});
|
||||
|
||||
it("should accept math mode tokens after its argument", function() {
|
||||
expect(mathTokenAfterText).toParse();
|
||||
});
|
||||
|
||||
it("should ignore a space before the text group", function() {
|
||||
var parse = getParsed(leadingSpaceTextExpression)[0];
|
||||
// [m, o, o]
|
||||
|
Reference in New Issue
Block a user