mirror of
https://github.com/Smaug123/KaTeX
synced 2025-10-19 01:28:40 +00:00
14
src/Lexer.js
14
src/Lexer.js
@@ -19,6 +19,7 @@ import {LexerInterface, Token} from "./Token";
|
||||
|
||||
/* The following tokenRegex
|
||||
* - matches typical whitespace (but not NBSP etc.) using its first group
|
||||
* - matches comments (each comment must be terminated by a newline)
|
||||
* - does not match any control character \x00-\x1f except whitespace
|
||||
* - does not match a bare backslash
|
||||
* - matches any ASCII character except those just mentioned
|
||||
@@ -32,9 +33,11 @@ import {LexerInterface, Token} from "./Token";
|
||||
* If there is no matching function or symbol definition, the Parser will
|
||||
* still reject the input.
|
||||
*/
|
||||
// Pattern source for a TeX comment: a "%", then any run of non-newline
// characters, then the newline that terminates the comment. A "%" with no
// following newline does not match this pattern.
const commentRegexString = "%[^\n]*[\n]";
|
||||
const tokenRegex = new RegExp(
|
||||
"([ \r\n\t]+)|" + // whitespace
|
||||
"([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
||||
`(${commentRegexString}|` + // comments
|
||||
"[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
||||
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
|
||||
"|\\\\verb\\*([^]).*?\\3" + // \verb*
|
||||
"|\\\\verb([^*a-zA-Z]).*?\\4" + // \verb unstarred
|
||||
@@ -42,6 +45,8 @@ const tokenRegex = new RegExp(
|
||||
")"
|
||||
);
|
||||
|
||||
// Unanchored, non-global regex compiled from commentRegexString. The lexer
// calls commentRegex.test(text) on a matched token's text and, when it
// matches, lexes again instead of returning that token.
const commentRegex = new RegExp(commentRegexString);
|
||||
|
||||
/** Main Lexer class */
|
||||
export default class Lexer implements LexerInterface {
|
||||
input: string;
|
||||
@@ -71,6 +76,11 @@ export default class Lexer implements LexerInterface {
|
||||
const start = this.pos;
|
||||
this.pos += match[0].length;
|
||||
const end = this.pos;
|
||||
return new Token(text, new SourceLocation(this, start, end));
|
||||
|
||||
if (commentRegex.test(text)) {
|
||||
return this.lex();
|
||||
} else {
|
||||
return new Token(text, new SourceLocation(this, start, end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user