Add support for comments, fixes #20 (#884)

2025-10-19 01:28:40 +00:00 · 2017-09-25 21:50:27 -06:00
parent 6de5446913
commit eaef0127c5
4 changed files with 50 additions and 5 deletions
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -19,6 +19,7 @@ import {LexerInterface, Token} from "./Token";

 /* The following tokenRegex
 * - matches typical whitespace (but not NBSP etc.) using its first group
+ * - matches comments: "%" through the next newline (newline is required)
 * - does not match any control character \x00-\x1f except whitespace
 * - does not match a bare backslash
 * - matches any ASCII character except those just mentioned
@@ -32,9 +33,11 @@ import {LexerInterface, Token} from "./Token";
 * If there is no matching function or symbol definition, the Parser will
 * still reject the input.
 */
+const commentRegexString = "%[^\n]*[\n]";  // "%" up to and including the next newline
 const tokenRegex = new RegExp(
    "([ \r\n\t]+)|" +                                 // whitespace
-    "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
+    `(${commentRegexString}|` +                       // comments
+    "[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +   // single codepoint
    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
    "|\\\\verb\\*([^]).*?\\3" +                       // \verb*
    "|\\\\verb([^*a-zA-Z]).*?\\4" +                   // \verb unstarred
@@ -42,6 +45,8 @@ const tokenRegex = new RegExp(
    ")"
 );

+const commentRegex = new RegExp(commentRegexString);  // unanchored; tested against each matched token's text so comments are skipped
+
 /** Main Lexer class */
 export default class Lexer implements LexerInterface {
    input: string;
@@ -71,6 +76,11 @@ export default class Lexer implements LexerInterface {
        const start = this.pos;
        this.pos += match[0].length;
        const end = this.pos;
-        return new Token(text, new SourceLocation(this, start, end));
+
+        if (commentRegex.test(text)) {
+            return this.lex();
+        } else {
+            return new Token(text, new SourceLocation(this, start, end));
+        }
    }
 }