From 8cee6accf9e9d96841b5d8de3d436b806db1b2c7 Mon Sep 17 00:00:00 2001 From: Smaug123 Date: Mon, 22 Jan 2024 22:43:44 +0000 Subject: [PATCH] Add array indexing --- PrattParser.Example/Domain.fs | 9 ++++++++ PrattParser.Example/Example.fs | 31 +++++++++++++++++++++------ PrattParser.Example/Lexer.fs | 3 +++ PrattParser.Test/TestParser.fs | 25 ++++++++++++++++++++++ PrattParser/Parser.fs | 38 +++++++++++++++++++++++++--------- 5 files changed, 90 insertions(+), 16 deletions(-) diff --git a/PrattParser.Example/Domain.fs b/PrattParser.Example/Domain.fs index 924e81e..6415294 100644 --- a/PrattParser.Example/Domain.fs +++ b/PrattParser.Example/Domain.fs @@ -6,6 +6,7 @@ type Expr = | Times of Expr * Expr | UnaryMinus of Expr | Minus of Expr * Expr + | Equal of Expr * Expr | Int of int | FunctionCall of Expr * Expr | Var of string @@ -13,10 +14,12 @@ type Expr = | Paren of Expr | IfThenElse of Expr * Expr * Expr | IfThen of Expr * Expr + | ArrayIndex of Expr * Expr [] module Expr = let plus a b = Expr.Plus (a, b) + let equal a b = Expr.Equal (a, b) let times a b = Expr.Times (a, b) let unaryMinus a = Expr.UnaryMinus a let minus a b = Expr.Minus (a, b) @@ -25,6 +28,7 @@ module Expr = let var name = Expr.Var name let factorial a = Expr.Factorial a let paren a = Expr.Paren a + let arrayIndex a b = Expr.ArrayIndex (a, b) let ifThenElse ifClause thenClause elseClause = Expr.IfThenElse (ifClause, thenClause, elseClause) @@ -36,9 +40,12 @@ type TokenType = | Plus | Minus | Times + | Equal | ConstInt | LeftBracket | RightBracket + | ArrayIndex + | RightSquareBracket | Var | Factorial | If @@ -74,4 +81,6 @@ module Token = | '+' -> standalone' TokenType.Plus i |> Some | '-' -> standalone' TokenType.Minus i |> Some | '!' -> standalone' TokenType.Factorial i |> Some + | '=' -> standalone' TokenType.Equal i |> Some + | ']' -> standalone' TokenType.RightSquareBracket i |> Some | _ -> None diff --git a/PrattParser.Example/Example.fs b/PrattParser.Example/Example.fs index 2cc1b15..a63297b 100644 --- a/PrattParser.Example/Example.fs +++ b/PrattParser.Example/Example.fs @@ -20,24 +20,29 @@ module Example = | TokenType.Plus | TokenType.Minus | TokenType.Times + | TokenType.Equal | TokenType.Factorial | TokenType.If | TokenType.Then | TokenType.Else + | TokenType.ArrayIndex + | TokenType.RightSquareBracket | TokenType.LeftBracket | TokenType.RightBracket -> None let parser = Parser.make (fun token -> token.Type) atom - |> Parser.withUnaryPostfix TokenType.Factorial (7, ()) Expr.factorial - |> Parser.withUnaryPrefix TokenType.Plus ((), 5) id - |> Parser.withUnaryPrefix TokenType.Minus ((), 5) Expr.unaryMinus - |> Parser.withInfix TokenType.Plus (1, 2) Expr.plus - |> Parser.withInfix TokenType.Minus (1, 2) Expr.minus - |> Parser.withInfix TokenType.Times (1, 2) Expr.times + |> Parser.withUnaryPostfix TokenType.Factorial (11, ()) Expr.factorial + |> Parser.withUnaryPrefix TokenType.Plus ((), 9) id + |> Parser.withUnaryPrefix TokenType.Minus ((), 9) Expr.unaryMinus + |> Parser.withInfix TokenType.Plus (5, 6) Expr.plus + |> Parser.withInfix TokenType.Minus (5, 6) Expr.minus + |> Parser.withInfix TokenType.Times (7, 8) Expr.times + |> Parser.withInfix TokenType.Equal (2, 1) Expr.equal |> Parser.withBracketLike TokenType.LeftBracket { + ConsumeBeforeInitialToken = false ConsumeAfterFinalToken = false BoundaryTokens = [ TokenType.RightBracket ] Construct = Seq.exactlyOne >> Expr.paren @@ -45,6 +50,7 @@ module Example = |> Parser.withBracketLike TokenType.If { + ConsumeBeforeInitialToken = false ConsumeAfterFinalToken = true BoundaryTokens = [ TokenType.Then ; TokenType.Else ] Construct = @@ -56,6 +62,7 @@ module Example = |> Parser.withBracketLike TokenType.If { + ConsumeBeforeInitialToken = false ConsumeAfterFinalToken = true BoundaryTokens = [ TokenType.Then ] Construct = @@ -64,3 +71,15 @@ module Example = | [ ifClause ; thenClause ] -> Expr.ifThen ifClause thenClause | _ -> failwith "logic error" } + |> Parser.withBracketLike + TokenType.ArrayIndex + { + ConsumeBeforeInitialToken = true + ConsumeAfterFinalToken = false + BoundaryTokens = [ TokenType.RightSquareBracket ] + Construct = + fun s -> + match s with + | [ arg ; contents ] -> Expr.arrayIndex arg contents + | _ -> failwith "logic error" + } diff --git a/PrattParser.Example/Lexer.fs b/PrattParser.Example/Lexer.fs index 07d0f46..7b7ba02 100644 --- a/PrattParser.Example/Lexer.fs +++ b/PrattParser.Example/Lexer.fs @@ -28,6 +28,9 @@ module Lexer = yield Token.standalone TokenType.ConstInt startI (i - startI) | _, ' ' -> i <- i + 1 + | _, '.' when i < s.Length - 1 && s.[i + 1] = '[' -> + yield Token.standalone TokenType.ArrayIndex i 2 + i <- i + 2 | _, 'i' when i < s.Length - 1 && s.[i + 1] = 'f' -> yield Token.standalone TokenType.If i 2 i <- i + 2 diff --git a/PrattParser.Test/TestParser.fs b/PrattParser.Test/TestParser.fs index bd085f7..f5c38e2 100644 --- a/PrattParser.Test/TestParser.fs +++ b/PrattParser.Test/TestParser.fs @@ -11,6 +11,20 @@ module TestParser = let parserTestCases = [ "1", Expr.constInt 1 + "1 + 2 * 3", Expr.plus (Expr.constInt 1) (Expr.times (Expr.constInt 2) (Expr.constInt 3)) + "a + b * c * d + e", + Expr.plus + (Expr.plus (Expr.var "a") (Expr.times (Expr.times (Expr.var "b") (Expr.var "c")) (Expr.var "d"))) + (Expr.var "e") + "--1 * 2", Expr.times (Expr.unaryMinus (Expr.unaryMinus (Expr.constInt 1))) (Expr.constInt 2) + "-9!", Expr.unaryMinus (Expr.factorial (Expr.constInt 9)) + "(((0)))", Expr.paren (Expr.paren (Expr.paren (Expr.constInt 0))) + "x.[0].[1]", Expr.arrayIndex (Expr.arrayIndex (Expr.var "x") (Expr.constInt 0)) (Expr.constInt 1) + "if a = 0 then b else c = d", + Expr.ifThenElse + (Expr.equal (Expr.var "a") (Expr.constInt 0)) + (Expr.var "b") + (Expr.equal (Expr.var "c") (Expr.var "d")) "a", Expr.var "a" "-1", Expr.unaryMinus (Expr.constInt 1) "-a", Expr.unaryMinus (Expr.var "a") @@ -20,6 +34,17 @@ module TestParser = "if x then y else z", Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z") "if x then y", Expr.ifThen (Expr.var "x") (Expr.var "y") "1 + if x then y", Expr.plus (Expr.constInt 1) (Expr.ifThen (Expr.var "x") (Expr.var "y")) + "if x then y else if r then s", + Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.ifThen (Expr.var "r") (Expr.var "s")) + "if x then y else if r then s else 5", + Expr.ifThenElse + (Expr.var "x") + (Expr.var "y") + (Expr.ifThenElse (Expr.var "r") (Expr.var "s") (Expr.constInt 5)) + "if if x then y else z then a", + Expr.ifThen (Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")) (Expr.var "a") + "if if x then y else z then a else b", + Expr.ifThenElse (Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")) (Expr.var "a") (Expr.var "b") "if x + 1 then y else z + 3", Expr.ifThenElse (Expr.plus (Expr.var "x") (Expr.constInt 1)) diff --git a/PrattParser/Parser.fs b/PrattParser/Parser.fs index 1d5cb44..424ca16 100644 --- a/PrattParser/Parser.fs +++ b/PrattParser/Parser.fs @@ -8,6 +8,11 @@ namespace PrattParser /// The trailing clause will consume as much as it can, so e.g. `if foo then bar!` would parse as /// `if foo then (bar!)`. /// +/// Optionally you can specify that the bracket-like token consumes something at the beginning too: +/// for example, `a.[3]` is a bracket pair `.[` and `]` with two inputs. +/// Note that you could use this to implement binary operators, but they will bind as loosely as possible +/// if you do this, and it's less efficient, and it's probably confusing to think about associativity. +/// /// Optionally you can specify a single construct with multiple delimiters: /// for example, `if...then...else...` consumes three expressions. type BracketLikeParser<'tokenTag, 'expr> = @@ -15,6 +20,8 @@ type BracketLikeParser<'tokenTag, 'expr> = /// Whether to consume input after the final token, e.g. like `if...then...else...` consumes, /// whereas `(...)` does not. ConsumeAfterFinalToken : bool + /// Whether to consume the input before the initial token, e.g. like `a.[5]` consumes the `a`. + ConsumeBeforeInitialToken : bool /// The successive list of delimiters after the initial delimiter that "opens the brackets". /// For example, this might be `[then]`, or `[then ; else]`, or `[')']`. BoundaryTokens : 'tokenTag list @@ -142,6 +149,11 @@ module Parser = /// Add a bracket-like parser to the parser, introduced by a given delimiter. /// See the docs for BracketLikeParser. + /// + /// If you have multiple `BracketLikeParser`s, each with the same beginning delimiter, + /// we will try them all, and return the valid one which had the most bracket-like tokens in. + /// It's probably possible to create an ambiguous parse this way with an inappropriate grammar; + /// if this happens while parsing, we throw. let withBracketLike<'tokenTag, 'token, 'expr when 'tokenTag : comparison> (tokenType : 'tokenTag) (toAdd : BracketLikeParser<'tokenTag, 'expr>) @@ -237,9 +249,7 @@ module Parser = let lhs, rest = match parser.Atom inputString firstToken with - | Some token -> - printfn "Parsed an atom: %+A" token - token, rest + | Some token -> token, rest | None -> match parser.BracketLike.TryGetValue (parser.GetTag firstToken) with @@ -257,9 +267,7 @@ module Parser = match parser.UnaryPrefix.TryGetValue (parser.GetTag firstToken) with | true, (((), precedence), assemble) -> - printfn "Parsing a prefix op: %+A" firstToken let rhs, rest = parseInner parser inputString rest precedence - printfn "Returning to parse of prefix op: %+A, remaining tokens: %+A" firstToken rest assemble rhs, rest | false, _ -> failwithf "didn't get an atom or prefix, got: %+A" firstToken @@ -268,25 +276,35 @@ module Parser = | [] -> lhs, [] | op :: rest -> + let fromBracketed = + match parser.BracketLike.TryGetValue (parser.GetTag op) with + | true, parse -> + let parse = parse |> List.filter _.ConsumeBeforeInitialToken + + match parseBracketLike parser inputString parse [ lhs ] rest with + | [ result ] -> Some result + | _ :: _ -> failwithf "Ambiguous parse (multiple matches) at token %+A" op + | [] -> None + | false, _ -> None + + match fromBracketed with + | Some (lhs, rest) -> go lhs rest + | None -> + match parser.UnaryPostfix.TryGetValue (parser.GetTag op) with | true, ((precedence, ()), construct) -> if precedence < minBinding then - printfn "Hit a postfix op which does not bind: %+A" op lhs, rest else - printfn "Hit a postfix op which binds: %+A" op go (construct lhs) rest | false, _ -> match parser.Infix.TryGetValue (parser.GetTag op) with | true, ((leftBinding, rightBinding), construct) -> if leftBinding < minBinding then - printfn "Hit an infix op which does not bind on the left: %+A" op lhs, op :: rest else - printfn "Hit an infix op which binds on the left: %+A" op - let rhs, remainingTokens = parseInner parser inputString rest rightBinding go (construct lhs rhs) remainingTokens