mirror of
https://github.com/Smaug123/WoofWare.PrattParser
synced 2025-10-05 01:18:42 +00:00
Add array indexing
This commit is contained in:
@@ -6,6 +6,7 @@ type Expr =
|
||||
| Times of Expr * Expr
|
||||
| UnaryMinus of Expr
|
||||
| Minus of Expr * Expr
|
||||
| Equal of Expr * Expr
|
||||
| Int of int
|
||||
| FunctionCall of Expr * Expr
|
||||
| Var of string
|
||||
@@ -13,10 +14,12 @@ type Expr =
|
||||
| Paren of Expr
|
||||
| IfThenElse of Expr * Expr * Expr
|
||||
| IfThen of Expr * Expr
|
||||
| ArrayIndex of Expr * Expr
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module Expr =
|
||||
let plus a b = Expr.Plus (a, b)
|
||||
let equal a b = Expr.Equal (a, b)
|
||||
let times a b = Expr.Times (a, b)
|
||||
let unaryMinus a = Expr.UnaryMinus a
|
||||
let minus a b = Expr.Minus (a, b)
|
||||
@@ -25,6 +28,7 @@ module Expr =
|
||||
let var name = Expr.Var name
|
||||
let factorial a = Expr.Factorial a
|
||||
let paren a = Expr.Paren a
|
||||
let arrayIndex a b = Expr.ArrayIndex (a, b)
|
||||
|
||||
let ifThenElse ifClause thenClause elseClause =
|
||||
Expr.IfThenElse (ifClause, thenClause, elseClause)
|
||||
@@ -36,9 +40,12 @@ type TokenType =
|
||||
| Plus
|
||||
| Minus
|
||||
| Times
|
||||
| Equal
|
||||
| ConstInt
|
||||
| LeftBracket
|
||||
| RightBracket
|
||||
| ArrayIndex
|
||||
| RightSquareBracket
|
||||
| Var
|
||||
| Factorial
|
||||
| If
|
||||
@@ -74,4 +81,6 @@ module Token =
|
||||
| '+' -> standalone' TokenType.Plus i |> Some
|
||||
| '-' -> standalone' TokenType.Minus i |> Some
|
||||
| '!' -> standalone' TokenType.Factorial i |> Some
|
||||
| '=' -> standalone' TokenType.Equal i |> Some
|
||||
| ']' -> standalone' TokenType.RightSquareBracket i |> Some
|
||||
| _ -> None
|
||||
|
@@ -20,24 +20,29 @@ module Example =
|
||||
| TokenType.Plus
|
||||
| TokenType.Minus
|
||||
| TokenType.Times
|
||||
| TokenType.Equal
|
||||
| TokenType.Factorial
|
||||
| TokenType.If
|
||||
| TokenType.Then
|
||||
| TokenType.Else
|
||||
| TokenType.ArrayIndex
|
||||
| TokenType.RightSquareBracket
|
||||
| TokenType.LeftBracket
|
||||
| TokenType.RightBracket -> None
|
||||
|
||||
let parser =
|
||||
Parser.make (fun token -> token.Type) atom
|
||||
|> Parser.withUnaryPostfix TokenType.Factorial (7, ()) Expr.factorial
|
||||
|> Parser.withUnaryPrefix TokenType.Plus ((), 5) id
|
||||
|> Parser.withUnaryPrefix TokenType.Minus ((), 5) Expr.unaryMinus
|
||||
|> Parser.withInfix TokenType.Plus (1, 2) Expr.plus
|
||||
|> Parser.withInfix TokenType.Minus (1, 2) Expr.minus
|
||||
|> Parser.withInfix TokenType.Times (1, 2) Expr.times
|
||||
|> Parser.withUnaryPostfix TokenType.Factorial (11, ()) Expr.factorial
|
||||
|> Parser.withUnaryPrefix TokenType.Plus ((), 9) id
|
||||
|> Parser.withUnaryPrefix TokenType.Minus ((), 9) Expr.unaryMinus
|
||||
|> Parser.withInfix TokenType.Plus (5, 6) Expr.plus
|
||||
|> Parser.withInfix TokenType.Minus (5, 6) Expr.minus
|
||||
|> Parser.withInfix TokenType.Times (7, 8) Expr.times
|
||||
|> Parser.withInfix TokenType.Equal (2, 1) Expr.equal
|
||||
|> Parser.withBracketLike
|
||||
TokenType.LeftBracket
|
||||
{
|
||||
ConsumeBeforeInitialToken = false
|
||||
ConsumeAfterFinalToken = false
|
||||
BoundaryTokens = [ TokenType.RightBracket ]
|
||||
Construct = Seq.exactlyOne >> Expr.paren
|
||||
@@ -45,6 +50,7 @@ module Example =
|
||||
|> Parser.withBracketLike
|
||||
TokenType.If
|
||||
{
|
||||
ConsumeBeforeInitialToken = false
|
||||
ConsumeAfterFinalToken = true
|
||||
BoundaryTokens = [ TokenType.Then ; TokenType.Else ]
|
||||
Construct =
|
||||
@@ -56,6 +62,7 @@ module Example =
|
||||
|> Parser.withBracketLike
|
||||
TokenType.If
|
||||
{
|
||||
ConsumeBeforeInitialToken = false
|
||||
ConsumeAfterFinalToken = true
|
||||
BoundaryTokens = [ TokenType.Then ]
|
||||
Construct =
|
||||
@@ -64,3 +71,15 @@ module Example =
|
||||
| [ ifClause ; thenClause ] -> Expr.ifThen ifClause thenClause
|
||||
| _ -> failwith "logic error"
|
||||
}
|
||||
|> Parser.withBracketLike
|
||||
TokenType.ArrayIndex
|
||||
{
|
||||
ConsumeBeforeInitialToken = true
|
||||
ConsumeAfterFinalToken = false
|
||||
BoundaryTokens = [ TokenType.RightSquareBracket ]
|
||||
Construct =
|
||||
fun s ->
|
||||
match s with
|
||||
| [ arg ; contents ] -> Expr.arrayIndex arg contents
|
||||
| _ -> failwith "logic error"
|
||||
}
|
||||
|
@@ -28,6 +28,9 @@ module Lexer =
|
||||
|
||||
yield Token.standalone TokenType.ConstInt startI (i - startI)
|
||||
| _, ' ' -> i <- i + 1
|
||||
| _, '.' when i < s.Length - 1 && s.[i + 1] = '[' ->
|
||||
yield Token.standalone TokenType.ArrayIndex i 2
|
||||
i <- i + 2
|
||||
| _, 'i' when i < s.Length - 1 && s.[i + 1] = 'f' ->
|
||||
yield Token.standalone TokenType.If i 2
|
||||
i <- i + 2
|
||||
|
@@ -11,6 +11,20 @@ module TestParser =
|
||||
let parserTestCases =
|
||||
[
|
||||
"1", Expr.constInt 1
|
||||
"1 + 2 * 3", Expr.plus (Expr.constInt 1) (Expr.times (Expr.constInt 2) (Expr.constInt 3))
|
||||
"a + b * c * d + e",
|
||||
Expr.plus
|
||||
(Expr.plus (Expr.var "a") (Expr.times (Expr.times (Expr.var "b") (Expr.var "c")) (Expr.var "d")))
|
||||
(Expr.var "e")
|
||||
"--1 * 2", Expr.times (Expr.unaryMinus (Expr.unaryMinus (Expr.constInt 1))) (Expr.constInt 2)
|
||||
"-9!", Expr.unaryMinus (Expr.factorial (Expr.constInt 9))
|
||||
"(((0)))", Expr.paren (Expr.paren (Expr.paren (Expr.constInt 0)))
|
||||
"x.[0].[1]", Expr.arrayIndex (Expr.arrayIndex (Expr.var "x") (Expr.constInt 0)) (Expr.constInt 1)
|
||||
"if a = 0 then b else c = d",
|
||||
Expr.ifThenElse
|
||||
(Expr.equal (Expr.var "a") (Expr.constInt 0))
|
||||
(Expr.var "b")
|
||||
(Expr.equal (Expr.var "c") (Expr.var "d"))
|
||||
"a", Expr.var "a"
|
||||
"-1", Expr.unaryMinus (Expr.constInt 1)
|
||||
"-a", Expr.unaryMinus (Expr.var "a")
|
||||
@@ -20,6 +34,17 @@ module TestParser =
|
||||
"if x then y else z", Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")
|
||||
"if x then y", Expr.ifThen (Expr.var "x") (Expr.var "y")
|
||||
"1 + if x then y", Expr.plus (Expr.constInt 1) (Expr.ifThen (Expr.var "x") (Expr.var "y"))
|
||||
"if x then y else if r then s",
|
||||
Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.ifThen (Expr.var "r") (Expr.var "s"))
|
||||
"if x then y else if r then s else 5",
|
||||
Expr.ifThenElse
|
||||
(Expr.var "x")
|
||||
(Expr.var "y")
|
||||
(Expr.ifThenElse (Expr.var "r") (Expr.var "s") (Expr.constInt 5))
|
||||
"if if x then y else z then a",
|
||||
Expr.ifThen (Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")) (Expr.var "a")
|
||||
"if if x then y else z then a else b",
|
||||
Expr.ifThenElse (Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")) (Expr.var "a") (Expr.var "b")
|
||||
"if x + 1 then y else z + 3",
|
||||
Expr.ifThenElse
|
||||
(Expr.plus (Expr.var "x") (Expr.constInt 1))
|
||||
|
@@ -8,6 +8,11 @@ namespace PrattParser
|
||||
/// The trailing clause will consume as much as it can, so e.g. `if foo then bar!` would parse as
|
||||
/// `if foo then (bar!)`.
|
||||
///
|
||||
/// Optionally you can specify that the bracket-like token consumes something at the beginning too:
|
||||
/// for example, `a.[3]` is a bracket pair `.[` and `]` with two inputs.
|
||||
/// Note that you could use this to implement binary operators, but they will bind as loosely as possible
|
||||
/// if you do this, and it's less efficient, and it's probably confusing to think about associativity.
|
||||
///
|
||||
/// Optionally you can specify a single construct with multiple delimiters:
|
||||
/// for example, `if...then...else...` consumes three expressions.
|
||||
type BracketLikeParser<'tokenTag, 'expr> =
|
||||
@@ -15,6 +20,8 @@ type BracketLikeParser<'tokenTag, 'expr> =
|
||||
/// Whether to consume input after the final token, e.g. like `if...then...else...` consumes,
|
||||
/// whereas `(...)` does not.
|
||||
ConsumeAfterFinalToken : bool
|
||||
/// Whether to consume the input before the initial token, e.g. like `a.[5]` consumes the `a`.
|
||||
ConsumeBeforeInitialToken : bool
|
||||
/// The successive list of delimiters after the initial delimiter that "opens the brackets".
|
||||
/// For example, this might be `[then]`, or `[then ; else]`, or `[')']`.
|
||||
BoundaryTokens : 'tokenTag list
|
||||
@@ -142,6 +149,11 @@ module Parser =
|
||||
|
||||
/// Add a bracket-like parser to the parser, introduced by a given delimiter.
|
||||
/// See the docs for BracketLikeParser.
|
||||
///
|
||||
/// If you have multiple `BracketLikeParser`s, each with the same beginning delimiter,
|
||||
/// we will try them all, and return the valid one which had the most bracket-like tokens in.
|
||||
/// It's probably possible to create an ambiguous parse this way with an inappropriate grammar;
|
||||
/// if this happens while parsing, we throw.
|
||||
let withBracketLike<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||
(tokenType : 'tokenTag)
|
||||
(toAdd : BracketLikeParser<'tokenTag, 'expr>)
|
||||
@@ -237,9 +249,7 @@ module Parser =
|
||||
|
||||
let lhs, rest =
|
||||
match parser.Atom inputString firstToken with
|
||||
| Some token ->
|
||||
printfn "Parsed an atom: %+A" token
|
||||
token, rest
|
||||
| Some token -> token, rest
|
||||
| None ->
|
||||
|
||||
match parser.BracketLike.TryGetValue (parser.GetTag firstToken) with
|
||||
@@ -257,9 +267,7 @@ module Parser =
|
||||
|
||||
match parser.UnaryPrefix.TryGetValue (parser.GetTag firstToken) with
|
||||
| true, (((), precedence), assemble) ->
|
||||
printfn "Parsing a prefix op: %+A" firstToken
|
||||
let rhs, rest = parseInner parser inputString rest precedence
|
||||
printfn "Returning to parse of prefix op: %+A, remaining tokens: %+A" firstToken rest
|
||||
assemble rhs, rest
|
||||
| false, _ -> failwithf "didn't get an atom or prefix, got: %+A" firstToken
|
||||
|
||||
@@ -268,25 +276,35 @@ module Parser =
|
||||
| [] -> lhs, []
|
||||
| op :: rest ->
|
||||
|
||||
let fromBracketed =
|
||||
match parser.BracketLike.TryGetValue (parser.GetTag op) with
|
||||
| true, parse ->
|
||||
let parse = parse |> List.filter _.ConsumeBeforeInitialToken
|
||||
|
||||
match parseBracketLike parser inputString parse [ lhs ] rest with
|
||||
| [ result ] -> Some result
|
||||
| _ :: _ -> failwithf "Ambiguous parse (multiple matches) at token %+A" op
|
||||
| [] -> None
|
||||
| false, _ -> None
|
||||
|
||||
match fromBracketed with
|
||||
| Some (lhs, rest) -> go lhs rest
|
||||
| None ->
|
||||
|
||||
match parser.UnaryPostfix.TryGetValue (parser.GetTag op) with
|
||||
| true, ((precedence, ()), construct) ->
|
||||
if precedence < minBinding then
|
||||
printfn "Hit a postfix op which does not bind: %+A" op
|
||||
lhs, rest
|
||||
else
|
||||
printfn "Hit a postfix op which binds: %+A" op
|
||||
go (construct lhs) rest
|
||||
| false, _ ->
|
||||
|
||||
match parser.Infix.TryGetValue (parser.GetTag op) with
|
||||
| true, ((leftBinding, rightBinding), construct) ->
|
||||
if leftBinding < minBinding then
|
||||
printfn "Hit an infix op which does not bind on the left: %+A" op
|
||||
lhs, op :: rest
|
||||
else
|
||||
|
||||
printfn "Hit an infix op which binds on the left: %+A" op
|
||||
|
||||
let rhs, remainingTokens = parseInner parser inputString rest rightBinding
|
||||
|
||||
go (construct lhs rhs) remainingTokens
|
||||
|
Reference in New Issue
Block a user