mirror of
https://github.com/Smaug123/WoofWare.PrattParser
synced 2025-10-05 01:18:42 +00:00
Add array indexing
This commit is contained in:
@@ -6,6 +6,7 @@ type Expr =
|
|||||||
| Times of Expr * Expr
|
| Times of Expr * Expr
|
||||||
| UnaryMinus of Expr
|
| UnaryMinus of Expr
|
||||||
| Minus of Expr * Expr
|
| Minus of Expr * Expr
|
||||||
|
| Equal of Expr * Expr
|
||||||
| Int of int
|
| Int of int
|
||||||
| FunctionCall of Expr * Expr
|
| FunctionCall of Expr * Expr
|
||||||
| Var of string
|
| Var of string
|
||||||
@@ -13,10 +14,12 @@ type Expr =
|
|||||||
| Paren of Expr
|
| Paren of Expr
|
||||||
| IfThenElse of Expr * Expr * Expr
|
| IfThenElse of Expr * Expr * Expr
|
||||||
| IfThen of Expr * Expr
|
| IfThen of Expr * Expr
|
||||||
|
| ArrayIndex of Expr * Expr
|
||||||
|
|
||||||
[<RequireQualifiedAccess>]
|
[<RequireQualifiedAccess>]
|
||||||
module Expr =
|
module Expr =
|
||||||
let plus a b = Expr.Plus (a, b)
|
let plus a b = Expr.Plus (a, b)
|
||||||
|
let equal a b = Expr.Equal (a, b)
|
||||||
let times a b = Expr.Times (a, b)
|
let times a b = Expr.Times (a, b)
|
||||||
let unaryMinus a = Expr.UnaryMinus a
|
let unaryMinus a = Expr.UnaryMinus a
|
||||||
let minus a b = Expr.Minus (a, b)
|
let minus a b = Expr.Minus (a, b)
|
||||||
@@ -25,6 +28,7 @@ module Expr =
|
|||||||
let var name = Expr.Var name
|
let var name = Expr.Var name
|
||||||
let factorial a = Expr.Factorial a
|
let factorial a = Expr.Factorial a
|
||||||
let paren a = Expr.Paren a
|
let paren a = Expr.Paren a
|
||||||
|
let arrayIndex a b = Expr.ArrayIndex (a, b)
|
||||||
|
|
||||||
let ifThenElse ifClause thenClause elseClause =
|
let ifThenElse ifClause thenClause elseClause =
|
||||||
Expr.IfThenElse (ifClause, thenClause, elseClause)
|
Expr.IfThenElse (ifClause, thenClause, elseClause)
|
||||||
@@ -36,9 +40,12 @@ type TokenType =
|
|||||||
| Plus
|
| Plus
|
||||||
| Minus
|
| Minus
|
||||||
| Times
|
| Times
|
||||||
|
| Equal
|
||||||
| ConstInt
|
| ConstInt
|
||||||
| LeftBracket
|
| LeftBracket
|
||||||
| RightBracket
|
| RightBracket
|
||||||
|
| ArrayIndex
|
||||||
|
| RightSquareBracket
|
||||||
| Var
|
| Var
|
||||||
| Factorial
|
| Factorial
|
||||||
| If
|
| If
|
||||||
@@ -74,4 +81,6 @@ module Token =
|
|||||||
| '+' -> standalone' TokenType.Plus i |> Some
|
| '+' -> standalone' TokenType.Plus i |> Some
|
||||||
| '-' -> standalone' TokenType.Minus i |> Some
|
| '-' -> standalone' TokenType.Minus i |> Some
|
||||||
| '!' -> standalone' TokenType.Factorial i |> Some
|
| '!' -> standalone' TokenType.Factorial i |> Some
|
||||||
|
| '=' -> standalone' TokenType.Equal i |> Some
|
||||||
|
| ']' -> standalone' TokenType.RightSquareBracket i |> Some
|
||||||
| _ -> None
|
| _ -> None
|
||||||
|
@@ -20,24 +20,29 @@ module Example =
|
|||||||
| TokenType.Plus
|
| TokenType.Plus
|
||||||
| TokenType.Minus
|
| TokenType.Minus
|
||||||
| TokenType.Times
|
| TokenType.Times
|
||||||
|
| TokenType.Equal
|
||||||
| TokenType.Factorial
|
| TokenType.Factorial
|
||||||
| TokenType.If
|
| TokenType.If
|
||||||
| TokenType.Then
|
| TokenType.Then
|
||||||
| TokenType.Else
|
| TokenType.Else
|
||||||
|
| TokenType.ArrayIndex
|
||||||
|
| TokenType.RightSquareBracket
|
||||||
| TokenType.LeftBracket
|
| TokenType.LeftBracket
|
||||||
| TokenType.RightBracket -> None
|
| TokenType.RightBracket -> None
|
||||||
|
|
||||||
let parser =
|
let parser =
|
||||||
Parser.make (fun token -> token.Type) atom
|
Parser.make (fun token -> token.Type) atom
|
||||||
|> Parser.withUnaryPostfix TokenType.Factorial (7, ()) Expr.factorial
|
|> Parser.withUnaryPostfix TokenType.Factorial (11, ()) Expr.factorial
|
||||||
|> Parser.withUnaryPrefix TokenType.Plus ((), 5) id
|
|> Parser.withUnaryPrefix TokenType.Plus ((), 9) id
|
||||||
|> Parser.withUnaryPrefix TokenType.Minus ((), 5) Expr.unaryMinus
|
|> Parser.withUnaryPrefix TokenType.Minus ((), 9) Expr.unaryMinus
|
||||||
|> Parser.withInfix TokenType.Plus (1, 2) Expr.plus
|
|> Parser.withInfix TokenType.Plus (5, 6) Expr.plus
|
||||||
|> Parser.withInfix TokenType.Minus (1, 2) Expr.minus
|
|> Parser.withInfix TokenType.Minus (5, 6) Expr.minus
|
||||||
|> Parser.withInfix TokenType.Times (1, 2) Expr.times
|
|> Parser.withInfix TokenType.Times (7, 8) Expr.times
|
||||||
|
|> Parser.withInfix TokenType.Equal (2, 1) Expr.equal
|
||||||
|> Parser.withBracketLike
|
|> Parser.withBracketLike
|
||||||
TokenType.LeftBracket
|
TokenType.LeftBracket
|
||||||
{
|
{
|
||||||
|
ConsumeBeforeInitialToken = false
|
||||||
ConsumeAfterFinalToken = false
|
ConsumeAfterFinalToken = false
|
||||||
BoundaryTokens = [ TokenType.RightBracket ]
|
BoundaryTokens = [ TokenType.RightBracket ]
|
||||||
Construct = Seq.exactlyOne >> Expr.paren
|
Construct = Seq.exactlyOne >> Expr.paren
|
||||||
@@ -45,6 +50,7 @@ module Example =
|
|||||||
|> Parser.withBracketLike
|
|> Parser.withBracketLike
|
||||||
TokenType.If
|
TokenType.If
|
||||||
{
|
{
|
||||||
|
ConsumeBeforeInitialToken = false
|
||||||
ConsumeAfterFinalToken = true
|
ConsumeAfterFinalToken = true
|
||||||
BoundaryTokens = [ TokenType.Then ; TokenType.Else ]
|
BoundaryTokens = [ TokenType.Then ; TokenType.Else ]
|
||||||
Construct =
|
Construct =
|
||||||
@@ -56,6 +62,7 @@ module Example =
|
|||||||
|> Parser.withBracketLike
|
|> Parser.withBracketLike
|
||||||
TokenType.If
|
TokenType.If
|
||||||
{
|
{
|
||||||
|
ConsumeBeforeInitialToken = false
|
||||||
ConsumeAfterFinalToken = true
|
ConsumeAfterFinalToken = true
|
||||||
BoundaryTokens = [ TokenType.Then ]
|
BoundaryTokens = [ TokenType.Then ]
|
||||||
Construct =
|
Construct =
|
||||||
@@ -64,3 +71,15 @@ module Example =
|
|||||||
| [ ifClause ; thenClause ] -> Expr.ifThen ifClause thenClause
|
| [ ifClause ; thenClause ] -> Expr.ifThen ifClause thenClause
|
||||||
| _ -> failwith "logic error"
|
| _ -> failwith "logic error"
|
||||||
}
|
}
|
||||||
|
|> Parser.withBracketLike
|
||||||
|
TokenType.ArrayIndex
|
||||||
|
{
|
||||||
|
ConsumeBeforeInitialToken = true
|
||||||
|
ConsumeAfterFinalToken = false
|
||||||
|
BoundaryTokens = [ TokenType.RightSquareBracket ]
|
||||||
|
Construct =
|
||||||
|
fun s ->
|
||||||
|
match s with
|
||||||
|
| [ arg ; contents ] -> Expr.arrayIndex arg contents
|
||||||
|
| _ -> failwith "logic error"
|
||||||
|
}
|
||||||
|
@@ -28,6 +28,9 @@ module Lexer =
|
|||||||
|
|
||||||
yield Token.standalone TokenType.ConstInt startI (i - startI)
|
yield Token.standalone TokenType.ConstInt startI (i - startI)
|
||||||
| _, ' ' -> i <- i + 1
|
| _, ' ' -> i <- i + 1
|
||||||
|
| _, '.' when i < s.Length - 1 && s.[i + 1] = '[' ->
|
||||||
|
yield Token.standalone TokenType.ArrayIndex i 2
|
||||||
|
i <- i + 2
|
||||||
| _, 'i' when i < s.Length - 1 && s.[i + 1] = 'f' ->
|
| _, 'i' when i < s.Length - 1 && s.[i + 1] = 'f' ->
|
||||||
yield Token.standalone TokenType.If i 2
|
yield Token.standalone TokenType.If i 2
|
||||||
i <- i + 2
|
i <- i + 2
|
||||||
|
@@ -11,6 +11,20 @@ module TestParser =
|
|||||||
let parserTestCases =
|
let parserTestCases =
|
||||||
[
|
[
|
||||||
"1", Expr.constInt 1
|
"1", Expr.constInt 1
|
||||||
|
"1 + 2 * 3", Expr.plus (Expr.constInt 1) (Expr.times (Expr.constInt 2) (Expr.constInt 3))
|
||||||
|
"a + b * c * d + e",
|
||||||
|
Expr.plus
|
||||||
|
(Expr.plus (Expr.var "a") (Expr.times (Expr.times (Expr.var "b") (Expr.var "c")) (Expr.var "d")))
|
||||||
|
(Expr.var "e")
|
||||||
|
"--1 * 2", Expr.times (Expr.unaryMinus (Expr.unaryMinus (Expr.constInt 1))) (Expr.constInt 2)
|
||||||
|
"-9!", Expr.unaryMinus (Expr.factorial (Expr.constInt 9))
|
||||||
|
"(((0)))", Expr.paren (Expr.paren (Expr.paren (Expr.constInt 0)))
|
||||||
|
"x.[0].[1]", Expr.arrayIndex (Expr.arrayIndex (Expr.var "x") (Expr.constInt 0)) (Expr.constInt 1)
|
||||||
|
"if a = 0 then b else c = d",
|
||||||
|
Expr.ifThenElse
|
||||||
|
(Expr.equal (Expr.var "a") (Expr.constInt 0))
|
||||||
|
(Expr.var "b")
|
||||||
|
(Expr.equal (Expr.var "c") (Expr.var "d"))
|
||||||
"a", Expr.var "a"
|
"a", Expr.var "a"
|
||||||
"-1", Expr.unaryMinus (Expr.constInt 1)
|
"-1", Expr.unaryMinus (Expr.constInt 1)
|
||||||
"-a", Expr.unaryMinus (Expr.var "a")
|
"-a", Expr.unaryMinus (Expr.var "a")
|
||||||
@@ -20,6 +34,17 @@ module TestParser =
|
|||||||
"if x then y else z", Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")
|
"if x then y else z", Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")
|
||||||
"if x then y", Expr.ifThen (Expr.var "x") (Expr.var "y")
|
"if x then y", Expr.ifThen (Expr.var "x") (Expr.var "y")
|
||||||
"1 + if x then y", Expr.plus (Expr.constInt 1) (Expr.ifThen (Expr.var "x") (Expr.var "y"))
|
"1 + if x then y", Expr.plus (Expr.constInt 1) (Expr.ifThen (Expr.var "x") (Expr.var "y"))
|
||||||
|
"if x then y else if r then s",
|
||||||
|
Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.ifThen (Expr.var "r") (Expr.var "s"))
|
||||||
|
"if x then y else if r then s else 5",
|
||||||
|
Expr.ifThenElse
|
||||||
|
(Expr.var "x")
|
||||||
|
(Expr.var "y")
|
||||||
|
(Expr.ifThenElse (Expr.var "r") (Expr.var "s") (Expr.constInt 5))
|
||||||
|
"if if x then y else z then a",
|
||||||
|
Expr.ifThen (Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")) (Expr.var "a")
|
||||||
|
"if if x then y else z then a else b",
|
||||||
|
Expr.ifThenElse (Expr.ifThenElse (Expr.var "x") (Expr.var "y") (Expr.var "z")) (Expr.var "a") (Expr.var "b")
|
||||||
"if x + 1 then y else z + 3",
|
"if x + 1 then y else z + 3",
|
||||||
Expr.ifThenElse
|
Expr.ifThenElse
|
||||||
(Expr.plus (Expr.var "x") (Expr.constInt 1))
|
(Expr.plus (Expr.var "x") (Expr.constInt 1))
|
||||||
|
@@ -8,6 +8,11 @@ namespace PrattParser
|
|||||||
/// The trailing clause will consume as much as it can, so e.g. `if foo then bar!` would parse as
|
/// The trailing clause will consume as much as it can, so e.g. `if foo then bar!` would parse as
|
||||||
/// `if foo then (bar!)`.
|
/// `if foo then (bar!)`.
|
||||||
///
|
///
|
||||||
|
/// Optionally you can specify that the bracket-like token consumes something at the beginning too:
|
||||||
|
/// for example, `a.[3]` is a bracket pair `.[` and `]` with two inputs.
|
||||||
|
/// Note that you could use this to implement binary operators, but they will bind as loosely as possible
|
||||||
|
/// if you do this, and it's less efficient, and it's probably confusing to think about associativity.
|
||||||
|
///
|
||||||
/// Optionally you can specify a single construct with multiple delimiters:
|
/// Optionally you can specify a single construct with multiple delimiters:
|
||||||
/// for example, `if...then...else...` consumes three expressions.
|
/// for example, `if...then...else...` consumes three expressions.
|
||||||
type BracketLikeParser<'tokenTag, 'expr> =
|
type BracketLikeParser<'tokenTag, 'expr> =
|
||||||
@@ -15,6 +20,8 @@ type BracketLikeParser<'tokenTag, 'expr> =
|
|||||||
/// Whether to consume input after the final token, e.g. like `if...then...else...` consumes,
|
/// Whether to consume input after the final token, e.g. like `if...then...else...` consumes,
|
||||||
/// whereas `(...)` does not.
|
/// whereas `(...)` does not.
|
||||||
ConsumeAfterFinalToken : bool
|
ConsumeAfterFinalToken : bool
|
||||||
|
/// Whether to consume the input before the initial token, e.g. like `a.[5]` consumes the `a`.
|
||||||
|
ConsumeBeforeInitialToken : bool
|
||||||
/// The successive list of delimiters after the initial delimiter that "opens the brackets".
|
/// The successive list of delimiters after the initial delimiter that "opens the brackets".
|
||||||
/// For example, this might be `[then]`, or `[then ; else]`, or `[')']`.
|
/// For example, this might be `[then]`, or `[then ; else]`, or `[')']`.
|
||||||
BoundaryTokens : 'tokenTag list
|
BoundaryTokens : 'tokenTag list
|
||||||
@@ -142,6 +149,11 @@ module Parser =
|
|||||||
|
|
||||||
/// Add a bracket-like parser to the parser, introduced by a given delimiter.
|
/// Add a bracket-like parser to the parser, introduced by a given delimiter.
|
||||||
/// See the docs for BracketLikeParser.
|
/// See the docs for BracketLikeParser.
|
||||||
|
///
|
||||||
|
/// If you have multiple `BracketLikeParser`s, each with the same beginning delimiter,
|
||||||
|
/// we will try them all, and return the valid parse which matched the most bracket-like tokens.
|
||||||
|
/// It's probably possible to create an ambiguous parse this way with an inappropriate grammar;
|
||||||
|
/// if this happens while parsing, we throw.
|
||||||
let withBracketLike<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
let withBracketLike<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(tokenType : 'tokenTag)
|
(tokenType : 'tokenTag)
|
||||||
(toAdd : BracketLikeParser<'tokenTag, 'expr>)
|
(toAdd : BracketLikeParser<'tokenTag, 'expr>)
|
||||||
@@ -237,9 +249,7 @@ module Parser =
|
|||||||
|
|
||||||
let lhs, rest =
|
let lhs, rest =
|
||||||
match parser.Atom inputString firstToken with
|
match parser.Atom inputString firstToken with
|
||||||
| Some token ->
|
| Some token -> token, rest
|
||||||
printfn "Parsed an atom: %+A" token
|
|
||||||
token, rest
|
|
||||||
| None ->
|
| None ->
|
||||||
|
|
||||||
match parser.BracketLike.TryGetValue (parser.GetTag firstToken) with
|
match parser.BracketLike.TryGetValue (parser.GetTag firstToken) with
|
||||||
@@ -257,9 +267,7 @@ module Parser =
|
|||||||
|
|
||||||
match parser.UnaryPrefix.TryGetValue (parser.GetTag firstToken) with
|
match parser.UnaryPrefix.TryGetValue (parser.GetTag firstToken) with
|
||||||
| true, (((), precedence), assemble) ->
|
| true, (((), precedence), assemble) ->
|
||||||
printfn "Parsing a prefix op: %+A" firstToken
|
|
||||||
let rhs, rest = parseInner parser inputString rest precedence
|
let rhs, rest = parseInner parser inputString rest precedence
|
||||||
printfn "Returning to parse of prefix op: %+A, remaining tokens: %+A" firstToken rest
|
|
||||||
assemble rhs, rest
|
assemble rhs, rest
|
||||||
| false, _ -> failwithf "didn't get an atom or prefix, got: %+A" firstToken
|
| false, _ -> failwithf "didn't get an atom or prefix, got: %+A" firstToken
|
||||||
|
|
||||||
@@ -268,25 +276,35 @@ module Parser =
|
|||||||
| [] -> lhs, []
|
| [] -> lhs, []
|
||||||
| op :: rest ->
|
| op :: rest ->
|
||||||
|
|
||||||
|
let fromBracketed =
|
||||||
|
match parser.BracketLike.TryGetValue (parser.GetTag op) with
|
||||||
|
| true, parse ->
|
||||||
|
let parse = parse |> List.filter _.ConsumeBeforeInitialToken
|
||||||
|
|
||||||
|
match parseBracketLike parser inputString parse [ lhs ] rest with
|
||||||
|
| [ result ] -> Some result
|
||||||
|
| _ :: _ -> failwithf "Ambiguous parse (multiple matches) at token %+A" op
|
||||||
|
| [] -> None
|
||||||
|
| false, _ -> None
|
||||||
|
|
||||||
|
match fromBracketed with
|
||||||
|
| Some (lhs, rest) -> go lhs rest
|
||||||
|
| None ->
|
||||||
|
|
||||||
match parser.UnaryPostfix.TryGetValue (parser.GetTag op) with
|
match parser.UnaryPostfix.TryGetValue (parser.GetTag op) with
|
||||||
| true, ((precedence, ()), construct) ->
|
| true, ((precedence, ()), construct) ->
|
||||||
if precedence < minBinding then
|
if precedence < minBinding then
|
||||||
printfn "Hit a postfix op which does not bind: %+A" op
|
|
||||||
lhs, rest
|
lhs, rest
|
||||||
else
|
else
|
||||||
printfn "Hit a postfix op which binds: %+A" op
|
|
||||||
go (construct lhs) rest
|
go (construct lhs) rest
|
||||||
| false, _ ->
|
| false, _ ->
|
||||||
|
|
||||||
match parser.Infix.TryGetValue (parser.GetTag op) with
|
match parser.Infix.TryGetValue (parser.GetTag op) with
|
||||||
| true, ((leftBinding, rightBinding), construct) ->
|
| true, ((leftBinding, rightBinding), construct) ->
|
||||||
if leftBinding < minBinding then
|
if leftBinding < minBinding then
|
||||||
printfn "Hit an infix op which does not bind on the left: %+A" op
|
|
||||||
lhs, op :: rest
|
lhs, op :: rest
|
||||||
else
|
else
|
||||||
|
|
||||||
printfn "Hit an infix op which binds on the left: %+A" op
|
|
||||||
|
|
||||||
let rhs, remainingTokens = parseInner parser inputString rest rightBinding
|
let rhs, remainingTokens = parseInner parser inputString rest rightBinding
|
||||||
|
|
||||||
go (construct lhs rhs) remainingTokens
|
go (construct lhs rhs) remainingTokens
|
||||||
|
Reference in New Issue
Block a user