mirror of
https://github.com/Smaug123/WoofWare.PrattParser
synced 2025-10-06 01:48:39 +00:00
Comments
This commit is contained in:
@@ -28,8 +28,7 @@ module Example =
|
|||||||
| TokenType.RightBracket -> None
|
| TokenType.RightBracket -> None
|
||||||
|
|
||||||
let parser =
|
let parser =
|
||||||
Parser.empty _.Type
|
Parser.make (fun token -> token.Type) atom
|
||||||
|> Parser.defineAtoms atom
|
|
||||||
|> Parser.withUnaryPostfix TokenType.Factorial (7, ()) Expr.factorial
|
|> Parser.withUnaryPostfix TokenType.Factorial (7, ()) Expr.factorial
|
||||||
|> Parser.withUnaryPrefix TokenType.Plus ((), 5) id
|
|> Parser.withUnaryPrefix TokenType.Plus ((), 5) id
|
||||||
|> Parser.withUnaryPrefix TokenType.Minus ((), 5) Expr.unaryMinus
|
|> Parser.withUnaryPrefix TokenType.Minus ((), 5) Expr.unaryMinus
|
||||||
|
@@ -25,6 +25,8 @@ module TestParser =
|
|||||||
(Expr.plus (Expr.var "x") (Expr.constInt 1))
|
(Expr.plus (Expr.var "x") (Expr.constInt 1))
|
||||||
(Expr.var "y")
|
(Expr.var "y")
|
||||||
(Expr.plus (Expr.var "z") (Expr.constInt 3))
|
(Expr.plus (Expr.var "z") (Expr.constInt 3))
|
||||||
|
"if x + 1 then y else z!",
|
||||||
|
Expr.ifThenElse (Expr.plus (Expr.var "x") (Expr.constInt 1)) (Expr.var "y") (Expr.factorial (Expr.var "z"))
|
||||||
"(if x + 1 then y else z) + 3",
|
"(if x + 1 then y else z) + 3",
|
||||||
Expr.plus
|
Expr.plus
|
||||||
(Expr.paren (Expr.ifThenElse (Expr.plus (Expr.var "x") (Expr.constInt 1)) (Expr.var "y") (Expr.var "z")))
|
(Expr.paren (Expr.ifThenElse (Expr.plus (Expr.var "x") (Expr.constInt 1)) (Expr.var "y") (Expr.var "z")))
|
||||||
@@ -46,6 +48,6 @@ module TestParser =
|
|||||||
let ``Parser looks plausible`` (input : string, expected : Expr) =
|
let ``Parser looks plausible`` (input : string, expected : Expr) =
|
||||||
let tokens = Lexer.lex input |> List.ofSeq
|
let tokens = Lexer.lex input |> List.ofSeq
|
||||||
|
|
||||||
let expr, remaining = Parser.parse Example.parser input tokens
|
let expr, remaining = Parser.execute Example.parser input tokens
|
||||||
remaining |> shouldEqual []
|
remaining |> shouldEqual []
|
||||||
expr |> shouldEqual expected
|
expr |> shouldEqual expected
|
||||||
|
@@ -1,28 +1,56 @@
|
|||||||
namespace PrattParser
|
namespace PrattParser
|
||||||
|
|
||||||
|
/// Specification of how to parse things which act like brackets: that is, they start with a token,
|
||||||
|
/// then consume some stuff, then there's another token to mark the end.
|
||||||
|
///
|
||||||
|
/// Optionally you can specify that the bracket-like token consumes something at the end too:
|
||||||
|
/// for example, `if...then...` does not have a trailing "end-if".
|
||||||
|
/// The trailing clause will consume as much as it can, so e.g. `if foo then bar!` would parse as
|
||||||
|
/// `if foo then (bar!)`.
|
||||||
|
///
|
||||||
|
/// Optionally you can specify a single construct with multiple delimiters:
|
||||||
|
/// for example, `if...then...else...` consumes three expressions.
|
||||||
type BracketLikeParser<'tokenTag, 'expr> =
|
type BracketLikeParser<'tokenTag, 'expr> =
|
||||||
{
|
{
|
||||||
/// Whether to consume input after the final token, e.g. like `if...then...else...` consumes,
|
/// Whether to consume input after the final token, e.g. like `if...then...else...` consumes,
|
||||||
/// whereas `(...)` does not
|
/// whereas `(...)` does not.
|
||||||
ConsumeAfterFinalToken : bool
|
ConsumeAfterFinalToken : bool
|
||||||
|
/// The successive list of delimiters after the initial delimiter that "opens the brackets".
|
||||||
|
/// For example, this might be `[then]`, or `[then ; else]`, or `[')']`.
|
||||||
BoundaryTokens : 'tokenTag list
|
BoundaryTokens : 'tokenTag list
|
||||||
|
/// How to build an expression given that you've got all the constituent chunks that came
|
||||||
|
/// between the delimiters.
|
||||||
|
///
|
||||||
|
/// We guarantee that the input list will have (as many elements as BoundaryTokens)+1
|
||||||
|
/// if ConsumeAfterFinalToken is true, or as many elements as BoundaryTokens
|
||||||
|
/// if ConsumeAfterFinalToken is false.
|
||||||
Construct : 'expr list -> 'expr
|
Construct : 'expr list -> 'expr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An entity which knows how to parse a stream of 'tokens into an 'expr.
|
||||||
type Parser<'tokenTag, 'token, 'expr> when 'tokenTag : comparison =
|
type Parser<'tokenTag, 'token, 'expr> when 'tokenTag : comparison =
|
||||||
{
|
private
|
||||||
GetTag : 'token -> 'tokenTag
|
{
|
||||||
UnaryPrefix : Map<'tokenTag, (unit * int) * ('expr -> 'expr)>
|
GetTag : 'token -> 'tokenTag
|
||||||
UnaryPostfix : Map<'tokenTag, (int * unit) * ('expr -> 'expr)>
|
UnaryPrefix : Map<'tokenTag, (unit * int) * ('expr -> 'expr)>
|
||||||
Infix : Map<'tokenTag, (int * int) * ('expr -> 'expr -> 'expr)>
|
UnaryPostfix : Map<'tokenTag, (int * unit) * ('expr -> 'expr)>
|
||||||
Atom : string -> 'token -> 'expr option
|
Infix : Map<'tokenTag, (int * int) * ('expr -> 'expr -> 'expr)>
|
||||||
BracketLike : Map<'tokenTag, BracketLikeParser<'tokenTag, 'expr> list>
|
Atom : string -> 'token -> 'expr option
|
||||||
}
|
BracketLike : Map<'tokenTag, BracketLikeParser<'tokenTag, 'expr> list>
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Module for constructing and executing Parsers.
|
||||||
[<RequireQualifiedAccess>]
|
[<RequireQualifiedAccess>]
|
||||||
module Parser =
|
module Parser =
|
||||||
let empty<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
/// The basic parser with the minimum possible information.
|
||||||
|
/// You specify how to take a token and get a tag from it,
|
||||||
|
/// and you specify how to convert atoms (such as constant ints, or variables) into expressions.
|
||||||
|
///
|
||||||
|
/// The atom-parsing function is given the entire source string, as well as the 'token
|
||||||
|
/// of which we are asking "is this an atom, and if so, how shall it be represented in the AST?".
|
||||||
|
let make<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(getTag : 'token -> 'tokenTag)
|
(getTag : 'token -> 'tokenTag)
|
||||||
|
(atoms : string -> 'token -> 'expr option)
|
||||||
: Parser<'tokenTag, 'token, 'expr>
|
: Parser<'tokenTag, 'token, 'expr>
|
||||||
=
|
=
|
||||||
{
|
{
|
||||||
@@ -30,15 +58,20 @@ module Parser =
|
|||||||
UnaryPrefix = Map.empty
|
UnaryPrefix = Map.empty
|
||||||
UnaryPostfix = Map.empty
|
UnaryPostfix = Map.empty
|
||||||
Infix = Map.empty
|
Infix = Map.empty
|
||||||
Atom = fun _ _ -> None
|
Atom = atoms
|
||||||
BracketLike = Map.empty
|
BracketLike = Map.empty
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a prefix operator to this parser.
|
||||||
|
/// The precedence is an int, where higher numbers bind more tightly.
|
||||||
|
/// (Following [matklad](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html), we
|
||||||
|
/// express this as `unit * int` to make it clear that it's binding on the right.)
|
||||||
let withUnaryPrefix<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
let withUnaryPrefix<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(tokenType : 'tokenTag)
|
(tokenType : 'tokenTag)
|
||||||
(precedence : unit * int)
|
(precedence : unit * int)
|
||||||
(construct : 'expr -> 'expr)
|
(construct : 'expr -> 'expr)
|
||||||
(parser : Parser<'tokenTag, 'token, 'expr>)
|
(parser : Parser<'tokenTag, 'token, 'expr>)
|
||||||
|
: Parser<'tokenTag, 'token, 'expr>
|
||||||
=
|
=
|
||||||
{ parser with
|
{ parser with
|
||||||
UnaryPrefix =
|
UnaryPrefix =
|
||||||
@@ -52,11 +85,16 @@ module Parser =
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a postfix operator to this parser.
|
||||||
|
/// The precedence is an int, where higher numbers bind more tightly.
|
||||||
|
/// (Following [matklad](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html), we
|
||||||
|
/// express this as `int * unit` to make it clear that it's binding on the left.)
|
||||||
let withUnaryPostfix<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
let withUnaryPostfix<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(tokenType : 'tokenTag)
|
(tokenType : 'tokenTag)
|
||||||
(precedence : int * unit)
|
(precedence : int * unit)
|
||||||
(construct : 'expr -> 'expr)
|
(construct : 'expr -> 'expr)
|
||||||
(parser : Parser<'tokenTag, 'token, 'expr>)
|
(parser : Parser<'tokenTag, 'token, 'expr>)
|
||||||
|
: Parser<'tokenTag, 'token, 'expr>
|
||||||
=
|
=
|
||||||
{ parser with
|
{ parser with
|
||||||
UnaryPostfix =
|
UnaryPostfix =
|
||||||
@@ -70,6 +108,19 @@ module Parser =
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a binary infix operator to this parser.
|
||||||
|
/// The precedence is a pair of ints, where higher numbers bind more tightly.
|
||||||
|
///
|
||||||
|
/// For example, to make an operator associate on the left, you would give it
|
||||||
|
/// tighter (higher-precedence) binding on the right, whereupon parsing would proceed as follows:
|
||||||
|
///
|
||||||
|
/// 1 + 2 + 3 := 1 +2 +3
|
||||||
|
///
|
||||||
|
/// after which the only possible bracketing that doesn't split up a tightly-bound operator is:
|
||||||
|
///
|
||||||
|
/// (1 + 2) + 3
|
||||||
|
///
|
||||||
|
/// This situation could be specified with a precedence of (n, n + 1), for example.
|
||||||
let withInfix<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
let withInfix<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(tokenType : 'tokenTag)
|
(tokenType : 'tokenTag)
|
||||||
(precedence : int * int)
|
(precedence : int * int)
|
||||||
@@ -89,6 +140,8 @@ module Parser =
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a bracket-like parser to the parser, introduced by a given delimiter.
|
||||||
|
/// See the docs for BracketLikeParser.
|
||||||
let withBracketLike<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
let withBracketLike<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(tokenType : 'tokenTag)
|
(tokenType : 'tokenTag)
|
||||||
(toAdd : BracketLikeParser<'tokenTag, 'expr>)
|
(toAdd : BracketLikeParser<'tokenTag, 'expr>)
|
||||||
@@ -107,15 +160,6 @@ module Parser =
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
let defineAtoms<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
|
||||||
(atom : string -> 'token -> 'expr option)
|
|
||||||
(parser : Parser<'tokenTag, 'token, 'expr>)
|
|
||||||
: Parser<'tokenTag, 'token, 'expr>
|
|
||||||
=
|
|
||||||
{ parser with
|
|
||||||
Atom = atom
|
|
||||||
}
|
|
||||||
|
|
||||||
let rec private parseBracketLike
|
let rec private parseBracketLike
|
||||||
(parser : Parser<'tokenTag, 'token, 'expr>)
|
(parser : Parser<'tokenTag, 'token, 'expr>)
|
||||||
(inputString : string)
|
(inputString : string)
|
||||||
@@ -252,9 +296,14 @@ module Parser =
|
|||||||
|
|
||||||
go lhs rest
|
go lhs rest
|
||||||
|
|
||||||
let parse<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
/// Execute the given parser against a string which was tokenised in the given way.
|
||||||
|
/// We give you the string so that you may have your tokens slice into it.
|
||||||
|
///
|
||||||
|
/// Returns the parsed expression, and any leftover tokens that may be trailing.
|
||||||
|
let execute<'tokenTag, 'token, 'expr when 'tokenTag : comparison>
|
||||||
(parser : Parser<'tokenTag, 'token, 'expr>)
|
(parser : Parser<'tokenTag, 'token, 'expr>)
|
||||||
(inputString : string)
|
(inputString : string)
|
||||||
(tokens : 'token list)
|
(tokens : 'token list)
|
||||||
|
: 'expr * 'token list
|
||||||
=
|
=
|
||||||
parseInner parser inputString tokens 0
|
parseInner parser inputString tokens 0
|
||||||
|
Reference in New Issue
Block a user