crafting-interpreters-hs/src/Lox/Parser.hs

318 lines
13 KiB
Haskell
Raw Normal View History

2025-12-25 17:55:28 +03:00
module Lox.Parser (
2025-12-30 05:43:01 +03:00
SyntaxError (..),
2025-12-25 18:01:59 +03:00
parse
2025-12-25 17:55:28 +03:00
) where
import Control.Monad
import Control.Monad.State
2025-12-30 02:56:06 +03:00
import Data.Either
import Data.Maybe
2025-12-25 17:55:28 +03:00
import Lox.Scanner
import Lox.Expr
2025-12-30 05:43:01 +03:00
import Control.Monad.Extra (ifM)
2025-12-25 17:55:28 +03:00
-- | State threaded through the parser: the tokens not yet consumed.
data ParserState = ParserState
  { tokens :: [Token]
  }
2025-12-30 05:43:01 +03:00
-- | A parse failure, carrying a human-readable message.
data SyntaxError
  = SyntaxError String
  deriving (Show)
2025-12-30 01:27:01 +03:00
2025-12-30 02:56:06 +03:00
-- program → declaration* EOF ;
--
-- declaration → varDecl
-- | statement ;
2025-12-30 01:27:01 +03:00
--
-- statement → exprStmt
2025-12-30 06:43:29 +03:00
-- | forStmt
2025-12-30 05:43:01 +03:00
-- | ifStmt
2025-12-30 03:45:21 +03:00
-- | printStmt
2025-12-30 06:43:29 +03:00
-- | whileStmt
2025-12-30 03:45:21 +03:00
-- | block ;
2025-12-30 01:27:01 +03:00
--
-- exprStmt → expression ";" ;
2025-12-30 06:43:29 +03:00
-- forStmt → "for" "(" ( varDecl | exprStmt | ";" )
-- expression? ";"
-- expression? ")" statement ;
2025-12-30 05:43:01 +03:00
-- ifStmt → "if" "(" expression ")" statement
-- ( "else" statement )? ;
2025-12-30 01:27:01 +03:00
-- printStmt → "print" expression ";" ;
2025-12-30 06:43:29 +03:00
-- whileStmt → "while" "(" expression ")" statement ;
2025-12-30 02:56:06 +03:00
-- varDecl → "var" IDENTIFIER ( "=" expression )? ";" ;
2025-12-30 03:45:21 +03:00
-- block → "{" declaration* "}" ;
2025-12-30 05:43:01 +03:00
--
2025-12-30 02:56:06 +03:00
-- expression → assignment ;
-- assignment → IDENTIFIER "=" assignment
2025-12-30 05:43:01 +03:00
-- | logic_or;
-- logic_or → logic_and ( "or" logic_and )* ;
-- logic_and → equality ( "and" equality )* ;
2025-12-25 17:55:28 +03:00
-- equality → comparison ( ( "!=" | "==" ) comparison )* ;
-- comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
-- term → factor ( ( "-" | "+" ) factor )* ;
-- factor → unary ( ( "/" | "*" ) unary )* ;
-- unary → ( "!" | "-" ) unary
-- | primary ;
-- primary → NUMBER | STRING | "true" | "false" | "nil"
2025-12-30 02:56:06 +03:00
-- | "(" expression ")" | IDENTIFIER;
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a token stream into a list of statements.
--
-- Runs 'program' over the tokens and returns either the 'SyntaxError' it
-- reports or the parsed statements. An empty token list (not even an EOF
-- token) is treated as an empty program instead of being handed to the
-- parser, whose lookahead takes the head of the token list.
parse :: [Token] -> Either SyntaxError [Stmt]
parse [] = Right []
parse input = evalState program (ParserState {tokens = input})
2025-12-30 05:43:01 +03:00
-- | Parse declarations until the EOF token is next.
--
-- Stops at the first declaration that fails and returns its error;
-- otherwise returns the declarations in source order.
program :: State ParserState (Either SyntaxError [Stmt])
program = do
  finished <- isAtEnd
  if finished
    then pure (Right [])
    else declaration >>= either (pure . Left) parseRest
  where
    -- Prepend the declaration just parsed to whatever the rest yields.
    parseRest stmt = fmap (stmt :) <$> program
2025-12-30 05:43:01 +03:00
-- | Parse a declaration: a variable declaration when the next token is
-- @var@ (which is consumed here), otherwise a statement.
declaration :: State ParserState (Either SyntaxError Stmt)
declaration = matchToken [VAR] >>= maybe statement (const varDeclaration)
2025-12-30 05:43:01 +03:00
-- | Parse the remainder of a variable declaration (the @var@ keyword has
-- already been consumed by the caller).
--
-- A missing initializer defaults to a @nil@ literal. The terminating
-- semicolon is looked for even when the initializer failed to parse; an
-- initializer error takes precedence over a missing-semicolon error.
varDeclaration :: State ParserState (Either SyntaxError Stmt)
varDeclaration = do
  nameResult <- consume IDENTIFIER (SyntaxError "Expected variable name")
  case nameResult of
    Left err -> pure (Left err)
    Right name -> do
      equalsTok <- matchToken [EQUAL]
      initResult <-
        maybe (pure (Right (LiteralExpr NullObject))) (const expression) equalsTok
      semicolonResult <- consume SEMICOLON (SyntaxError "Expected semicolon")
      pure (VariableStmt name <$> initResult <* semicolonResult)
2025-12-30 01:27:01 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a statement, dispatching on the leading keyword or brace.
--
-- The leading token (@for@, @if@, @print@, @while@ or @{@) is consumed here;
-- anything else is parsed as an expression statement.
statement :: State ParserState (Either SyntaxError Stmt)
statement = do
  leading <- fmap tokenType <$> matchToken [FOR, IF, PRINT, WHILE, LEFT_BRACE]
  case leading of
    Just FOR -> forStatement
    Just IF -> ifStatement
    Just PRINT -> printStatement
    Just WHILE -> whileStatement
    Just LEFT_BRACE -> do
      body <- fmap BlockStmt <$> block
      closing <- consume RIGHT_BRACE $ SyntaxError "Expected '}' after block"
      -- Report errors in source order: a failure inside the block comes
      -- before (and usually causes) a missing '}', so it must win.
      return $ body <* closing
    _ -> expressionStatement
2025-12-30 05:43:01 +03:00
-- | Parse the declarations inside a block, up to but not including the
-- closing @}@ (the caller consumes it).
--
-- Parsing also stops when the end of input is reached, so an unterminated
-- block yields the declarations parsed so far and lets the caller report the
-- missing brace, instead of reading past the end of the token stream.
-- Declarations after a failed one are still parsed; the first error wins.
block :: State ParserState (Either SyntaxError [Stmt])
block = do
  closing <- check RIGHT_BRACE
  exhausted <- isAtEnd
  if closing || exhausted
    then return $ Right []
    else do
      declMaybe <- declaration
      tailMaybe <- block
      return $ (:) <$> declMaybe <*> tailMaybe
2025-12-30 06:43:29 +03:00
-- | Parse a @for@ statement (the @for@ keyword is already consumed) and
-- desugar it into blocks and a 'WhileStmt'.
--
-- The result has the shape @{ initializer; while (condition) { body;
-- increment; } }@, where a missing condition becomes the literal @true@ and
-- missing initializer/increment clauses drop their wrapping block.
-- Errors for the punctuation tokens are reported before errors from the
-- clauses and the body.
forStatement :: State ParserState (Either SyntaxError Stmt)
forStatement = do
  openParen <- consume LEFT_PAREN (SyntaxError "Expected '(' after 'for'")
  leading <- fmap tokenType <$> matchToken [SEMICOLON, VAR]
  initClause <- case leading of
    Just SEMICOLON -> pure Nothing
    Just VAR -> Just <$> varDeclaration
    _ -> Just <$> expressionStatement
  condClause <- optionalExpr SEMICOLON
  condSemi <- consume SEMICOLON (SyntaxError "Expected ';' after loop condition")
  stepClause <- optionalExpr RIGHT_PAREN
  closeParen <- consume RIGHT_PAREN (SyntaxError "Expected ')' after for clauses")
  loopBody <- statement
  let -- Body followed by the increment, when there is one.
      withStep =
        maybe
          loopBody
          (\inc -> BlockStmt <$> (pair <$> loopBody <*> (ExpressionStmt <$> inc)))
          stepClause
      guardExpr = fromMaybe (Right (LiteralExpr (BoolObject True))) condClause
      loop = WhileStmt <$> guardExpr <*> withStep
      -- Initializer followed by the loop, when there is one.
      desugared =
        maybe loop (\ini -> BlockStmt <$> (pair <$> ini <*> loop)) initClause
  pure (openParen >> condSemi >> closeParen >> desugared)
  where
    -- Parse an expression unless the given terminator is the next token.
    optionalExpr terminator =
      ifM (check terminator) (pure Nothing) (Just <$> expression)
    pair x y = [x, y]
2025-12-30 05:43:01 +03:00
-- | Parse an @if@ statement (the @if@ keyword is already consumed), with an
-- optional @else@ branch.
--
-- All pieces are parsed even after a failure; the reported error is the
-- first of: missing @(@, condition, missing @)@, then-branch, else-branch.
ifStatement :: State ParserState (Either SyntaxError Stmt)
ifStatement = do
  openParen <- consume LEFT_PAREN (SyntaxError "Expected '(' after 'if'")
  cond <- expression
  closeParen <- consume RIGHT_PAREN (SyntaxError "Expected ')' after if condition")
  thenBranch <- statement
  elseTok <- matchToken [ELSE]
  elseBranch <-
    maybe (pure (Right Nothing)) (const (fmap Just <$> statement)) elseTok
  let guarded = openParen *> cond <* closeParen
  pure (IfStmt <$> guarded <*> thenBranch <*> elseBranch)
-- | Parse a @print@ statement (the @print@ keyword is already consumed):
-- an expression followed by a semicolon.
--
-- An expression error takes precedence over a missing semicolon.
printStatement :: State ParserState (Either SyntaxError Stmt)
printStatement = do
  value <- expression
  terminator <- consume SEMICOLON (SyntaxError "Expected ';'")
  pure (PrintStmt <$> value <* terminator)
2025-12-30 01:27:01 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse an expression statement: an expression followed by a semicolon.
--
-- An expression error takes precedence over a missing semicolon.
expressionStatement :: State ParserState (Either SyntaxError Stmt)
expressionStatement = do
  value <- expression
  terminator <- consume SEMICOLON (SyntaxError "Expected ';'")
  pure (ExpressionStmt <$> value <* terminator)
2025-12-30 01:27:01 +03:00
2025-12-30 06:43:29 +03:00
-- | Parse a @while@ statement (the @while@ keyword is already consumed).
--
-- All pieces are parsed even after a failure; the reported error is the
-- first of: missing @(@, condition, missing @)@, body.
whileStatement :: State ParserState (Either SyntaxError Stmt)
whileStatement = do
  -- The messages name 'while'; they previously said 'if' (copy-paste slip).
  leftParenMaybe <- consume LEFT_PAREN $ SyntaxError "Expected '(' after 'while'"
  conditionMaybe <- expression
  rightParenMaybe <- consume RIGHT_PAREN $ SyntaxError "Expected ')' after while condition"
  bodyMaybe <- statement
  return $ WhileStmt <$> (leftParenMaybe >> conditionMaybe <* rightParenMaybe) <*> bodyMaybe
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse an expression. This is the entry point of the expression grammar
-- and simply delegates to 'assignment'.
expression :: State ParserState (Either SyntaxError Expr)
2025-12-30 02:56:06 +03:00
expression = assignment
2025-12-30 05:43:01 +03:00
-- | Parse an assignment, or fall through to a logical-or expression.
--
-- The left-hand side is parsed as an ordinary expression first; if an @=@
-- follows, the right-hand side is parsed recursively and the left-hand side
-- must turn out to be a 'VariableExpr', otherwise the result is an
-- "Invalid assignment target" error.
assignment :: State ParserState (Either SyntaxError Expr)
assignment = do
  target <- logicalOr
  equalsTok <- matchToken [EQUAL]
  case equalsTok of
    Nothing -> pure target
    Just _ -> toAssignment target <$> assignment
  where
    -- Errors from either side win over the invalid-target check.
    toAssignment (Left err) _ = Left err
    toAssignment _ (Left err) = Left err
    toAssignment (Right (VariableExpr name)) (Right value) =
      Right (AssignmentExpr name value)
    toAssignment _ _ = Left (SyntaxError "Invalid assignment target")
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a 'logicalAnd' expression and merge any @or@ tail found by
-- 'matchTailLogical' into a 'LogicalExpr'.
logicalOr :: State ParserState (Either SyntaxError Expr)
logicalOr = logicalAnd >>= either (pure . Left) continue
  where
    continue lhs =
      fmap (mergeExpressionLogicalMaybe lhs) <$> matchTailLogical [OR] logicalAnd
-- | Parse an 'equality' expression and merge any @and@ tail found by
-- 'matchTailLogical' into a 'LogicalExpr'.
--
-- Grammar: @logic_and → equality ( "and" equality )*@. The operand parser
-- handed to 'matchTailLogical' is therefore 'equality', matching how every
-- other precedence level passes the next-higher level.
logicalAnd :: State ParserState (Either SyntaxError Expr)
logicalAnd = do
  exprMaybe <- equality
  case exprMaybe of
    Left err -> return $ Left err
    Right expr -> fmap (mergeExpressionLogicalMaybe expr) <$> matchTailLogical [AND] equality
-- | Parse a 'comparison' expression and merge any @!=@ / @==@ tail found by
-- 'matchTail' into a 'BinaryExpr'.
equality :: State ParserState (Either SyntaxError Expr)
equality = comparison >>= either (pure . Left) continue
  where
    continue lhs =
      fmap (mergeExpressionMaybe lhs) <$> matchTail [BANG_EQUAL, EQUAL_EQUAL] comparison
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a 'term' expression and merge any @>@, @>=@, @<@ or @<=@ tail
-- found by 'matchTail' into a 'BinaryExpr'.
comparison :: State ParserState (Either SyntaxError Expr)
comparison = term >>= either (pure . Left) continue
  where
    relational = [GREATER, GREATER_EQUAL, LESS, LESS_EQUAL]
    continue lhs = fmap (mergeExpressionMaybe lhs) <$> matchTail relational term
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a 'factor' expression and merge any @-@ / @+@ tail found by
-- 'matchTail' into a 'BinaryExpr'.
term :: State ParserState (Either SyntaxError Expr)
term = factor >>= either (pure . Left) continue
  where
    continue lhs = fmap (mergeExpressionMaybe lhs) <$> matchTail [MINUS, PLUS] factor
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a 'unary' expression and merge any @/@ / @*@ tail found by
-- 'matchTail' into a 'BinaryExpr'.
factor :: State ParserState (Either SyntaxError Expr)
factor = unary >>= either (pure . Left) continue
  where
    continue lhs = fmap (mergeExpressionMaybe lhs) <$> matchTail [SLASH, STAR] unary
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a unary expression: zero or more prefix @!@ / @-@ operators
-- applied to a 'primary' expression.
unary :: State ParserState (Either SyntaxError Expr)
unary = matchToken [BANG, MINUS] >>= maybe primary applyOperator
  where
    -- The operator was consumed; its operand is another unary expression.
    applyOperator op = fmap (UnaryExpr op) <$> unary
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Parse a primary expression: a literal, an identifier, or a
-- parenthesised expression.
--
-- The next token is always consumed, including when it cannot start an
-- expression (in which case "Expected expression" is returned). For a
-- grouping, a missing closing parenthesis is reported as
-- "Mismatched parentheses".
primary :: State ParserState (Either SyntaxError Expr)
primary = do
  token <- advance
  case tokenType token of
    FALSE -> return $ Right $ LiteralExpr $ BoolObject False
    TRUE -> return $ Right $ LiteralExpr $ BoolObject True
    NIL -> return $ Right $ LiteralExpr NullObject
    NUMBER -> return $ Right $ LiteralExpr $ tokenObject token
    STRING -> return $ Right $ LiteralExpr $ tokenObject token
    LEFT_PAREN -> do
      exprMaybe <- expression
      case exprMaybe of
        Left err -> return $ Left err
        Right expr -> do
          -- The outcome of 'consume' must be propagated; discarding it
          -- would silently accept an unclosed "(".
          closing <- consume RIGHT_PAREN $ SyntaxError "Mismatched parentheses"
          return $ GroupingExpr expr <$ closing
    IDENTIFIER -> return $ Right $ VariableExpr token
    _ -> return $ Left $ SyntaxError "Expected expression"
-- | 'matchTailWith' specialised to build 'BinaryExpr' nodes via
-- 'mergeExpressionMaybe'. Both arguments are forwarded unchanged.
matchTail :: [TokenType] -> State ParserState (Either SyntaxError Expr) -> State ParserState (Either SyntaxError (Maybe (Token, Expr)))
matchTail tokenTypes operand =
  matchTailWith tokenTypes mergeExpressionMaybe operand
2025-12-29 23:57:15 +03:00
2025-12-30 05:43:01 +03:00
-- | 'matchTailWith' specialised to build 'LogicalExpr' nodes via
-- 'mergeExpressionLogicalMaybe'. Both arguments are forwarded unchanged.
matchTailLogical :: [TokenType] -> State ParserState (Either SyntaxError Expr) -> State ParserState (Either SyntaxError (Maybe (Token, Expr)))
matchTailLogical tokenTypes operand =
  matchTailWith tokenTypes mergeExpressionLogicalMaybe operand
-- | Parse the optional @( operator operand )*@ tail of a binary rule.
--
-- Arguments: the operator token types accepted at this precedence level, the
-- function used to merge an operand with the rest of its tail, and the
-- parser for a single operand.
--
-- Returns @Right Nothing@ when the next token is not one of the operators;
-- otherwise the matched operator paired with the expression to its right.
-- After a failed operand the rest of the tail is still parsed, and the
-- operand's error takes precedence.
--
-- Each operand is parsed with the supplied operand parser and the recursion
-- keeps the same merge function, so every precedence level only consumes
-- operands of its own level and logical tails stay logical.
--
-- NOTE(review): the tail is folded into the right operand (@m expr rest@),
-- so chains nest to the right: @a - b - c@ becomes @a - (b - c)@. Making the
-- operators left-associative needs the left operand threaded through here,
-- which changes this function's contract with its callers.
matchTailWith :: [TokenType] -> (Expr -> Maybe (Token, Expr) -> Expr) -> State ParserState (Either SyntaxError Expr) -> State ParserState (Either SyntaxError (Maybe (Token, Expr)))
matchTailWith tokenTypes m f = do
  maybeOperator <- matchToken tokenTypes
  case maybeOperator of
    Nothing -> return $ Right Nothing
    Just op -> do
      exprMaybe <- f
      restMaybe <- matchTailWith tokenTypes m f
      return $ (\expr rest -> Just (op, m expr rest)) <$> exprMaybe <*> restMaybe
2025-12-29 23:57:15 +03:00
2025-12-25 17:55:28 +03:00
-- | Combine a left operand with an optional @(operator, right operand)@
-- pair: the operand alone when there is no pair, a 'BinaryExpr' otherwise.
mergeExpressionMaybe :: Expr -> Maybe (Token, Expr) -> Expr
mergeExpressionMaybe left =
  maybe left (\(op, right) -> BinaryExpr left op right)
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Combine a left operand with an optional @(operator, right operand)@
-- pair: the operand alone when there is no pair, a 'LogicalExpr' otherwise.
mergeExpressionLogicalMaybe :: Expr -> Maybe (Token, Expr) -> Expr
mergeExpressionLogicalMaybe left =
  maybe left (\(op, right) -> LogicalExpr left op right)
2025-12-25 17:55:28 +03:00
-- | Consume and return the next token if its type is one of the given
-- types; otherwise leave the input untouched and return 'Nothing'.
--
-- The candidate types are tried in list order.
matchToken :: [TokenType] -> State ParserState (Maybe Token)
matchToken = foldr tryType (pure Nothing)
  where
    tryType candidate fallback = do
      matched <- check candidate
      if matched then Just <$> advance else fallback
-- | Report whether the next token has the given type, without consuming it.
-- Always 'False' once 'isAtEnd' holds.
check :: TokenType -> State ParserState Bool
check expected = isAtEnd >>= decide
  where
    decide True = pure False
    decide False = (== expected) . tokenType <$> peek
2025-12-25 17:55:28 +03:00
2025-12-30 05:43:01 +03:00
-- | Consume the next token when it has the expected type; otherwise leave
-- the input untouched and return the supplied error.
consume :: TokenType -> SyntaxError -> State ParserState (Either SyntaxError Token)
consume expected err =
  check expected >>= \found ->
    if found then Right <$> advance else pure (Left err)
2025-12-25 17:55:28 +03:00
-- | Remove the next token from the input and return it.
--
-- Every token is consumed unconditionally, including the EOF token. Calling
-- this with no tokens left is a parser bug and fails with a descriptive
-- error instead of an anonymous pattern-match failure.
advance :: State ParserState Token
advance = state step
  where
    step s = case tokens s of
      t : ts -> (t, s {tokens = ts})
      [] -> error "Lox.Parser.advance: no tokens left (EOF token already consumed?)"
-- | Return the next token without consuming it.
--
-- Calling this with no tokens left is a parser bug and fails with a
-- descriptive error rather than the generic failure of 'head'.
peek :: State ParserState Token
peek = gets (next . tokens)
  where
    next (t : _) = t
    next [] = error "Lox.Parser.peek: no tokens left (EOF token already consumed?)"
-- | Report whether the input is finished: either the next token is EOF or
-- there are no tokens left at all.
--
-- The empty case matters because 'advance' consumes the EOF token like any
-- other (for example when an expression is expected at end of input);
-- lookahead after that point must report end of input, not crash.
isAtEnd :: State ParserState Bool
isAtEnd = gets (exhausted . tokens)
  where
    exhausted [] = True
    exhausted (t : _) = tokenType t == EOF