2025-12-24 18:06:03 +03:00
|
|
|
module Lox.Scanner (
|
2025-12-25 17:55:28 +03:00
|
|
|
TokenType (..),
|
|
|
|
|
Object (..),
|
|
|
|
|
Token (getType, getLexeme, getObject),
|
2025-12-24 18:06:03 +03:00
|
|
|
scanTokensFromSource
|
|
|
|
|
) where
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
import Control.Monad.State.Lazy
|
|
|
|
|
import Control.Monad.Extra
|
2025-12-24 18:06:03 +03:00
|
|
|
import Data.Char
|
|
|
|
|
import Data.Maybe
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Every kind of token the scanner can emit.
data TokenType
  -- Single-character punctuation and operators.
  = LEFT_PAREN
  | RIGHT_PAREN
  | LEFT_BRACE
  | RIGHT_BRACE
  | COMMA
  | DOT
  | MINUS
  | PLUS
  | SEMICOLON
  | SLASH
  | STAR
  -- Operators that are one or two characters long.
  | BANG
  | BANG_EQUAL
  | EQUAL
  | EQUAL_EQUAL
  | GREATER
  | GREATER_EQUAL
  | LESS
  | LESS_EQUAL
  -- Identifiers and literals.
  | IDENTIFIER
  | STRING
  | NUMBER
  -- Reserved words.
  | AND
  | CLASS
  | ELSE
  | FALSE
  | FUN
  | FOR
  | IF
  | NIL
  | OR
  | PRINT
  | RETURN
  | SUPER
  | THIS
  | TRUE
  | VAR
  | WHILE
  -- End-of-input marker.
  | EOF
  deriving (Show, Eq)
|
2025-12-09 17:23:54 +03:00
|
|
|
|
2025-12-25 18:41:04 +03:00
|
|
|
-- | A literal value carried by a token.
data Object
  = NullObject
    -- ^ No literal value.
  | StringObject String
    -- ^ A string value.
  | NumberObject Double
    -- ^ A numeric value.
  | BoolObject Bool
    -- ^ A boolean value.
  deriving (Eq)
|
|
|
|
|
|
|
|
|
|
-- | Textual rendering of a literal value: @Nil@ for 'NullObject', the
-- 'show' of the payload for strings and numbers, and lowercase
-- @true@ / @false@ for booleans.
instance Show Object where
  show object = case object of
    NullObject -> "Nil"
    StringObject text -> show text
    NumberObject number -> show number
    BoolObject flag -> if flag then "true" else "false"
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | A single scanned token.
data Token = Token
  { getType :: TokenType
    -- ^ The kind of token.
  , getLexeme :: String
    -- ^ The source text the token was built from.
  , getObject :: Object
    -- ^ The literal value, or 'NullObject' when there is none.
  , getLineNumber :: Int
    -- ^ The scanner's line counter at the moment the token was created.
  }
  deriving Show
|
|
|
|
|
|
2025-12-24 18:06:03 +03:00
|
|
|
-- | Mutable state threaded through the scanner.
data ScannerState = ScannerState
  { source :: String
    -- ^ Input that has not been consumed yet.
  , current :: String
    -- ^ Characters consumed so far for the lexeme being scanned.
  , lineNumber :: Int
    -- ^ Current line counter.
  }
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Initial scanner state for the given input: nothing consumed yet, an
-- empty lexeme buffer, and the line counter set to 1.
emptyScannerState :: String -> ScannerState
emptyScannerState input = ScannerState input "" 1
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Scan a complete source string into its list of tokens by running
-- 'scanTokens' from the initial scanner state.
scanTokensFromSource :: String -> [Token]
scanTokensFromSource = evalState scanTokens . emptyScannerState
|
|
|
|
|
|
|
|
|
|
-- | Scan tokens until the input is exhausted.
--
-- The result always ends with a single 'EOF' token. Inputs that produce no
-- token ('scanToken' returning 'Nothing') are skipped.
scanTokens :: State ScannerState [Token]
scanTokens = do
  atEnd <- isAtEnd
  if atEnd
    then do
      -- Clear the lexeme buffer first: otherwise the EOF token would carry
      -- the text of whatever was scanned last as its lexeme.
      resetCurrent
      eof <- addToken EOF
      return [eof]
    else do
      maybeToken <- scanToken
      -- Prepend the token when there is one, otherwise keep the rest as is.
      maybe id (:) maybeToken <$> scanTokens
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Whether all input has been consumed.
isAtEnd :: State ScannerState Bool
isAtEnd = scannerIsAtEnd <$> get
|
|
|
|
|
|
|
|
|
|
-- | True when the state has no remaining input.
scannerIsAtEnd :: ScannerState -> Bool
scannerIsAtEnd = null . source
|
2025-12-09 17:23:54 +03:00
|
|
|
|
2025-12-24 18:06:03 +03:00
|
|
|
-- | Scan one lexeme from the front of the input.
--
-- Clears the lexeme buffer, consumes one character and dispatches on it.
-- Returns 'Nothing' for input that yields no token (whitespace, newlines and
-- @//@ comments) and 'Just' the token otherwise. Calls 'error' when the
-- character cannot start any token.
scanToken :: State ScannerState (Maybe Token)
scanToken = do
  resetCurrent
  c <- advance
  case c of
    '(' -> emit LEFT_PAREN
    ')' -> emit RIGHT_PAREN
    '{' -> emit LEFT_BRACE
    '}' -> emit RIGHT_BRACE
    ',' -> emit COMMA
    '.' -> emit DOT
    '-' -> emit MINUS
    '+' -> emit PLUS
    ';' -> emit SEMICOLON
    '*' -> emit STAR
    '!' -> withEquals BANG_EQUAL BANG
    '=' -> withEquals EQUAL_EQUAL EQUAL
    '<' -> withEquals LESS_EQUAL LESS
    '>' -> withEquals GREATER_EQUAL GREATER
    -- A second slash starts a comment that runs to the end of the line.
    '/' -> ifM (match '/') (advanceLine >> skip) (emit SLASH)
    '"' -> Just <$> scanString
    ' ' -> skip
    '\r' -> skip
    '\t' -> skip
    -- A newline yields no token but must still bump the line counter;
    -- without this the line numbers of all later tokens are too low.
    '\n' -> modify (\s -> s {lineNumber = lineNumber s + 1}) >> skip
    _
      | isDigit c -> Just <$> scanNumber
      | isAlpha c -> Just <$> scanIdentifier
      | otherwise -> error "Unexpected character"
  where
    -- Emit a token of the given type for the lexeme consumed so far.
    emit = fmap Just . addToken
    -- Produce no token.
    skip = return Nothing
    -- Emit the first type if the next character is '=', else the second.
    withEquals two one = Just <$> ifM (match '=') (addToken two) (addToken one)
|
|
|
|
|
|
|
|
|
|
-- | Scan the rest of a string literal; the opening quote has already been
-- consumed by the caller.
--
-- Consumes characters up to and including the closing quote, bumping the
-- line counter for every newline inside the literal. The resulting 'STRING'
-- token carries the text between the quotes as a 'StringObject'.
--
-- Calls 'error' when the input ends before a closing quote is found.
-- Previously that case silently dropped the last character of the string,
-- or crashed inside 'init' when the opening quote was the final character.
scanString :: State ScannerState Token
scanString = do
  consumeBody
  terminated <- match '"'
  if terminated
    then do
      lexeme <- gets current
      addLiteralToken STRING (StringObject (unquote lexeme))
    else do
      line <- gets lineNumber
      error ("Unterminated string on line " ++ show line)
  where
    -- Consume characters until the next one is a quote or the input ends.
    consumeBody = do
      c <- peek
      atEnd <- isAtEnd
      unless (c == '"' || atEnd) $ do
        when (c == '\n') (modify (\s -> s {lineNumber = lineNumber s + 1}))
        _ <- advance
        consumeBody
    -- Drop the surrounding quotes; the lexeme is at least two characters
    -- long here because both quotes have been consumed.
    unquote lexeme = take (length lexeme - 2) (drop 1 lexeme)
|
|
|
|
|
|
|
|
|
|
-- | Scan the rest of a number literal; its first digit has already been
-- consumed by the caller.
--
-- Consumes the remaining integer digits and, when a @.@ is directly followed
-- by a digit, the fractional part as well. The lexeme is converted with
-- 'read' and attached to the 'NUMBER' token as a 'NumberObject'.
scanNumber :: State ScannerState Token
scanNumber = do
  advanceWhile isDigit
  next <- peek
  afterNext <- peekNext
  -- Only treat the dot as part of the number when a digit follows it.
  when (next == '.' && isDigit afterNext) $ do
    _ <- advance
    advanceWhile isDigit
  lexeme <- gets current
  addLiteralToken NUMBER (NumberObject (read lexeme))
|
|
|
|
|
|
|
|
|
|
-- | Consume characters for as long as the next one satisfies the predicate.
--
-- At the end of the input 'peek' yields @'\0'@, so the loop stops there for
-- any predicate that rejects that character.
advanceWhile :: (Char -> Bool) -> State ScannerState ()
advanceWhile accepts = loop
  where
    loop = do
      next <- peek
      if accepts next
        then advance >> loop
        else return ()
|
|
|
|
|
|
|
|
|
|
-- | Scan the rest of an identifier or keyword; its first character has
-- already been consumed by the caller.
--
-- Consumes letters, digits and underscores, then classifies the lexeme with
-- 'getKeywordTokenType', so reserved words get their own token type and
-- everything else becomes 'IDENTIFIER'.
scanIdentifier :: State ScannerState Token
scanIdentifier = do
  advanceWhile isIdentifierChar
  lexeme <- gets current
  addToken (getKeywordTokenType lexeme)
  where
    -- Lox identifiers may contain underscores as well as alphanumerics;
    -- 'isAlphaNum' alone made names such as @foo_bar@ abort the scan.
    isIdentifierChar c = isAlphaNum c || c == '_'
|
|
|
|
|
|
|
|
|
|
-- | Empty the lexeme buffer so the next lexeme starts from scratch.
resetCurrent :: State ScannerState ()
resetCurrent = modify clearLexeme
  where
    clearLexeme st = st {current = ""}
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Consume the next input character and return it, or return @'\0'@
-- without changing anything when the input is exhausted.
advance :: State ScannerState Char
advance = do
  ~(c, next) <- gets scannerAdvance
  put next
  return c
|
|
|
|
|
|
|
|
|
|
-- | Pure step behind 'advance'.
--
-- With input remaining, moves its first character onto the end of the lexeme
-- buffer and returns that character. With no input left, returns @'\0'@ and
-- the state unchanged.
scannerAdvance :: ScannerState -> (Char, ScannerState)
scannerAdvance st = case source st of
  [] -> ('\0', st)
  (c : rest) -> (c, st {source = rest, current = current st ++ [c]})
|
|
|
|
|
|
|
|
|
|
-- | Skip the remainder of the current line, including its terminating
-- newline, or everything up to the end of the input if no newline follows.
--
-- The newline is consumed here, so the line counter is bumped here too;
-- otherwise lines that end in a comment would not be counted.
advanceLine :: State ScannerState ()
advanceLine = do
  c <- advance
  atEnd <- isAtEnd
  if c == '\n'
    then modify (\s -> s {lineNumber = lineNumber s + 1})
    else unless atEnd advanceLine
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Consume the next character only if it equals the given one; the result
-- says whether it was consumed.
match :: Char -> State ScannerState Bool
match = state . scannerMatch
|
|
|
|
|
|
|
|
|
|
-- | Pure step behind 'match'.
--
-- When the next input character equals the expected one it is moved onto the
-- end of the lexeme buffer and the result is 'True'. Otherwise, including
-- when no input is left, the state is returned unchanged with 'False'.
scannerMatch :: Char -> ScannerState -> (Bool, ScannerState)
scannerMatch expected st = case source st of
  (next : rest)
    | expected == next ->
        (True, st {source = rest, current = current st ++ [next]})
    | otherwise -> (False, st)
  [] -> (False, st)
|
2025-12-09 17:23:54 +03:00
|
|
|
|
|
|
|
|
-- | Look at the next input character without consuming it; yields @'\0'@
-- when the input is exhausted.
peek :: State ScannerState Char
peek = gets (fromMaybe '\0' . listToMaybe . source)
|
|
|
|
|
|
|
|
|
|
-- | Look at the character after the next one without consuming anything;
-- yields @'\0'@ when fewer than two characters remain.
peekNext :: State ScannerState Char
peekNext = gets (second . source)
  where
    second (_ : c : _) = c
    second _ = '\0'
|
|
|
|
|
|
|
|
|
|
-- | Build a token of the given type with no literal value ('NullObject')
-- from the current lexeme buffer and line counter.
addToken :: TokenType -> State ScannerState Token
addToken tokenType = addLiteralToken tokenType NullObject
|
|
|
|
|
|
|
|
|
|
-- | Build a token of the given type carrying the given literal value, using
-- the current lexeme buffer and line counter. The scanner state itself is
-- left unchanged.
addLiteralToken :: TokenType -> Object -> State ScannerState Token
addLiteralToken tokenType = state . scannerAddLiteralToken tokenType
|
|
|
|
|
|
|
|
|
|
-- | Pure step behind 'addToken' and 'addLiteralToken'.
--
-- Assembles a token from the given type and literal plus the state's lexeme
-- buffer and line counter, and returns it with the state untouched.
scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> (Token, ScannerState)
scannerAddLiteralToken tokenType object st@(ScannerState _ lexeme line) =
  let built =
        Token
          { getType = tokenType
          , getLexeme = lexeme
          , getObject = object
          , getLineNumber = line
          }
   in (built, st)
|
|
|
|
|
|
|
|
|
|
-- | Classify a word: reserved words map to their dedicated token type,
-- anything else is an 'IDENTIFIER'. Matching is exact and case-sensitive.
getKeywordTokenType :: String -> TokenType
getKeywordTokenType word = fromMaybe IDENTIFIER (lookup word keywords)
  where
    keywords =
      [ ("and", AND)
      , ("class", CLASS)
      , ("else", ELSE)
      , ("false", FALSE)
      , ("for", FOR)
      , ("fun", FUN)
      , ("if", IF)
      , ("nil", NIL)
      , ("or", OR)
      , ("print", PRINT)
      , ("return", RETURN)
      , ("super", SUPER)
      , ("this", THIS)
      , ("true", TRUE)
      , ("var", VAR)
      , ("while", WHILE)
      ]
|
2025-12-09 17:23:54 +03:00
|
|
|
|