scanner finished

This commit is contained in:
Oleg Sobolev 2025-12-24 18:06:03 +03:00
parent bcfdc496ed
commit a076e83af2
2 changed files with 127 additions and 38 deletions

View file

@ -4,7 +4,7 @@ import Lox.Scanner
run :: String -> IO () run :: String -> IO ()
run source = mapM_ print tokens run source = mapM_ print tokens
where tokens = scanTokens source where tokens = scanTokensFromSource source
main :: IO () main :: IO ()
main = putStrLn "Hello, Haskell!" main = getLine >>= run

View file

@ -1,7 +1,14 @@
module Lox.Scanner where module Lox.Scanner (
TokenType,
Object,
Token,
scanTokensFromSource
) where
import Control.Monad.State.Lazy import Control.Monad.State.Lazy
import Control.Monad.Extra import Control.Monad.Extra
import Data.Char
import Data.Maybe
data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE
| COMMA | DOT | MINUS | PLUS | SEMICOLON | SLASH | STAR | COMMA | DOT | MINUS | PLUS | SEMICOLON | SLASH | STAR
@ -15,8 +22,7 @@ data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE
| EOF | EOF
deriving Show deriving Show
-- undefined for now data Object = NullObject | StringObject String | NumberObject Double deriving Show
data Object = NullObject deriving Show
data Token = Token { data Token = Token {
getType :: TokenType, getType :: TokenType,
@ -25,68 +31,151 @@ data Token = Token {
getLineNumber :: Int getLineNumber :: Int
} deriving Show } deriving Show
data ScannerState = ScannerState {source :: String, tokens :: [Token]} data ScannerState = ScannerState {source :: String, current :: String, lineNumber :: Int}
emptyScannerState :: String -> ScannerState emptyScannerState :: String -> ScannerState
emptyScannerState source = ScannerState {source=source, tokens=[]} emptyScannerState source =
ScannerState {source=source, current="", lineNumber=1}
scanTokensFromSource :: String -> [Token] scanTokensFromSource :: String -> [Token]
scanTokensFromSource source = evalState scanTokens (emptyScannerState source) scanTokensFromSource source = evalState scanTokens (emptyScannerState source)
scanTokens :: State ScannerState [Token] scanTokens :: State ScannerState [Token]
scanTokens = whileM (scanToken >> (not <$> isAtEnd)) >> gets (reverse . tokens) scanTokens = do
atEnd <- isAtEnd
if atEnd then return [] else do
maybeToken <- scanToken
case maybeToken of
Nothing -> scanTokens
Just t -> (t :) <$> scanTokens
isAtEnd :: State ScannerState Bool isAtEnd :: State ScannerState Bool
isAtEnd = gets scannerIsAtEnd isAtEnd = gets scannerIsAtEnd
scannerIsAtEnd :: ScannerState -> Bool scannerIsAtEnd :: ScannerState -> Bool
scannerIsAtEnd ScannerState {source=source, tokens=_} = null source scannerIsAtEnd ScannerState {source=source} = null source
scanToken :: State ScannerState () scanToken :: State ScannerState (Maybe Token)
scanToken = do scanToken = do
resetCurrent
c <- advance c <- advance
token <- case c of case c of
'(' -> addToken LEFT_PAREN '(' -> Just <$> addToken LEFT_PAREN
')' -> addToken RIGHT_PAREN ')' -> Just <$> addToken RIGHT_PAREN
'{' -> addToken LEFT_BRACE '{' -> Just <$> addToken LEFT_BRACE
'}' -> addToken RIGHT_BRACE '}' -> Just <$> addToken RIGHT_BRACE
',' -> addToken COMMA ',' -> Just <$> addToken COMMA
'.' -> addToken DOT '.' -> Just <$> addToken DOT
'-' -> addToken MINUS '-' -> Just <$> addToken MINUS
'+' -> addToken PLUS '+' -> Just <$> addToken PLUS
';' -> addToken SEMICOLON ';' -> Just <$> addToken SEMICOLON
'*' -> addToken STAR '*' -> Just <$> addToken STAR
'!' -> ifM (match '=') (addToken BANG_EQUAL) (addToken BANG) '!' -> Just <$> ifM (match '=') (addToken BANG_EQUAL) (addToken BANG)
'=' -> ifM (match '=') (addToken EQUAL_EQUAL) (addToken EQUAL) '=' -> Just <$> ifM (match '=') (addToken EQUAL_EQUAL) (addToken EQUAL)
'<' -> ifM (match '=') (addToken LESS_EQUAL) (addToken LESS) '<' -> Just <$> ifM (match '=') (addToken LESS_EQUAL) (addToken LESS)
'>' -> ifM (match '=') (addToken GREATER_EQUAL) (addToken GREATER) '>' -> Just <$> ifM (match '=') (addToken GREATER_EQUAL) (addToken GREATER)
_ -> error "Lexical error" -- TODO error handling '/' -> ifM (match '/') (advanceLine >> return Nothing) (Just <$> addToken SLASH)
return () '"' -> Just <$> scanString
' ' -> return Nothing
'\r' -> return Nothing
'\t' -> return Nothing
'\n' -> return Nothing
c -> if isDigit c then Just <$> scanNumber else if isAlpha c then Just <$> scanIdentifier else error "Unexpected character"
-- | Scan a string literal whose opening quote has already been consumed
-- (and is therefore the first character of 'current').
--
-- Consumes input up to and including the closing @"@, incrementing
-- 'lineNumber' for every newline inside the literal, and returns a 'STRING'
-- token carrying the text between the quotes as a 'StringObject'.
--
-- Raises @error "Unterminated string"@ if the input ends before a closing
-- quote is found. Previously that case silently dropped the last character
-- of the text and produced a normal-looking token.
scanString :: State ScannerState Token
scanString = do
  consumeBody
  atEnd <- isAtEnd
  if atEnd
    then error "Unterminated string"
    else do
      -- The loop stopped on the closing quote; consume it.
      _ <- advance
      -- The body never contains a quote (the loop stops at the first one),
      -- so dropping the opening quote and reading up to the next quote
      -- yields exactly the text between the delimiters, without relying on
      -- the partial 'init' / 'tail'.
      value <- gets (takeWhile (/= '"') . drop 1 . current)
      addLiteralToken STRING (StringObject value)
  where
    -- Advance over everything up to, but not including, the closing quote
    -- or the end of the input.
    consumeBody :: State ScannerState ()
    consumeBody = do
      c <- peek
      atEnd <- isAtEnd
      unless (c == '"' || atEnd) $ do
        when (c == '\n') (modify (\s -> s {lineNumber = lineNumber s + 1}))
        _ <- advance
        consumeBody
-- | Scan a number literal whose first digit has already been consumed.
--
-- Reads the remaining integer digits and, only when a @.@ is immediately
-- followed by another digit, the fractional part as well. The accumulated
-- lexeme in 'current' is then parsed as a 'Double' and attached to a
-- 'NUMBER' token as a 'NumberObject'.
scanNumber :: State ScannerState Token
scanNumber = do
  advanceWhile isDigit
  dot <- peek
  afterDot <- peekNext
  -- A trailing "." with no digit after it is not part of the number.
  when (dot == '.' && isDigit afterDot) $ do
    _ <- advance
    advanceWhile isDigit
  lexeme <- gets current
  addLiteralToken NUMBER (NumberObject (read lexeme))
-- | Consume characters for as long as the next one satisfies the predicate.
--
-- Stops at the first character that fails the predicate or at the end of
-- the input, whichever comes first. The explicit end-of-input check matters
-- because 'peek' reports @'\0'@ once the source is exhausted and 'advance'
-- then leaves the state untouched; without it, a predicate that accepts
-- @'\0'@ would never terminate.
advanceWhile :: (Char -> Bool) -> State ScannerState ()
advanceWhile p = do
  c <- peek
  atEnd <- isAtEnd
  when (not atEnd && p c) (advance >> advanceWhile p)
-- | Scan an identifier or keyword whose first character has already been
-- consumed.
--
-- Consumes the remaining identifier characters, then classifies the lexeme
-- accumulated in 'current' via 'getKeywordTokenType', which yields the
-- keyword's token type or 'IDENTIFIER'.
--
-- Underscores are accepted as identifier characters, as in the Lox grammar,
-- so a name such as @foo_bar@ is scanned as one token instead of being cut
-- at the underscore. Whether an identifier may /start/ with an underscore is
-- decided by the dispatch in 'scanToken', not here.
scanIdentifier :: State ScannerState Token
scanIdentifier = do
  advanceWhile isIdentifierChar
  lexeme <- gets current
  addToken (getKeywordTokenType lexeme)
  where
    isIdentifierChar :: Char -> Bool
    isIdentifierChar c = isAlphaNum c || c == '_'
-- | Clear the lexeme accumulated so far, so the next token starts from an
-- empty 'current'.
resetCurrent :: State ScannerState ()
resetCurrent = modify clearLexeme
  where
    clearLexeme scanner = scanner {current = ""}
advance :: State ScannerState Char advance :: State ScannerState Char
advance = state scannerAdvance advance = state scannerAdvance
scannerAdvance :: ScannerState -> (Char, ScannerState) scannerAdvance :: ScannerState -> (Char, ScannerState)
scannerAdvance ScannerState {source=(c:cs), tokens=tokens} = (c, ScannerState {source=cs, tokens=tokens}) scannerAdvance state@ScannerState {source=(c:cs), current=current} =
(c, state {source=cs, current=current ++ [c]})
scannerAdvance state@ScannerState {source=""} = ('\0', state)
-- | Skip the rest of the current line, including its terminating newline
-- (used for @//@ comments).
--
-- Stops after consuming a newline or when the input is exhausted. Because
-- the newline is consumed here rather than left for the caller, 'lineNumber'
-- is incremented here as well; otherwise tokens following a comment would
-- be reported on the comment's line.
advanceLine :: State ScannerState ()
advanceLine = do
  c <- advance
  atEnd <- isAtEnd
  if c == '\n'
    then modify (\s -> s {lineNumber = lineNumber s + 1})
    else unless atEnd advanceLine
match :: Char -> State ScannerState Bool match :: Char -> State ScannerState Bool
match c = state (scannerMatch c) match c = state (scannerMatch c)
scannerMatch :: Char -> ScannerState -> (Bool, ScannerState) scannerMatch :: Char -> ScannerState -> (Bool, ScannerState)
scannerMatch matchChar ScannerState {source=(sourceChar:sourceTail), tokens=tokens} = (matchChar == sourceChar, ScannerState {source=source, tokens=tokens}) scannerMatch matchChar state@ScannerState {source=(sourceChar:sourceTail), current=current} =
(matchChar == sourceChar, state {source=source, current=newCurrent})
where source = if matchChar == sourceChar then sourceTail else sourceChar : sourceTail where source = if matchChar == sourceChar then sourceTail else sourceChar : sourceTail
scannerMatch _ state@ScannerState {source="", tokens=_} = (False, state) newCurrent = if matchChar == sourceChar then current ++ [sourceChar] else current
scannerMatch _ state@ScannerState {source=""} = (False, state)
peek :: State ScannerState Char peek :: State ScannerState Char
peek = gets $ head . source peek = gets (\s -> if null $ source s then '\0' else head $ source s)
addToken :: TokenType -> State ScannerState () peekNext :: State ScannerState Char
addToken token = modify $ scannerAddLiteralToken token NullObject peekNext = gets (\s -> if null (source s) || null ( tail $ source s) then '\0' else head $ tail $ source s)
addLiteralToken :: TokenType -> Object -> State ScannerState () addToken :: TokenType -> State ScannerState Token
addLiteralToken token object = modify $ scannerAddLiteralToken token object addToken token = state $ scannerAddLiteralToken token NullObject
scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> ScannerState addLiteralToken :: TokenType -> Object -> State ScannerState Token
scannerAddLiteralToken tokenType object ScannerState {source=source, tokens=tokens} = ScannerState {source=source, tokens=token : tokens} addLiteralToken token object = state $ scannerAddLiteralToken token object
where token = Token {getType=tokenType, getLexeme="", getObject=object, getLineNumber= -1}
scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> (Token, ScannerState)
scannerAddLiteralToken tokenType object state@ScannerState {current=current, lineNumber=lineNumber} =
(token, state)
where token = Token {getType=tokenType, getLexeme=current, getObject=object, getLineNumber=lineNumber}
-- | Classify a lexeme: reserved words map to their dedicated token type,
-- anything else is an 'IDENTIFIER'. Matching is exact and case-sensitive.
getKeywordTokenType :: String -> TokenType
getKeywordTokenType lexeme = fromMaybe IDENTIFIER (lookup lexeme keywords)
  where
    -- Reserved words of the language and the token type each one produces.
    keywords :: [(String, TokenType)]
    keywords =
      [ ("and", AND)
      , ("class", CLASS)
      , ("else", ELSE)
      , ("false", FALSE)
      , ("for", FOR)
      , ("fun", FUN)
      , ("if", IF)
      , ("nil", NIL)
      , ("or", OR)
      , ("print", PRINT)
      , ("return", RETURN)
      , ("super", SUPER)
      , ("this", THIS)
      , ("true", TRUE)
      , ("var", VAR)
      , ("while", WHILE)
      ]