scanner finished
This commit is contained in:
parent
bcfdc496ed
commit
a076e83af2
2 changed files with 127 additions and 38 deletions
|
|
@ -4,7 +4,7 @@ import Lox.Scanner
|
|||
|
||||
run :: String -> IO ()
|
||||
run source = mapM_ print tokens
|
||||
where tokens = scanTokens source
|
||||
where tokens = scanTokensFromSource source
|
||||
|
||||
main :: IO ()
|
||||
main = putStrLn "Hello, Haskell!"
|
||||
main = getLine >>= run
|
||||
|
|
|
|||
|
|
@ -1,7 +1,14 @@
|
|||
module Lox.Scanner where
|
||||
module Lox.Scanner (
|
||||
TokenType,
|
||||
Object,
|
||||
Token,
|
||||
scanTokensFromSource
|
||||
) where
|
||||
|
||||
import Control.Monad.State.Lazy
|
||||
import Control.Monad.Extra
|
||||
import Data.Char
|
||||
import Data.Maybe
|
||||
|
||||
data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE
|
||||
| COMMA | DOT | MINUS | PLUS | SEMICOLON | SLASH | STAR
|
||||
|
|
@ -15,8 +22,7 @@ data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE
|
|||
| EOF
|
||||
deriving Show
|
||||
|
||||
-- undefined for now
|
||||
data Object = NullObject deriving Show
|
||||
data Object = NullObject | StringObject String | NumberObject Double deriving Show
|
||||
|
||||
data Token = Token {
|
||||
getType :: TokenType,
|
||||
|
|
@ -25,68 +31,151 @@ data Token = Token {
|
|||
getLineNumber :: Int
|
||||
} deriving Show
|
||||
|
||||
data ScannerState = ScannerState {source :: String, tokens :: [Token]}
|
||||
data ScannerState = ScannerState {source :: String, current :: String, lineNumber :: Int}
|
||||
|
||||
emptyScannerState :: String -> ScannerState
|
||||
emptyScannerState source = ScannerState {source=source, tokens=[]}
|
||||
emptyScannerState source =
|
||||
ScannerState {source=source, current="", lineNumber=1}
|
||||
|
||||
scanTokensFromSource :: String -> [Token]
|
||||
scanTokensFromSource source = evalState scanTokens (emptyScannerState source)
|
||||
|
||||
scanTokens :: State ScannerState [Token]
|
||||
scanTokens = whileM (scanToken >> (not <$> isAtEnd)) >> gets (reverse . tokens)
|
||||
scanTokens = do
|
||||
atEnd <- isAtEnd
|
||||
if atEnd then return [] else do
|
||||
maybeToken <- scanToken
|
||||
case maybeToken of
|
||||
Nothing -> scanTokens
|
||||
Just t -> (t :) <$> scanTokens
|
||||
|
||||
isAtEnd :: State ScannerState Bool
|
||||
isAtEnd = gets scannerIsAtEnd
|
||||
|
||||
scannerIsAtEnd :: ScannerState -> Bool
|
||||
scannerIsAtEnd ScannerState {source=source, tokens=_} = null source
|
||||
scannerIsAtEnd ScannerState {source=source} = null source
|
||||
|
||||
scanToken :: State ScannerState ()
|
||||
scanToken :: State ScannerState (Maybe Token)
|
||||
scanToken = do
|
||||
resetCurrent
|
||||
c <- advance
|
||||
token <- case c of
|
||||
'(' -> addToken LEFT_PAREN
|
||||
')' -> addToken RIGHT_PAREN
|
||||
'{' -> addToken LEFT_BRACE
|
||||
'}' -> addToken RIGHT_BRACE
|
||||
',' -> addToken COMMA
|
||||
'.' -> addToken DOT
|
||||
'-' -> addToken MINUS
|
||||
'+' -> addToken PLUS
|
||||
';' -> addToken SEMICOLON
|
||||
'*' -> addToken STAR
|
||||
'!' -> ifM (match '=') (addToken BANG_EQUAL) (addToken BANG)
|
||||
'=' -> ifM (match '=') (addToken EQUAL_EQUAL) (addToken EQUAL)
|
||||
'<' -> ifM (match '=') (addToken LESS_EQUAL) (addToken LESS)
|
||||
'>' -> ifM (match '=') (addToken GREATER_EQUAL) (addToken GREATER)
|
||||
_ -> error "Lexical error" -- TODO error handling
|
||||
return ()
|
||||
case c of
|
||||
'(' -> Just <$> addToken LEFT_PAREN
|
||||
')' -> Just <$> addToken RIGHT_PAREN
|
||||
'{' -> Just <$> addToken LEFT_BRACE
|
||||
'}' -> Just <$> addToken RIGHT_BRACE
|
||||
',' -> Just <$> addToken COMMA
|
||||
'.' -> Just <$> addToken DOT
|
||||
'-' -> Just <$> addToken MINUS
|
||||
'+' -> Just <$> addToken PLUS
|
||||
';' -> Just <$> addToken SEMICOLON
|
||||
'*' -> Just <$> addToken STAR
|
||||
'!' -> Just <$> ifM (match '=') (addToken BANG_EQUAL) (addToken BANG)
|
||||
'=' -> Just <$> ifM (match '=') (addToken EQUAL_EQUAL) (addToken EQUAL)
|
||||
'<' -> Just <$> ifM (match '=') (addToken LESS_EQUAL) (addToken LESS)
|
||||
'>' -> Just <$> ifM (match '=') (addToken GREATER_EQUAL) (addToken GREATER)
|
||||
'/' -> ifM (match '/') (advanceLine >> return Nothing) (Just <$> addToken SLASH)
|
||||
'"' -> Just <$> scanString
|
||||
' ' -> return Nothing
|
||||
'\r' -> return Nothing
|
||||
'\t' -> return Nothing
|
||||
'\n' -> return Nothing
|
||||
c -> if isDigit c then Just <$> scanNumber else if isAlpha c then Just <$> scanIdentifier else error "Unexpected character"
|
||||
|
||||
scanString :: State ScannerState Token
|
||||
scanString = do
|
||||
whileM (do
|
||||
c <- peek
|
||||
atEnd <- isAtEnd
|
||||
unless (c == '"' || atEnd) (do
|
||||
when (c == '\n') (modify (\s -> s {lineNumber=lineNumber s + 1}))
|
||||
advance
|
||||
return ())
|
||||
return $ not (c == '"' || atEnd))
|
||||
advance
|
||||
value <- gets (init . tail . current)
|
||||
addLiteralToken STRING (StringObject value)
|
||||
|
||||
scanNumber :: State ScannerState Token
|
||||
scanNumber = do
|
||||
advanceWhile isDigit
|
||||
isFraction <- ((&&) . (== '.') <$> peek) <*> (isDigit <$> peekNext)
|
||||
when isFraction (advance >> advanceWhile isDigit)
|
||||
value <- gets (read . current)
|
||||
addLiteralToken NUMBER (NumberObject value)
|
||||
|
||||
advanceWhile:: (Char -> Bool) -> State ScannerState ()
|
||||
advanceWhile pred = do
|
||||
c <- peek
|
||||
when (pred c) (advance >> advanceWhile pred)
|
||||
|
||||
scanIdentifier :: State ScannerState Token
|
||||
scanIdentifier = do
|
||||
advanceWhile isAlphaNum
|
||||
value <- gets current
|
||||
let tokenType = getKeywordTokenType value
|
||||
addToken tokenType
|
||||
|
||||
resetCurrent :: State ScannerState ()
|
||||
resetCurrent = modify (\state -> state {current=""})
|
||||
|
||||
advance :: State ScannerState Char
|
||||
advance = state scannerAdvance
|
||||
|
||||
scannerAdvance :: ScannerState -> (Char, ScannerState)
|
||||
scannerAdvance ScannerState {source=(c:cs), tokens=tokens} = (c, ScannerState {source=cs, tokens=tokens})
|
||||
scannerAdvance state@ScannerState {source=(c:cs), current=current} =
|
||||
(c, state {source=cs, current=current ++ [c]})
|
||||
scannerAdvance state@ScannerState {source=""} = ('\0', state)
|
||||
|
||||
advanceLine :: State ScannerState ()
|
||||
advanceLine = do
|
||||
c <- advance
|
||||
atEnd <- isAtEnd
|
||||
unless (c == '\n' || atEnd) advanceLine
|
||||
|
||||
match :: Char -> State ScannerState Bool
|
||||
match c = state (scannerMatch c)
|
||||
|
||||
scannerMatch :: Char -> ScannerState -> (Bool, ScannerState)
|
||||
scannerMatch matchChar ScannerState {source=(sourceChar:sourceTail), tokens=tokens} = (matchChar == sourceChar, ScannerState {source=source, tokens=tokens})
|
||||
scannerMatch matchChar state@ScannerState {source=(sourceChar:sourceTail), current=current} =
|
||||
(matchChar == sourceChar, state {source=source, current=newCurrent})
|
||||
where source = if matchChar == sourceChar then sourceTail else sourceChar : sourceTail
|
||||
scannerMatch _ state@ScannerState {source="", tokens=_} = (False, state)
|
||||
newCurrent = if matchChar == sourceChar then current ++ [sourceChar] else current
|
||||
scannerMatch _ state@ScannerState {source=""} = (False, state)
|
||||
|
||||
peek :: State ScannerState Char
|
||||
peek = gets $ head . source
|
||||
peek = gets (\s -> if null $ source s then '\0' else head $ source s)
|
||||
|
||||
addToken :: TokenType -> State ScannerState ()
|
||||
addToken token = modify $ scannerAddLiteralToken token NullObject
|
||||
peekNext :: State ScannerState Char
|
||||
peekNext = gets (\s -> if null (source s) || null ( tail $ source s) then '\0' else head $ tail $ source s)
|
||||
|
||||
addLiteralToken :: TokenType -> Object -> State ScannerState ()
|
||||
addLiteralToken token object = modify $ scannerAddLiteralToken token object
|
||||
addToken :: TokenType -> State ScannerState Token
|
||||
addToken token = state $ scannerAddLiteralToken token NullObject
|
||||
|
||||
scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> ScannerState
|
||||
scannerAddLiteralToken tokenType object ScannerState {source=source, tokens=tokens} = ScannerState {source=source, tokens=token : tokens}
|
||||
where token = Token {getType=tokenType, getLexeme="", getObject=object, getLineNumber= -1}
|
||||
addLiteralToken :: TokenType -> Object -> State ScannerState Token
|
||||
addLiteralToken token object = state $ scannerAddLiteralToken token object
|
||||
|
||||
scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> (Token, ScannerState)
|
||||
scannerAddLiteralToken tokenType object state@ScannerState {current=current, lineNumber=lineNumber} =
|
||||
(token, state)
|
||||
where token = Token {getType=tokenType, getLexeme=current, getObject=object, getLineNumber=lineNumber}
|
||||
|
||||
getKeywordTokenType :: String -> TokenType
|
||||
getKeywordTokenType "and" = AND
|
||||
getKeywordTokenType "class" = CLASS
|
||||
getKeywordTokenType "else" = ELSE
|
||||
getKeywordTokenType "false" = FALSE
|
||||
getKeywordTokenType "for" = FOR
|
||||
getKeywordTokenType "fun" = FUN
|
||||
getKeywordTokenType "if" = IF
|
||||
getKeywordTokenType "nil" = NIL
|
||||
getKeywordTokenType "or" = OR
|
||||
getKeywordTokenType "print" = PRINT
|
||||
getKeywordTokenType "return" = RETURN
|
||||
getKeywordTokenType "super" = SUPER
|
||||
getKeywordTokenType "this" = THIS
|
||||
getKeywordTokenType "true" = TRUE
|
||||
getKeywordTokenType "var" = VAR
|
||||
getKeywordTokenType "while" = WHILE
|
||||
getKeywordTokenType _ = IDENTIFIER
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue