From a076e83af2d173580c946a952fe4fdd8f3bd1204 Mon Sep 17 00:00:00 2001 From: vvsob Date: Wed, 24 Dec 2025 18:06:03 +0300 Subject: [PATCH] scanner finished --- app/Main.hs | 4 +- src/Lox/Scanner.hs | 161 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 127 insertions(+), 38 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 432a5a5..2289177 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -4,7 +4,7 @@ import Lox.Scanner run :: String -> IO () run source = mapM_ print tokens - where tokens = scanTokens source + where tokens = scanTokensFromSource source main :: IO () -main = putStrLn "Hello, Haskell!" +main = getLine >>= run diff --git a/src/Lox/Scanner.hs b/src/Lox/Scanner.hs index 29d0f6e..303a191 100644 --- a/src/Lox/Scanner.hs +++ b/src/Lox/Scanner.hs @@ -1,7 +1,14 @@ -module Lox.Scanner where +module Lox.Scanner ( + TokenType, + Object, + Token, + scanTokensFromSource +) where import Control.Monad.State.Lazy import Control.Monad.Extra +import Data.Char +import Data.Maybe data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE | COMMA | DOT | MINUS | PLUS | SEMICOLON | SLASH | STAR @@ -15,8 +22,7 @@ data TokenType = LEFT_PAREN | RIGHT_PAREN | LEFT_BRACE | RIGHT_BRACE | EOF deriving Show --- undefined for now -data Object = NullObject deriving Show +data Object = NullObject | StringObject String | NumberObject Double deriving Show data Token = Token { getType :: TokenType, @@ -25,68 +31,151 @@ data Token = Token { getLineNumber :: Int } deriving Show -data ScannerState = ScannerState {source :: String, tokens :: [Token]} +data ScannerState = ScannerState {source :: String, current :: String, lineNumber :: Int} emptyScannerState :: String -> ScannerState -emptyScannerState source = ScannerState {source=source, tokens=[]} +emptyScannerState source = + ScannerState {source=source, current="", lineNumber=1} scanTokensFromSource :: String -> [Token] scanTokensFromSource source = evalState scanTokens (emptyScannerState source) scanTokens :: State ScannerState [Token] -scanTokens = whileM (scanToken >> (not <$> isAtEnd)) >> gets (reverse . tokens) +scanTokens = do + atEnd <- isAtEnd + if atEnd then return [] else do + maybeToken <- scanToken + case maybeToken of + Nothing -> scanTokens + Just t -> (t :) <$> scanTokens isAtEnd :: State ScannerState Bool isAtEnd = gets scannerIsAtEnd scannerIsAtEnd :: ScannerState -> Bool -scannerIsAtEnd ScannerState {source=source, tokens=_} = null source +scannerIsAtEnd ScannerState {source=source} = null source -scanToken :: State ScannerState () +scanToken :: State ScannerState (Maybe Token) scanToken = do + resetCurrent c <- advance - token <- case c of - '(' -> addToken LEFT_PAREN - ')' -> addToken RIGHT_PAREN - '{' -> addToken LEFT_BRACE - '}' -> addToken RIGHT_BRACE - ',' -> addToken COMMA - '.' -> addToken DOT - '-' -> addToken MINUS - '+' -> addToken PLUS - ';' -> addToken SEMICOLON - '*' -> addToken STAR - '!' -> ifM (match '=') (addToken BANG_EQUAL) (addToken BANG) - '=' -> ifM (match '=') (addToken EQUAL_EQUAL) (addToken EQUAL) - '<' -> ifM (match '=') (addToken LESS_EQUAL) (addToken LESS) - '>' -> ifM (match '=') (addToken GREATER_EQUAL) (addToken GREATER) - _ -> error "Lexical error" -- TODO error handling - return () + case c of + '(' -> Just <$> addToken LEFT_PAREN + ')' -> Just <$> addToken RIGHT_PAREN + '{' -> Just <$> addToken LEFT_BRACE + '}' -> Just <$> addToken RIGHT_BRACE + ',' -> Just <$> addToken COMMA + '.' -> Just <$> addToken DOT + '-' -> Just <$> addToken MINUS + '+' -> Just <$> addToken PLUS + ';' -> Just <$> addToken SEMICOLON + '*' -> Just <$> addToken STAR + '!' -> Just <$> ifM (match '=') (addToken BANG_EQUAL) (addToken BANG) + '=' -> Just <$> ifM (match '=') (addToken EQUAL_EQUAL) (addToken EQUAL) + '<' -> Just <$> ifM (match '=') (addToken LESS_EQUAL) (addToken LESS) + '>' -> Just <$> ifM (match '=') (addToken GREATER_EQUAL) (addToken GREATER) + '/' -> ifM (match '/') (advanceLine >> return Nothing) (Just <$> addToken SLASH) + '"' -> Just <$> scanString + ' ' -> return Nothing + '\r' -> return Nothing + '\t' -> return Nothing + '\n' -> return Nothing + c -> if isDigit c then Just <$> scanNumber else if isAlpha c then Just <$> scanIdentifier else error "Unexpected character" + +scanString :: State ScannerState Token +scanString = do + whileM (do + c <- peek + atEnd <- isAtEnd + unless (c == '"' || atEnd) (do + when (c == '\n') (modify (\s -> s {lineNumber=lineNumber s + 1})) + advance + return ()) + return $ not (c == '"' || atEnd)) + advance + value <- gets (init . tail . current) + addLiteralToken STRING (StringObject value) + +scanNumber :: State ScannerState Token +scanNumber = do + advanceWhile isDigit + isFraction <- ((&&) . (== '.') <$> peek) <*> (isDigit <$> peekNext) + when isFraction (advance >> advanceWhile isDigit) + value <- gets (read . current) + addLiteralToken NUMBER (NumberObject value) + +advanceWhile:: (Char -> Bool) -> State ScannerState () +advanceWhile pred = do + c <- peek + when (pred c) (advance >> advanceWhile pred) + +scanIdentifier :: State ScannerState Token +scanIdentifier = do + advanceWhile isAlphaNum + value <- gets current + let tokenType = getKeywordTokenType value + addToken tokenType + +resetCurrent :: State ScannerState () +resetCurrent = modify (\state -> state {current=""}) advance :: State ScannerState Char advance = state scannerAdvance scannerAdvance :: ScannerState -> (Char, ScannerState) -scannerAdvance ScannerState {source=(c:cs), tokens=tokens} = (c, ScannerState {source=cs, tokens=tokens}) +scannerAdvance state@ScannerState {source=(c:cs), current=current} = + (c, state {source=cs, current=current ++ [c]}) +scannerAdvance state@ScannerState {source=""} = ('\0', state) + +advanceLine :: State ScannerState () +advanceLine = do + c <- advance + atEnd <- isAtEnd + unless (c == '\n' || atEnd) advanceLine match :: Char -> State ScannerState Bool match c = state (scannerMatch c) scannerMatch :: Char -> ScannerState -> (Bool, ScannerState) -scannerMatch matchChar ScannerState {source=(sourceChar:sourceTail), tokens=tokens} = (matchChar == sourceChar, ScannerState {source=source, tokens=tokens}) +scannerMatch matchChar state@ScannerState {source=(sourceChar:sourceTail), current=current} = + (matchChar == sourceChar, state {source=source, current=newCurrent}) where source = if matchChar == sourceChar then sourceTail else sourceChar : sourceTail -scannerMatch _ state@ScannerState {source="", tokens=_} = (False, state) + newCurrent = if matchChar == sourceChar then current ++ [sourceChar] else current +scannerMatch _ state@ScannerState {source=""} = (False, state) peek :: State ScannerState Char -peek = gets $ head . source +peek = gets (\s -> if null $ source s then '\0' else head $ source s) -addToken :: TokenType -> State ScannerState () -addToken token = modify $ scannerAddLiteralToken token NullObject +peekNext :: State ScannerState Char +peekNext = gets (\s -> if null (source s) || null ( tail $ source s) then '\0' else head $ tail $ source s) -addLiteralToken :: TokenType -> Object -> State ScannerState () -addLiteralToken token object = modify $ scannerAddLiteralToken token object +addToken :: TokenType -> State ScannerState Token +addToken token = state $ scannerAddLiteralToken token NullObject -scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> ScannerState -scannerAddLiteralToken tokenType object ScannerState {source=source, tokens=tokens} = ScannerState {source=source, tokens=token : tokens} - where token = Token {getType=tokenType, getLexeme="", getObject=object, getLineNumber= -1} +addLiteralToken :: TokenType -> Object -> State ScannerState Token +addLiteralToken token object = state $ scannerAddLiteralToken token object + +scannerAddLiteralToken :: TokenType -> Object -> ScannerState -> (Token, ScannerState) +scannerAddLiteralToken tokenType object state@ScannerState {current=current, lineNumber=lineNumber} = + (token, state) + where token = Token {getType=tokenType, getLexeme=current, getObject=object, getLineNumber=lineNumber} + +getKeywordTokenType :: String -> TokenType +getKeywordTokenType "and" = AND +getKeywordTokenType "class" = CLASS +getKeywordTokenType "else" = ELSE +getKeywordTokenType "false" = FALSE +getKeywordTokenType "for" = FOR +getKeywordTokenType "fun" = FUN +getKeywordTokenType "if" = IF +getKeywordTokenType "nil" = NIL +getKeywordTokenType "or" = OR +getKeywordTokenType "print" = PRINT +getKeywordTokenType "return" = RETURN +getKeywordTokenType "super" = SUPER +getKeywordTokenType "this" = THIS +getKeywordTokenType "true" = TRUE +getKeywordTokenType "var" = VAR +getKeywordTokenType "while" = WHILE +getKeywordTokenType _ = IDENTIFIER