module Lexer ( TokenType(..) , Token(..) , lexL ) where import Data.Char (isAlphaNum) data TokenType = Lambda | Dot | Identifier | OpenParen | CloseParen | EOF deriving (Show, Eq) data Token = Token { tokType :: TokenType , tokLine :: Int , tokCol :: Int , tokLexeme :: String } deriving (Show) data LexState = LexState { src :: String , col :: Int , line :: Int } type LexResult = Either String (Maybe Token, LexState) iic :: Char -> Bool iic c = isAlphaNum c || c == '_' isIgnoreChar :: Char -> Bool isIgnoreChar c = c `elem` "() \t" peek :: LexState -> Either String Char peek st = case drop (col st) (src st) of [] -> Left "Out of characters" (c:_) -> Right c mkToken :: LexState -> TokenType -> Int -> Either String Token mkToken st ttype start | ttype == EOF = Right $ Token ttype (line st) start (drop start (take (col st) (src st))) | start < length (src st) && col st > 0 && col st - 1 < length (src st) = Right $ Token ttype (line st) start (drop start (take (col st) (src st))) | otherwise = Left $ "Failed to resolve " ++ show ttype ++ ". Line " ++ show (line st) ++ ", " ++ show start ++ ":" ++ show (col st) advance :: LexState -> LexState advance st = st { col = col st + 1 } isEol :: LexState -> Bool isEol st = col st >= length (src st) lexC :: LexState -> LexResult lexC st | isEol st = Right (Nothing, st) | otherwise = lexC (advance st) lexIdent :: LexState -> LexResult lexIdent st = let start = col st st' = scanIdent st in case mkToken st' Identifier start of Left e -> Left e Right t -> Right (Just t, st') where scanIdent s | isEol s = s | otherwise = case peek s of Right c | iic c -> scanIdent (advance s) _ -> s lexS :: LexState -> TokenType -> LexResult lexS st ttype = let st' = advance st in case mkToken st' ttype (col st) of Left e -> Left e Right t -> Right (Just t, st') lexToken :: LexState -> LexResult lexToken st = case peek st of Left e -> Left e Right c -> case c of 'λ' -> lexS st Lambda '\\' -> lexS st Lambda '.' -> lexS st Dot '(' -> lexS st OpenParen ')' -> lexS st CloseParen '#' -> lexC st _ | iic c -> lexIdent st _ | isIgnoreChar c -> Right (Nothing, advance st) _ -> Left $ "Bad character. Line " ++ show (line st) ++ ", column " ++ show (col st) lexAll :: LexState -> Either String ([Token], LexState) lexAll st | isEol st = Right ([], st) | otherwise = case lexToken st of Left e -> Left e Right (mt, st') -> case lexAll st' of Left e -> Left e Right (ts, st'') -> Right (maybe ts (:ts) mt, st'') lexL :: Int -> String -> Either String [Token] lexL lineNum input = do let st = LexState { src = input, col = 0, line = lineNum } (tokens, st') <- lexAll st let eofTok = Token EOF (line st') (col st') "" return (tokens ++ [eofTok])