115 lines
3.1 KiB
Haskell
115 lines
3.1 KiB
Haskell
module Lexer
|
|
( TokenType(..)
|
|
, Token(..)
|
|
, lexL
|
|
) where
|
|
|
|
import Data.Char (isAlphaNum)
|
|
|
|
data TokenType
|
|
= Lambda
|
|
| Dot
|
|
| Identifier
|
|
| OpenParen
|
|
| CloseParen
|
|
| EOF
|
|
deriving (Show, Eq)
|
|
|
|
data Token = Token
|
|
{ tokType :: TokenType
|
|
, tokLine :: Int
|
|
, tokCol :: Int
|
|
, tokLexeme :: String
|
|
} deriving (Show)
|
|
|
|
data LexState = LexState
|
|
{ src :: String
|
|
, col :: Int
|
|
, line :: Int
|
|
}
|
|
|
|
type LexResult = Either String (Maybe Token, LexState)
|
|
|
|
iic :: Char -> Bool
|
|
iic c = isAlphaNum c || c == '_'
|
|
|
|
isIgnoreChar :: Char -> Bool
|
|
isIgnoreChar c = c `elem` "() \t"
|
|
|
|
peek :: LexState -> Either String Char
|
|
peek st = case drop (col st) (src st) of
|
|
[] -> Left "Out of characters"
|
|
(c:_) -> Right c
|
|
|
|
mkToken :: LexState -> TokenType -> Int -> Either String Token
|
|
mkToken st ttype start
|
|
| ttype == EOF =
|
|
Right $ Token ttype (line st) start (drop start (take (col st) (src st)))
|
|
| start < length (src st) && col st > 0 && col st - 1 < length (src st) =
|
|
Right $ Token ttype (line st) start (drop start (take (col st) (src st)))
|
|
| otherwise =
|
|
Left $ "Failed to resolve " ++ show ttype ++
|
|
". Line " ++ show (line st) ++ ", " ++ show start ++ ":" ++ show (col st)
|
|
|
|
advance :: LexState -> LexState
|
|
advance st = st { col = col st + 1 }
|
|
|
|
isEol :: LexState -> Bool
|
|
isEol st = col st >= length (src st)
|
|
|
|
lexC :: LexState -> LexResult
|
|
lexC st
|
|
| isEol st = Right (Nothing, st)
|
|
| otherwise = lexC (advance st)
|
|
|
|
lexIdent :: LexState -> LexResult
|
|
lexIdent st =
|
|
let start = col st
|
|
st' = scanIdent st
|
|
in case mkToken st' Identifier start of
|
|
Left e -> Left e
|
|
Right t -> Right (Just t, st')
|
|
where
|
|
scanIdent s
|
|
| isEol s = s
|
|
| otherwise = case peek s of
|
|
Right c | iic c -> scanIdent (advance s)
|
|
_ -> s
|
|
|
|
lexS :: LexState -> TokenType -> LexResult
|
|
lexS st ttype =
|
|
let st' = advance st
|
|
in case mkToken st' ttype (col st) of
|
|
Left e -> Left e
|
|
Right t -> Right (Just t, st')
|
|
|
|
lexToken :: LexState -> LexResult
|
|
lexToken st = case peek st of
|
|
Left e -> Left e
|
|
Right c -> case c of
|
|
'λ' -> lexS st Lambda
|
|
'\\' -> lexS st Lambda
|
|
'.' -> lexS st Dot
|
|
'(' -> lexS st OpenParen
|
|
')' -> lexS st CloseParen
|
|
'#' -> lexC st
|
|
_ | iic c -> lexIdent st
|
|
_ | isIgnoreChar c -> Right (Nothing, advance st)
|
|
_ -> Left $ "Bad character. Line " ++ show (line st) ++
|
|
", column " ++ show (col st)
|
|
|
|
lexAll :: LexState -> Either String ([Token], LexState)
|
|
lexAll st
|
|
| isEol st = Right ([], st)
|
|
| otherwise = case lexToken st of
|
|
Left e -> Left e
|
|
Right (mt, st') -> case lexAll st' of
|
|
Left e -> Left e
|
|
Right (ts, st'') -> Right (maybe ts (:ts) mt, st'')
|
|
|
|
lexL :: Int -> String -> Either String [Token]
|
|
lexL lineNum input = do
|
|
let st = LexState { src = input, col = 0, line = lineNum }
|
|
(tokens, st') <- lexAll st
|
|
let eofTok = Token EOF (line st') (col st') ""
|
|
return (tokens ++ [eofTok])
|