lambda-calc/Lexer.hs
2026-04-10 00:18:10 +02:00

115 lines
3.1 KiB
Haskell

module Lexer
( TokenType(..)
, Token(..)
, lexL
) where
import Data.Char (isAlphaNum)
data TokenType
= Lambda
| Dot
| Identifier
| OpenParen
| CloseParen
| EOF
deriving (Show, Eq)
data Token = Token
{ tokType :: TokenType
, tokLine :: Int
, tokCol :: Int
, tokLexeme :: String
} deriving (Show)
data LexState = LexState
{ src :: String
, col :: Int
, line :: Int
}
type LexResult = Either String (Maybe Token, LexState)
iic :: Char -> Bool
iic c = isAlphaNum c || c == '_'
isIgnoreChar :: Char -> Bool
isIgnoreChar c = c `elem` "() \t"
peek :: LexState -> Either String Char
peek st = case drop (col st) (src st) of
[] -> Left "Out of characters"
(c:_) -> Right c
mkToken :: LexState -> TokenType -> Int -> Either String Token
mkToken st ttype start
| ttype == EOF =
Right $ Token ttype (line st) start (drop start (take (col st) (src st)))
| start < length (src st) && col st > 0 && col st - 1 < length (src st) =
Right $ Token ttype (line st) start (drop start (take (col st) (src st)))
| otherwise =
Left $ "Failed to resolve " ++ show ttype ++
". Line " ++ show (line st) ++ ", " ++ show start ++ ":" ++ show (col st)
advance :: LexState -> LexState
advance st = st { col = col st + 1 }
isEol :: LexState -> Bool
isEol st = col st >= length (src st)
lexC :: LexState -> LexResult
lexC st
| isEol st = Right (Nothing, st)
| otherwise = lexC (advance st)
lexIdent :: LexState -> LexResult
lexIdent st =
let start = col st
st' = scanIdent st
in case mkToken st' Identifier start of
Left e -> Left e
Right t -> Right (Just t, st')
where
scanIdent s
| isEol s = s
| otherwise = case peek s of
Right c | iic c -> scanIdent (advance s)
_ -> s
lexS :: LexState -> TokenType -> LexResult
lexS st ttype =
let st' = advance st
in case mkToken st' ttype (col st) of
Left e -> Left e
Right t -> Right (Just t, st')
lexToken :: LexState -> LexResult
lexToken st = case peek st of
Left e -> Left e
Right c -> case c of
'λ' -> lexS st Lambda
'\\' -> lexS st Lambda
'.' -> lexS st Dot
'(' -> lexS st OpenParen
')' -> lexS st CloseParen
'#' -> lexC st
_ | iic c -> lexIdent st
_ | isIgnoreChar c -> Right (Nothing, advance st)
_ -> Left $ "Bad character. Line " ++ show (line st) ++
", column " ++ show (col st)
lexAll :: LexState -> Either String ([Token], LexState)
lexAll st
| isEol st = Right ([], st)
| otherwise = case lexToken st of
Left e -> Left e
Right (mt, st') -> case lexAll st' of
Left e -> Left e
Right (ts, st'') -> Right (maybe ts (:ts) mt, st'')
lexL :: Int -> String -> Either String [Token]
lexL lineNum input = do
let st = LexState { src = input, col = 0, line = lineNum }
(tokens, st') <- lexAll st
let eofTok = Token EOF (line st') (col st') ""
return (tokens ++ [eofTok])