push
This commit is contained in:
commit
ebaed617a9
4 changed files with 329 additions and 0 deletions
115
Lexer.hs
Normal file
115
Lexer.hs
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
module Lexer
|
||||
( TokenType(..)
|
||||
, Token(..)
|
||||
, lexL
|
||||
) where
|
||||
|
||||
import Data.Char (isAlphaNum)
|
||||
|
||||
data TokenType
|
||||
= Lambda
|
||||
| Dot
|
||||
| Identifier
|
||||
| OpenParen
|
||||
| CloseParen
|
||||
| EOF
|
||||
deriving (Show, Eq)
|
||||
|
||||
data Token = Token
|
||||
{ tokType :: TokenType
|
||||
, tokLine :: Int
|
||||
, tokCol :: Int
|
||||
, tokLexeme :: String
|
||||
} deriving (Show)
|
||||
|
||||
data LexState = LexState
|
||||
{ src :: String
|
||||
, col :: Int
|
||||
, line :: Int
|
||||
}
|
||||
|
||||
type LexResult = Either String (Maybe Token, LexState)
|
||||
|
||||
iic :: Char -> Bool
|
||||
iic c = isAlphaNum c || c == '_'
|
||||
|
||||
isIgnoreChar :: Char -> Bool
|
||||
isIgnoreChar c = c `elem` "() \t"
|
||||
|
||||
peek :: LexState -> Either String Char
|
||||
peek st = case drop (col st) (src st) of
|
||||
[] -> Left "Out of characters"
|
||||
(c:_) -> Right c
|
||||
|
||||
mkToken :: LexState -> TokenType -> Int -> Either String Token
|
||||
mkToken st ttype start
|
||||
| ttype == EOF =
|
||||
Right $ Token ttype (line st) start (drop start (take (col st) (src st)))
|
||||
| start < length (src st) && col st > 0 && col st - 1 < length (src st) =
|
||||
Right $ Token ttype (line st) start (drop start (take (col st) (src st)))
|
||||
| otherwise =
|
||||
Left $ "Failed to resolve " ++ show ttype ++
|
||||
". Line " ++ show (line st) ++ ", " ++ show start ++ ":" ++ show (col st)
|
||||
|
||||
advance :: LexState -> LexState
|
||||
advance st = st { col = col st + 1 }
|
||||
|
||||
isEol :: LexState -> Bool
|
||||
isEol st = col st >= length (src st)
|
||||
|
||||
lexC :: LexState -> LexResult
|
||||
lexC st
|
||||
| isEol st = Right (Nothing, st)
|
||||
| otherwise = lexC (advance st)
|
||||
|
||||
lexIdent :: LexState -> LexResult
|
||||
lexIdent st =
|
||||
let start = col st
|
||||
st' = scanIdent st
|
||||
in case mkToken st' Identifier start of
|
||||
Left e -> Left e
|
||||
Right t -> Right (Just t, st')
|
||||
where
|
||||
scanIdent s
|
||||
| isEol s = s
|
||||
| otherwise = case peek s of
|
||||
Right c | iic c -> scanIdent (advance s)
|
||||
_ -> s
|
||||
|
||||
lexS :: LexState -> TokenType -> LexResult
|
||||
lexS st ttype =
|
||||
let st' = advance st
|
||||
in case mkToken st' ttype (col st) of
|
||||
Left e -> Left e
|
||||
Right t -> Right (Just t, st')
|
||||
|
||||
lexToken :: LexState -> LexResult
|
||||
lexToken st = case peek st of
|
||||
Left e -> Left e
|
||||
Right c -> case c of
|
||||
'λ' -> lexS st Lambda
|
||||
'\\' -> lexS st Lambda
|
||||
'.' -> lexS st Dot
|
||||
'(' -> lexS st OpenParen
|
||||
')' -> lexS st CloseParen
|
||||
'#' -> lexC st
|
||||
_ | iic c -> lexIdent st
|
||||
_ | isIgnoreChar c -> Right (Nothing, advance st)
|
||||
_ -> Left $ "Bad character. Line " ++ show (line st) ++
|
||||
", column " ++ show (col st)
|
||||
|
||||
lexAll :: LexState -> Either String ([Token], LexState)
|
||||
lexAll st
|
||||
| isEol st = Right ([], st)
|
||||
| otherwise = case lexToken st of
|
||||
Left e -> Left e
|
||||
Right (mt, st') -> case lexAll st' of
|
||||
Left e -> Left e
|
||||
Right (ts, st'') -> Right (maybe ts (:ts) mt, st'')
|
||||
|
||||
lexL :: Int -> String -> Either String [Token]
|
||||
lexL lineNum input = do
|
||||
let st = LexState { src = input, col = 0, line = lineNum }
|
||||
(tokens, st') <- lexAll st
|
||||
let eofTok = Token EOF (line st') (col st') ""
|
||||
return (tokens ++ [eofTok])
|
||||
Loading…
Add table
Add a link
Reference in a new issue