-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParser.hs
111 lines (91 loc) · 4 KB
/
Parser.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
{-
-}
{-# LANGUAGE GADTs, KindSignatures #-}
module Parser (parse, lexSymbol) where
import Text.Read (readMaybe)
import Data.Maybe (isJust)
import Data.Char
import Core
data Token :: * where
Bracket :: Direction -> Paren -> Token
Tok :: Symbol -> Token
deriving Show
tokenize :: String -> [String]
tokenize = words -- may want to update
-- returns either the parsed program, or a list of errors
parse :: String -> Either [String] Prgm
parse = accumulateParse . map lexSymbol . tokenize
accumulateParse :: [Either String Token] -> Either [String] Prgm
accumulateParse = clean . acc []
where clean (Left x) = Left x
clean (Right []) = Right [] -- empty program
clean (Right [x]) = Right x
clean _ = Left ["error in clean step."]
acc :: [Paren] -> [Either String Token] -> Either [String] [Prgm]
acc [] [] = Right [[]]
acc bt [] = Left ["reached end with unmatched parens: " ++ show bt]
acc bt (Right (Tok s) : xs) = case acc bt xs of
Right (ys : zss) -> Right ((s:ys) : zss)
errs -> errs
acc bt (Right (Bracket L b) : xs) = case acc (b : bt) xs of
Right (exp : rest : zss) -> Right $ (Quotation exp : rest) : zss
errs -> errs
acc [] (Right (Bracket R b) : xs) = acc [] $ (Left $ "unmatched closing paren: " ++ show b) : xs
acc (p : bt) (Right (Bracket R b) : xs) = if p == b
then
case acc bt xs of
Right ys -> Right ([] : ys)
errs -> errs
else
acc bt $ (Left $ "mismatched parens: expected: " ++ show p ++ " found: " ++ show b) : xs
acc bt (Left err : xs) = Left $ err : foldr filterErr [] xs
where filterErr (Left e) = (e:)
filterErr _ = id
lexSymbol :: String -> Either String Token
lexSymbol str = case readSymbol str of
Just s -> Right s
Nothing -> Left $ "Couldn't read: " ++ str
parseTypes :: [String -> Maybe Symbol]
parseTypes = [ fmap IntLit . readMaybe
, fmap StrLit . readMaybe
, fmap QuotID . readQuotName
] ++ fmap readPrim primatives ++
[ fmap Identifier . readIdentifier
]
where primatives = [ ("+",(:+))
, ("-",(:-))
, ("print", Print)
, ("bind", Bind)
, ("exec", Exec)
]
readPrim :: (String, Primative) -> String -> Maybe Symbol
readPrim (s, p) s1 | s == s1 = Just $ PrimOp p
| otherwise = Nothing
readQuotName :: String -> Maybe Name
readQuotName s = case s of
('\'':nm) -> readIdentifier nm
_ -> Nothing
readIdentifier :: String -> Maybe Name
readIdentifier s = case span isAlphaNum s of -- TODO: fix to allow morpheme decomposition
("", _) -> Nothing
(root, rest) -> fmap (\ms -> Name root ms) $ readMorphemes rest
readMorphemes :: String -> Maybe [Morpheme]
readMorphemes "" = Just []
readMorphemes (c:s)|cat == DashPunctuation || cat `elem` [OtherPunctuation .. OtherSymbol] = fmap ((c:m) :) $ readMorphemes rest
|otherwise = Nothing
where cat = generalCategory c
(m, rest) = span isAlphaNum s
-- add more: maybe infinite possibilities like Lua nested comments
data Paren :: * where
Sq :: Paren -- [ ]
deriving (Show, Eq)
data Direction = L | R deriving Show -- left / right (open / close)
readBracket :: String -> Maybe Token
readBracket "[" = Just $ Bracket L Sq
readBracket "]" = Just $ Bracket R Sq
readBracket _ = Nothing
readSymbol :: String -> Maybe Token
readSymbol str = let parses = map ($ str) parseTypes
in case filter isJust parses of
(Just p : _) -> Just $ Tok p
[] -> readBracket str