@@ -5,6 +5,8 @@ module AST.Parse
5
5
) where
6
6
7
7
import Control.Monad.Combinators.Expr qualified as Combinators
8
+ import Data.ByteString (ByteString )
9
+ import Data.ByteString qualified as ByteString
8
10
import Data.Char qualified as Char
9
11
import Data.Function ((&) )
10
12
import Data.List (foldl' )
@@ -14,43 +16,64 @@ import Data.Set qualified as Set
14
16
import Data.Text (Text )
15
17
import Data.Text qualified as Text
16
18
import Data.Void (Void )
19
+ import Data.Word (Word8 )
17
20
import Text.Megaparsec (Parsec , (<?>) )
18
21
import Text.Megaparsec qualified as Parsec
19
- import Text.Megaparsec.Char qualified as ParsecChar
20
- import Text.Megaparsec.Char .Lexer qualified as Lexer
22
+ import Text.Megaparsec.Byte qualified as Parsec.Byte
23
+ import Text.Megaparsec.Byte .Lexer qualified as Lexer
21
24
import Text.Megaparsec.Error qualified as ParsecError
22
25
23
26
import AST.Syntax (Name , Expr )
24
27
import AST.Syntax qualified as Syntax
25
28
26
29
--------------------------------------------------------------------------------
27
30
28
-- | Parser type used throughout this module: a megaparsec 'Parsec' parser
-- over a strict 'ByteString' input stream with no custom error component
-- ('Void').
type Parser = Parsec Void ByteString
29
32
30
33
-- | Failure value exposed by this module; wraps a single 'Text'
-- (presumably the rendered parse-error message — confirm against 'parse').
newtype Error = Error Text
31
34
32
35
--------------------------------------------------------------------------------
33
36
37
-- | 'True' exactly for the ASCII uppercase letters @A@..@Z@ (bytes 65-90).
isUpper :: Word8 -> Bool
isUpper byte = byte >= 65 && byte <= 90
39
+
40
-- | 'True' exactly for the ASCII lowercase letters @a@..@z@ (bytes 97-122).
isLower :: Word8 -> Bool
isLower byte = byte >= 97 && byte <= 122
42
+
43
-- | 'True' for any ASCII letter, i.e. a byte accepted by 'isUpper' or
-- 'isLower'.
isAlpha :: Word8 -> Bool
isAlpha byte
  | isUpper byte = True
  | otherwise = isLower byte
45
+
46
-- | 'True' for the four ASCII blanks this module recognises: horizontal tab
-- (9), line feed (10), carriage return (13) and space (32). Vertical tab (11)
-- and form feed (12) are deliberately not included here.
isSpace :: Word8 -> Bool
isSpace byte = byte `elem` [9, 10, 13, 32]
48
+
49
-- | 'True' exactly for the ASCII decimal digits @0@..@9@ (bytes 48-57).
isNumber :: Word8 -> Bool
isNumber byte = byte >= 48 && byte <= 57
51
+
52
-- | The byte value of the ASCII underscore character @_@ (95).
underscore :: Word8
underscore = fromIntegral (Char.ord '_')
54
+
55
+ --------------------------------------------------------------------------------
56
+
34
57
-- | Whitespace consumer shared by the lexer helpers of this module. It is
-- built with 'Lexer.space' from three pieces: a one-or-more-whitespace
-- parser (@space1@), a line-comment skipper and a block-comment skipper.
-- NOTE(review): the comment delimiters below appear with a leading blank
-- inside the quotes in this rendering; confirm the exact literals against
-- the real source file before relying on them.
space :: Parser ()
35
58
space =
36
59
Lexer. space
37
- ParsecChar . space1
60
+ Parsec.Byte . space1
38
61
(Lexer. skipLineComment " //" )
39
62
(Lexer. skipBlockComment " /*" " */" )
40
63
41
64
-- | Run the given parser, then discard whatever 'space' accepts after it
-- (whitespace and comments), returning the parser's own result.
lexeme :: Parser a -> Parser a
lexeme inner =
  Lexer.lexeme space inner
44
67
45
-- | Match the exact byte sequence given, skip trailing whitespace and
-- comments via 'space', and throw the matched bytes away.
symbol :: ByteString -> Parser ()
symbol expected =
  fmap (const ()) (Lexer.symbol space expected)
48
71
49
72
-- | Parse a floating-point literal with 'Lexer.float' as a 'Double', then
-- skip trailing whitespace and comments via 'space'.
float :: Parser Double
float =
  Lexer.lexeme space Lexer.float
52
75
53
- keywords :: Set Text
76
+ keywords :: Set ByteString
54
77
keywords = Set. fromList
55
78
[ " if"
56
79
, " then"
@@ -59,26 +82,26 @@ keywords = Set.fromList
59
82
, " extern"
60
83
]
61
84
62
-- | Match the reserved word @str@ as a whole token, then skip trailing
-- whitespace and comments via 'lexeme'.
--
-- The match is rejected when the next byte could continue an identifier:
-- an ASCII letter, an ASCII digit or an underscore. This is the same byte
-- class that 'identifierOrKeyword' accepts after the first byte, so input
-- such as @if_x@ is not split into the keyword @if@ plus a stray @_x@.
--
-- NOTE(review): there is no 'Parsec.try' here, so when the boundary check
-- fails the keyword bytes have already been consumed; confirm that callers
-- wrap this parser in 'Parsec.try' wherever backtracking is required.
keyword :: ByteString -> Parser ()
keyword str =
  lexeme $
    () <$ Parsec.Byte.string str
      <* Parsec.notFollowedBy identifierByte
  where
    -- Any byte that may appear inside an identifier after its first byte.
    identifierByte =
      Parsec.satisfy (\c -> isAlpha c || isNumber c || c == underscore)
67
90
68
-- | Lex one identifier-shaped word and skip trailing whitespace/comments
-- via 'lexeme'. The first byte must satisfy @isAlpha@ or equal
-- @underscore@; the remaining bytes are taken for as long as they satisfy
-- @isAlpha@, @isNumber@ or equal @underscore@, and the first byte is then
-- prepended to them. No keyword filtering is done here, so reserved words
-- are returned like any other word.
-- NOTE(review): lines marked @-@ below are the removed Text-based variants
-- shown by the diff rendering; the @+@ lines are their replacements.
- identifierOrKeyword :: Parser Text
91
+ identifierOrKeyword :: Parser ByteString
69
92
identifierOrKeyword =
70
93
let
71
94
alphaChar =
72
95
Parsec. satisfy
73
- (\ c -> ( Char. isAlpha c || c == ' _ ' ) && Char. isAscii c )
96
+ (\ c -> isAlpha c || c == underscore )
74
97
<?> " alphabet"
75
98
76
99
alphaNumChars =
77
100
Parsec. takeWhileP
78
101
(Just " alphabets or numbers" )
79
- (\ c -> ( Char. isAlpha c || Char. isNumber c || c == ' _ ' ) && Char. isAscii c )
102
+ (\ c -> isAlpha c || isNumber c || c == underscore )
80
103
in
81
- lexeme (Text . cons <$> alphaChar <*> alphaNumChars)
104
+ lexeme (ByteString . cons <$> alphaChar <*> alphaNumChars)
82
105
83
106
identifier :: Parser Name
84
107
identifier =
@@ -87,7 +110,7 @@ identifier =
87
110
word <- identifierOrKeyword
88
111
if Set. member word keywords then
89
112
let
90
- actual = ParsecError. Tokens (NonEmpty. fromList (Text . unpack word))
113
+ actual = ParsecError. Tokens (NonEmpty. fromList (ByteString . unpack word))
91
114
expected = ParsecError. Label (NonEmpty. fromList " identifier" )
92
115
err = ParsecError. TrivialError offset (Just actual) (Set. singleton expected)
93
116
in
@@ -172,7 +195,7 @@ toplevel =
172
195
*> Parsec. many (defn <* symbol " ;" )
173
196
<* Parsec. eof
174
197
175
- parse :: Text -> Either Error [Expr ]
198
+ parse :: ByteString -> Either Error [Expr ]
176
199
parse source =
177
200
case Parsec. parse toplevel " <stdin>" source of
178
201
Left errors ->
0 commit comments