-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathMain.hs
295 lines (263 loc) · 9.98 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ExtendedDefaultRules #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-missing-signatures -fno-warn-type-defaults #-}
-- | Take in Haskell code and output a vector of source spans and
-- their associated node type and case.
module Main (main) where
import Control.Applicative
import Data.Data
import Data.List
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Descriptive
import Descriptive.Options
import Language.Haskell.Exts
import System.Environment
-- | A generic Dynamic-like constructor -- but more convenient to
-- write and pattern match on.
data D = forall a. Data a => D a
-- | A parser. Presently there is only 'parseTopLevel', but in the
-- past, and in the future, there will be the facility to parse
-- specific parse of declarations, rather than re-parsing the whole
-- declaration which can be both slow and brittle.
type Parser = ParseMode -> String -> ParseResult D
-- | The 'empty' method isn't (shouldn't be) used, so this isn't a
-- real Alternative instance (perhaps a Semigroup might do?). But it's
-- handy.
instance Alternative ParseResult where
empty = ParseFailed undefined undefined
ParseFailed{} <|> x = x
x <|> _ = x
--- | Main entry point.
main :: IO ()
main =
do code <- getContents
args <- getArgs
case consume options (map T.pack args) of
Succeeded (action,typ,exts) ->
outputWith action typ exts code
_ ->
error (T.unpack (textDescription (describe options [])))
-- | Action to perform.
data Action = Parse | Check
-- | Thing to parse.
data ParseType = Decl | Stmt
-- | Command line options.
options :: Monad m => Consumer [Text] (Option ()) m (Action,ParseType,[Extension])
options = (,,) <$> action <*> typ <*> exts
where action =
constant "parse" "Parse and spit out spans" Parse <|>
constant "check" "Just check the syntax" Check
typ =
constant "decl" "Parse a declaration" Decl <|>
constant "stmt" "Parse a statement" Stmt
exts =
fmap getExtensions
(many (prefix "X" "Language extension"))
-- | Output some result with the given action (check/parse/etc.),
-- parsing the given type of AST node. In the past, the type was any
-- kind of AST node. Today, it's just a "decl" which is covered by
-- 'parseTopLevel'.
outputWith :: Action -> ParseType -> [Extension] -> String -> IO ()
outputWith action typ exts code =
case typ of
Decl -> output action parseTopLevel exts code
Stmt -> output action parseSomeStmt exts code
-- | Output AST info for the given Haskell code.
output :: Action -> Parser -> [Extension] -> String -> IO ()
output action parser exts code =
case parser mode code of
ParseFailed _ e -> error e
ParseOk (D ast) ->
case action of
Check -> return ()
Parse ->
putStrLn ("[" ++
concat (genHSE mode ast) ++
"]")
where mode = parseMode {extensions = exts}
-- | An umbrella parser to parse:
--
-- * A declaration.
--
-- * An import line (not normally counted as a declaration).
--
-- * A module header (not normally counted either).
--
-- * A module pragma (normally part of the module header).
--
parseTopLevel :: ParseMode -> String -> ParseResult D
parseTopLevel mode code =
((D . fix) <$> parseDeclWithMode mode code) <|>
(D <$> parseImport mode code) <|>
((D . fix) <$> parseModuleWithMode mode code) <|>
(D <$> parseModulePragma mode code)
-- | Parse a do-notation statement.
parseSomeStmt :: ParseMode -> String -> ParseResult D
parseSomeStmt mode code =
((D . fix) <$> parseStmtWithMode mode code) <|>
((D . fix) <$> parseExpWithMode mode code) <|>
(D <$> parseImport mode code)
-- | Apply fixities after parsing.
fix ast = fromMaybe ast (applyFixities baseFixities ast)
-- | Parse mode, includes all extensions, doesn't assume any fixities.
parseMode :: ParseMode
parseMode =
defaultParseMode {extensions = defaultExtensions
,fixities = Nothing}
-- | Generate a list of spans from the HSE AST.
genHSE :: Data a => ParseMode -> a -> [String]
genHSE mode x =
case gmapQ D x of
zs@(D y:ys) ->
case cast y of
Just s ->
spanHSE (show (show (typeOf x)))
(showConstr (toConstr x))
(srcInfoSpan s) :
concatMap (\(i,D d) -> pre x i ++ genHSE mode d)
(zip [0..] ys) ++
post mode x
_ ->
concatMap (\(D d) -> genHSE mode d) zs
_ -> []
-- | Pre-children tweaks for a given parent at index i.
--
pre :: (Typeable a) => a -> Integer -> [String]
pre x i =
case cast x of
-- <foo { <foo = 1> }> becomes <foo <{ <foo = 1> }>>
Just (RecUpdate SrcSpanInfo{srcInfoPoints=(start:_),srcInfoSpan=end} _ _)
| i == 1 ->
[spanHSE (show "RecUpdates")
"RecUpdates"
(SrcSpan (srcSpanFilename start)
(srcSpanStartLine start)
(srcSpanStartColumn start)
(srcSpanEndLine end)
(srcSpanEndColumn end))]
_ -> case cast x :: Maybe (Deriving SrcSpanInfo) of
-- <deriving (X,Y,Z)> becomes <deriving (<X,Y,Z>)
#if MIN_VERSION_haskell_src_exts(1,20,0)
Just (Deriving _ _ ds@(_:_)) ->
#else
Just (Deriving _ ds@(_:_)) ->
#endif
[spanHSE (show "InstHeads")
"InstHeads"
(SrcSpan (srcSpanFilename start)
(srcSpanStartLine start)
(srcSpanStartColumn start)
(srcSpanEndLine end)
(srcSpanEndColumn end))
|Just (IRule _ _ _ (IHCon (SrcSpanInfo start _) _)) <- [listToMaybe ds]
,Just (IRule _ _ _ (IHCon (SrcSpanInfo end _) _)) <- [listToMaybe (reverse ds)]]
_ -> []
-- | Post-node tweaks for a parent, e.g. adding more children.
post :: (Typeable a) => ParseMode -> a -> [String]
post mode x =
case cast x of
Just (QuasiQuote (base :: SrcSpanInfo) qname content) ->
case parseExpWithMode mode content of
ParseOk ex -> genHSE mode (fmap (redelta qname base) ex)
ParseFailed _ e -> error e
_ -> []
-- | Apply a delta to the positions in the given span from the base.
redelta :: String -> SrcSpanInfo -> SrcSpanInfo -> SrcSpanInfo
redelta qname base (SrcSpanInfo (SrcSpan fp sl sc el ec) pts) =
SrcSpanInfo
(if sl == 1
then SrcSpan fp
(sl + lineOffset)
(sc + columnOffset)
(el + lineOffset)
(if el == sl
then ec + columnOffset
else ec)
else SrcSpan fp
(sl + lineOffset)
sc
(el + lineOffset)
ec)
pts
where lineOffset = sl' - 1
columnOffset =
sc' - 1 +
length ("[" :: String) +
length qname +
length ("|" :: String)
(SrcSpanInfo (SrcSpan _ sl' sc' _ _) _) = base
-- | Generate a span from a HSE SrcSpan.
spanHSE :: String -> String -> SrcSpan -> String
spanHSE typ cons SrcSpan{..} = "[" ++ spanContent ++ "]"
where unqualify = dropUntilLast '.'
spanContent =
unwords [unqualify typ
,cons
,show srcSpanStartLine
,show srcSpanStartColumn
,show srcSpanEndLine
,show srcSpanEndColumn]
------------------------------------------------------------------------------
-- General Utility
-- | Like 'dropWhile', but repeats until the last match.
dropUntilLast :: Char -> String -> String
dropUntilLast ch = go []
where
go _ (c:cs) | c == ch = go [] cs
go acc (c:cs) = go (c:acc) cs
go acc [] = reverse acc
--------------------------------------------------------------------------------
-- Parsers that HSE hackage doesn't have
parseImport :: ParseMode -> String -> ParseResult (ImportDecl SrcSpanInfo)
parseImport mode code =
case parseModuleWithMode mode code of
ParseOk (Module _ _ _ [i] _) -> return i
ParseOk _ -> ParseFailed noLoc "parseImport"
ParseFailed x y -> ParseFailed x y
parseModulePragma :: ParseMode -> String -> ParseResult (ModulePragma SrcSpanInfo)
parseModulePragma mode code =
case parseModuleWithMode mode (code ++ "\nmodule X where") of
ParseOk (Module _ _ [p] _ _) -> return p
ParseOk _ -> ParseFailed noLoc "parseModulePragma"
ParseFailed x y -> ParseFailed x y
--------------------------------------------------------------------------------
-- Extensions stuff stolen from hlint
-- | Consume an extensions list from arguments.
getExtensions :: [Text] -> [Extension]
getExtensions = foldl f defaultExtensions . map T.unpack
where f _ "Haskell98" = []
f a ('N':'o':x)
| Just x' <- readExtension x =
delete x' a
f a x
| Just x' <- readExtension x =
x' :
delete x' a
f _ x = error $ "Unknown extension: " ++ x
-- | Parse an extension.
readExtension :: String -> Maybe Extension
readExtension x =
case classifyExtension x of
UnknownExtension _ -> Nothing
x' -> Just x'
-- | Default extensions.
defaultExtensions :: [Extension]
defaultExtensions =
[e | e@EnableExtension{} <- knownExtensions] \\ baddies
where baddies = map EnableExtension badExtensions
-- | Extensions which steal too much syntax.
badExtensions :: [KnownExtension]
badExtensions =
[Arrows -- steals proc
,TransformListComp -- steals the group keyword
,XmlSyntax, RegularPatterns -- steals a-b
,UnboxedTuples -- breaks (#) lens operator
-- ,QuasiQuotes -- breaks [x| ...], making whitespace free list comps break
]