-
Notifications
You must be signed in to change notification settings - Fork 2
/
grammar.js
234 lines (198 loc) · 4.99 KB
/
grammar.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
/**
 * Turn a pattern into a RegExp object.
 *
 * `RegExp` is invoked as a plain function (no `new`), so a string is
 * compiled with no flags, while an existing RegExp instance is handed back
 * as-is rather than copied.
 *
 * @param {string|RegExp} patt - regular-expression source text.
 * @returns {RegExp} the compiled pattern.
 */
function regex(patt) {
  const compiled = RegExp(patt);
  return compiled;
}
// numbers

// sign alternatives, used as an optional prefix by the numeric rules
const SIGN = choice('-', '+');

// exactly one decimal digit
const DIGIT = regex("[0-9]");

// a decimal digit followed by any mix of decimal digits and underscores
const DEC_CHUNK = regex("[0-9][0-9_]*");

// a hex digit followed by any mix of hex digits and underscores
const HEX_CHUNK = regex("[a-fA-F0-9][a-fA-F0-9_]*");

// the integers 2 through 36, each as its own literal string alternative,
// in ascending order
const RADIX = choice(...Array.from({ length: 35 }, (_, i) => String(i + 2)));

// exactly one ASCII letter or decimal digit
const ALPHA_NUM = regex("[a-zA-Z0-9]");

// a letter or digit followed by any mix of letters, digits and underscores
const RADIX_CHUNK = regex("[a-zA-Z0-9][a-zA-Z0-9_]*");
// symbols and keywords

// one character from the symbol set, excluding digits and ':'
const SYM_CHAR_NO_DIGIT_NO_COLON =
  regex("[a-zA-Z!$%&*+\\-./<=>?@^_]");

// see is_symbol_char_gen in janet's tools/symcharsgen.c
// same set as above, with digits and ':' added
const SYM_CHAR =
  regex("[0-9:a-zA-Z!$%&*+\\-./<=>?@^_]");
// strings

// Body of a double-quoted literal: zero or more pieces, where each piece is
// either one character that is neither a backslash nor a double quote, or a
// backslash followed by any single character (newline included).
// thanks to tree-sitter-haskell for the escape alternative
const STRING_DOUBLE_QUOTE_CONTENT = repeat(
  choice(
    regex("[^\\\\\"]"),
    regex("\\\\(.|\\n)")
  )
);
module.exports = grammar({
name: 'janet_simple',
extras: $ => [
// see is_whitespace in janet's parser.c
regex("\x00|" +
"\x09|\x0a|\x0b|\x0c|\x0d|" +
"\x20"),
$.comment
],
externals: $ => [
$.long_buf_lit,
$.long_str_lit
],
rules: {
// THIS MUST BE FIRST -- even though this doesn't look like it matters
source: $ =>
repeat($._lit),
comment: $ =>
regex("#.*"),
_lit: $ =>
choice($.bool_lit,
$.buf_lit,
$.kwd_lit,
$.long_buf_lit,
$.long_str_lit,
$.nil_lit,
$.num_lit,
$.str_lit,
$.sym_lit,
//
$.par_arr_lit,
$.sqr_arr_lit,
$.struct_lit,
$.tbl_lit,
$.par_tup_lit,
$.sqr_tup_lit,
//
$.qq_lit,
$.quote_lit,
$.short_fn_lit,
$.splice_lit,
$.unquote_lit),
// simplest things
bool_lit: $ =>
// XXX: without the token here, false and true are exposed as
// anonymous nodes it seems...
// yet, the same does not happen for nil...strange
token(choice('false',
'true')),
kwd_lit: $ =>
prec(2,
token(seq(':',
repeat(SYM_CHAR)))),
nil_lit: $ =>
'nil',
num_lit: $ =>
prec(5,
choice($._radix,
$._hex,
$._dec)),
_radix: $ =>
token(seq(optional(SIGN),
RADIX,
'r',
choice(seq(optional("."), RADIX_CHUNK),
seq(RADIX_CHUNK, ".", optional(RADIX_CHUNK))),
optional(seq('&',
optional(SIGN),
repeat1(ALPHA_NUM))))),
_hex: $ =>
token(seq(optional(SIGN),
'0x',
choice(seq(optional("."), HEX_CHUNK),
seq(HEX_CHUNK, ".", optional(HEX_CHUNK))))),
_dec: $ =>
token(seq(optional(SIGN),
choice(seq(optional("."), DEC_CHUNK),
seq(DEC_CHUNK, ".", optional(DEC_CHUNK))),
optional(seq(choice('e', 'E'),
optional(SIGN),
repeat1(DIGIT))))),
str_lit: $ =>
token(seq('"',
STRING_DOUBLE_QUOTE_CONTENT,
'"')),
buf_lit: $ =>
token(seq('@"',
STRING_DOUBLE_QUOTE_CONTENT,
'"')),
sym_lit: $ =>
token(seq(SYM_CHAR_NO_DIGIT_NO_COLON,
repeat(SYM_CHAR))),
// collection-ish things
par_arr_lit: $ =>
seq('@(',
repeat($._lit),
')'),
sqr_arr_lit: $ =>
seq('@[',
repeat($._lit),
']'),
struct_lit: $ =>
seq('{',
repeat($._lit),
'}'),
tbl_lit: $ =>
seq('@{',
repeat($._lit),
'}'),
par_tup_lit: $ =>
seq('(',
repeat($._lit),
')'),
sqr_tup_lit: $ =>
seq('[',
repeat($._lit),
']'),
// macro-related
qq_lit: $ =>
seq('~',
$._lit),
quote_lit: $ =>
seq("'",
$._lit),
// following all work at the repl..
// |(= $ 1)
// |[1 2]
// |@[8 9]
// |@(:fun :time)
// |{:a 1}
// |@{:pose :sit}
// |'(0)
// |~(:x)
// |:kwd
// |a-sym
// |"a-str"
// |@"buffer"
// |``long-string``
// |@``long-buffer``
// |false
// |nil
// |8
// ||8, |||8, etc.
// XXX: |() doesn't work...but don't bother disallowing
short_fn_lit: $ =>
seq('|',
$._lit),
// XXX: ?
splice_lit: $ =>
seq(';',
$._lit),
unquote_lit: $ =>
seq(',',
$._lit),
}
});