forked from isaacg1/pyth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.py
63 lines (59 loc) · 1.62 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def lex(code: str) -> list:
    """Split source text into a list of token strings.

    Repeatedly asks ``find_split_point`` where the first token of the
    remaining text ends, slices that token off the front, and continues
    until no text is left.  Concatenating the returned tokens reproduces
    ``code``.

    Args:
        code: The source text to tokenize.

    Returns:
        The tokens in source order; an empty list when ``code`` is empty.
    """
    remainder = code
    tokens = []
    while remainder:
        split_point = find_split_point(remainder)
        # Peel the leading token off and keep lexing what follows it.
        token, remainder = remainder[:split_point], remainder[split_point:]
        tokens.append(token)
    return tokens
def find_split_point(code: str) -> int:
    """Return the index at which the first token of ``code`` ends.

    The decision is made from the first one or two characters:

    * a single remaining character is always a token on its own;
    * ``.`` followed by ``"`` covers the dot plus the string after it;
    * ``.`` followed by any other non-digit is a two-character token;
    * a backslash takes the next character with it (two characters);
    * ``.`` (followed by a digit) or a digit 1-9 starts a numeric literal;
    * ``"`` starts a string literal;
    * ``$`` starts a ``$``-delimited literal;
    * anything else is a one-character token.

    Args:
        code: Non-empty text whose leading token should be measured.

    Returns:
        The number of leading characters that make up the first token.

    Raises:
        IndexError: If ``code`` is empty.
    """
    if len(code) == 1:
        return 1
    if code[0] == ".":
        if code[1] == '"':
            # Measure the string without the leading dot, then count the dot.
            return string_split(code[1:]) + 1
        if code[1] not in "0123456789":
            return 2
        # '.' followed by a digit falls through to the numeric case below.
    if code[0] == '\\':
        return 2
    # '0' is deliberately absent: it falls through to a one-character token.
    if code[0] in ".123456789":
        return num_split(code)
    if code[0] == '"':
        return string_split(code)
    if code[0] == '$':
        return python_lit_split(code)
    return 1
def string_split(code: str) -> int:
    """Return the index just past the string literal that starts ``code``.

    Scanning begins after the opening quote and stops after the first
    unescaped ``"``.  A backslash followed by ``"`` or ``\\`` is skipped as
    a two-character escape; a backslash before any other character is
    treated as an ordinary character.  If the closing quote is missing the
    literal runs to the end of ``code``.

    Args:
        code: Text whose first character is ``"``.

    Returns:
        The length of the leading string literal, quotes included.

    Raises:
        AssertionError: If ``code`` does not start with ``"``.
        IndexError: If ``code`` is empty.
    """
    assert code[0] == '"'
    point = 1
    while point < len(code):
        if code[point] == '\\':
            if len(code) == point + 1:
                # Backslash is the very last character: consume it and stop.
                point += 1
                break
            elif code[point+1] in ('"', '\\'):
                # Escaped quote or backslash: skip both characters so the
                # escaped quote is not mistaken for the terminator.
                point += 2
                continue
        if code[point] == '"':
            # Closing quote: include it in the token and stop.
            point += 1
            break
        else:
            point += 1
    return point
def num_split(code: str) -> int:
    """Return the index just past the numeric literal that starts ``code``.

    Consumes digits and at most one ``.``; a second ``.`` ends the literal.
    If the consumed text ends in ``.`` and more text follows, that dot is
    handed back so it can be lexed as part of the next token.  A trailing
    dot at the very end of ``code`` is kept.

    Args:
        code: Text expected to start with a digit or ``.``.

    Returns:
        The length of the leading numeric literal, or 0 if ``code`` does
        not start with a digit or ``.``.
    """
    point = 0
    seen_a_dot = False
    while point < len(code) and code[point] in ".0123456789":
        if code[point] == '.':
            if seen_a_dot:
                # A second dot cannot belong to the same number.
                break
            seen_a_dot = True
        point += 1
    # Give back a dangling dot when something follows it.  The ``point > 0``
    # guard stops ``code[point - 1]`` from wrapping around to the last
    # character (and returning -1) when nothing was consumed.
    if 0 < point < len(code) and code[point - 1] == '.':
        point -= 1
    return point
def python_lit_split(code: str) -> int:
    """Return the index just past the ``$``-delimited literal opening ``code``.

    The literal runs from the leading ``$`` through the next ``$``.  If
    there is no closing ``$`` it extends to the end of ``code``.

    Args:
        code: Text whose first character is ``$``.

    Returns:
        The length of the leading literal, both ``$`` delimiters included.

    Raises:
        AssertionError: If ``code`` does not start with ``$``.
        IndexError: If ``code`` is empty.
    """
    assert code[0] == '$'
    # Search from index 1 so the opening delimiter is not matched; a single
    # find() replaces the separate membership test and index() on a slice.
    closing = code.find('$', 1)
    if closing == -1:
        return len(code)
    return closing + 1