-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparseRules.py
76 lines (71 loc) · 2.39 KB
/
parseRules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#Converts a string into a list of Rule objects.
#Example input (note: parseRules prints a warning unless some rule has "_" as its left-hand side):
"""\
S -> Expression
Expression -> Expression * Binary
Expression -> Expression + Binary
Expression -> Binary
Binary -> 0
Binary -> 1"""
from primitives import *
def parseRules(ruleText):
    """Convert a textual grammar into a list of Rule objects.

    Each meaningful line has the form ``LHS -> token token ...``.
    Processing steps, in order:

    * everything from ``#`` to the end of a line is discarded and the
      remainder is stripped of surrounding whitespace;
    * lines that are empty or a single character long are dropped;
    * ``A -> B | C`` is expanded into ``A -> B`` and ``A -> C``;
    * right-hand-side tokens are separated by whitespace. A token equal to
      the left-hand side of any rule becomes a NonTerminal; any other token
      is broken into one Terminal per character;
    * the token ``%SPACE%`` stands for a single space character.

    Only the first ``->`` on a line separates the two sides, so ``->`` may
    appear as a terminal token on the right-hand side. ``#`` and ``|``
    cannot be written as terminals because they are consumed as syntax.

    A warning is printed to stdout when no rule has ``_`` as its left-hand
    side.

    Args:
        ruleText: the grammar, one rule per line.

    Returns:
        A list with one Rule(LHS, RHS) per (expanded) line, in input order.

    Raises:
        ValueError: a line containing ``|`` has no ``->``.
        IndexError: any other kept line has no ``->``.
    """
    # Remove comments and surrounding whitespace.
    stripped = [line.partition("#")[0].strip() for line in ruleText.split("\n")]
    # Filter out empty lines. A one-character line is discarded as well; it
    # is too short to contain "->" and so cannot hold a rule.
    lines = [line for line in stripped if len(line) > 1]

    # Expand lines of the form "A -> B | C" into "A -> B" and "A -> C".
    expandedLines = []
    for line in lines:
        if "|" in line:
            # Split on the first arrow only, so a later "->" stays in the RHS.
            LHS, RHSs = line.split("->", 1)
            for RHS in RHSs.split("|"):
                expandedLines.append(LHS + "->" + RHS.strip())
        else:
            expandedLines.append(line)

    # Every symbol that appears on the left of some rule is a non-terminal.
    nonTerminalSymbols = set(
        line.split("->", 1)[0].strip() for line in expandedLines
    )
    if "_" not in nonTerminalSymbols:
        # Single-argument parenthesised form prints identically on
        # Python 2 and Python 3.
        print("Warning, expected starting Symbol _")

    ret = []
    for line in expandedLines:
        sides = line.split("->", 1)
        LHS = NonTerminal(sides[0].strip())
        RHS = []
        for token in sides[1].split():
            if token == "%SPACE%":
                token = " "
            if token in nonTerminalSymbols:
                RHS.append(NonTerminal(token))
            else:
                # Breaking up terminal tokens into individual characters is
                # a double-edged sword: it makes tokenizing as simple as
                # iterating a string, but it increases the size of the
                # parsing table, and potentially increases the likelihood
                # of shift-reduce collisions. (?)
                for char in token:
                    RHS.append(Terminal(char))
        ret.append(Rule(LHS, RHS))
    return ret
#old version of parseRules. Only allows symbols one character long, and spaces are treated as terminals.
#example input:
"""S->E
E->E*B
E->E+B
E->B
B->0
B->1"""
def _parseRules(ruleText):
ret = []
lines = ruleText.split("\n")
nonTerminalSymbols = map(lambda x: x.split("->")[0], lines)
for line in lines:
line = line.split("->")
LHS = NonTerminal(line[0])
RHS = []
for char in line[1]:
if char in nonTerminalSymbols:
RHS.append(NonTerminal(char))
else:
RHS.append(Terminal(char))
ret.append(Rule(LHS, RHS))
return ret