Skip to content

Commit

Permalink
add nfa
Browse files Browse the repository at this point in the history
  • Loading branch information
shellfly committed Feb 7, 2020
1 parent 1be234e commit 751b0aa
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 2 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ Try to keep the interface and variable name consistent with the original book wh
* [TrieST](algs4/trie_st.py)
* [TST](algs4/tst.py)
* [KMP](algs4/kmp.py)

* [NFA](algs4/nfa.py)

## License

This code is released under MIT.
Expand Down
2 changes: 1 addition & 1 deletion algs4/bag.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self):
self.n = 0

def __str__(self):
return " ".join(i for i in self)
return " ".join(str(i) for i in self)

def __iter__(self):
return LinkIterator(self.first)
Expand Down
88 changes: 88 additions & 0 deletions algs4/nfa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
* Execution: python nfa.py regexp text
*
* % python nfa.py "(A*B|AC)D" AAAABD
* True
*
* % python nfa.py "(A*B|AC)D" AAAAC
* False
*
* % python nfa.py "(a|(bc)*d)*" abcbcd
* True
*
* % python nfa.py "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd
* True
*
* Remarks
* -----------
* The following features are not supported:
* - The + operator
* - Multiway or
* - Metacharacters in the text
* - Character classes.
*
"""

from algs4.bag import Bag
from algs4.digraph import Digraph
from algs4.directed_dfs import DirectedDFS
from algs4.stack import Stack


class NFA:
    """Regular-expression matcher based on a nondeterministic finite automaton.

    State ``i`` corresponds to ``regexp[i]`` and state ``M = len(regexp)`` is
    the accept state.  Epsilon-transitions are stored in the digraph ``G``;
    match transitions are implicit (state ``v`` moves to ``v + 1`` when
    ``regexp[v]`` equals the current text character or is the wildcard
    ``"."``).

    Supported operators: grouping ``( )``, closure ``*``, two-way or ``|``
    (inside parentheses) and the wildcard ``.``.
    """

    # Operator characters of the pattern.  Their states only carry
    # epsilon-transitions, so they must never consume a text character.
    _METACHARACTERS = frozenset("()|*")

    def __init__(self, regexp):
        """Build the epsilon-transition digraph for ``regexp``.

        Args:
            regexp: the pattern string.

        Raises:
            ValueError: if parentheses are unbalanced, or a ``|`` is not
                directly enclosed in a pair of parentheses (this also
                rejects multiway or, which this construction cannot
                represent).
        """
        M = len(regexp)
        G = Digraph(M + 1)
        # Indices of pending "(" and "|" operators; a plain list is used as
        # the stack so that emptiness and the top element can be inspected.
        ops = []
        for i, c in enumerate(regexp):
            lp = i
            if c == "(" or c == "|":
                ops.append(i)
            elif c == ")":
                if not ops:
                    raise ValueError(
                        "Invalid regular expression: unmatched ')'")
                op = ops.pop()
                if regexp[op] == "|":
                    # A two-way or needs its opening parenthesis right
                    # below it on the stack.
                    if not ops or regexp[ops[-1]] != "(":
                        raise ValueError(
                            "Invalid regular expression: '|' must be "
                            "enclosed in parentheses and multiway or is "
                            "not supported")
                    lp = ops.pop()
                    G.add_edge(lp, op + 1)  # "(" -> first char after "|"
                    G.add_edge(op, i)       # "|" -> ")"
                else:
                    lp = op
            # Closure: one-character lookahead for "*" after a character
            # or after a parenthesised group starting at lp.
            if i < M - 1 and regexp[i + 1] == "*":
                G.add_edge(lp, i + 1)
                G.add_edge(i + 1, lp)
            if c in ("(", "*", ")"):
                G.add_edge(i, i + 1)
        if ops:
            # Something was opened (or a top-level "|" was seen) and never
            # closed; the resulting automaton would be silently wrong.
            raise ValueError(
                "Invalid regular expression: unmatched '(' or '|'")
        self.M = M        # number of pattern characters; also the accept state
        self.G = G        # digraph of epsilon-transitions
        self.re = regexp  # the pattern, used for match transitions

    def _reachable(self, sources):
        """Return the list of states reachable in ``G`` from ``sources``."""
        dfs = DirectedDFS(self.G, sources)
        return [v for v in range(self.G.V) if dfs.marked(v)]

    def recognizes(self, txt):
        """Return True if the whole of ``txt`` is matched by the pattern.

        Args:
            txt: the text string to test.

        Returns:
            True if the accept state is reachable after consuming every
            character of ``txt``, False otherwise.
        """
        # States reachable from the start state through epsilon-transitions.
        pc = self._reachable([0])
        for c in txt:
            match = []
            for v in pc:
                if v >= self.M:
                    continue
                symbol = self.re[v]
                # Operator states never consume input; without this guard a
                # "(" in the text would be "matched" by a "(" in the pattern.
                if symbol in self._METACHARACTERS:
                    continue
                if symbol == c or symbol == ".":
                    match.append(v + 1)
            if not match:
                # No state can consume c, so no later state set can be
                # non-empty either.
                return False
            pc = self._reachable(match)
        return self.M in pc

    def char_at(self, s, d):
        """Return the code point of the character at index ``d`` of ``s``."""
        return ord(s[d])

if __name__ == "__main__":
    # Command-line driver: python nfa.py regexp text
    # Prints whether the text is recognized by the regular expression.
    import sys

    # Exit with a usage hint instead of an IndexError traceback when the
    # pattern or the text argument is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: python nfa.py regexp text")
    pattern, txt = sys.argv[1], sys.argv[2]
    nfa = NFA(pattern)
    print(nfa.recognizes(txt))

0 comments on commit 751b0aa

Please sign in to comment.