From 751b0aa9b3407092b01723c4971abb3784002ac1 Mon Sep 17 00:00:00 2001 From: shellfly Date: Fri, 7 Feb 2020 20:32:09 +0800 Subject: [PATCH] add nfa --- README.md | 3 +- algs4/bag.py | 2 +- algs4/nfa.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 algs4/nfa.py diff --git a/README.md b/README.md index a5c319e..f8f4615 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,8 @@ Try to keep the interface and variable name consistent with the original book wh * [TrieST](algs4/trie_st.py) * [TST](algs4/tst.py) * [KMP](algs4/kmp.py) - + * [NFA](algs4/nfa.py) + ## License This code is released under MIT. diff --git a/algs4/bag.py b/algs4/bag.py index b0d1076..7745eaf 100644 --- a/algs4/bag.py +++ b/algs4/bag.py @@ -19,7 +19,7 @@ def __init__(self): self.n = 0 def __str__(self): - return " ".join(i for i in self) + return " ".join(str(i) for i in self) def __iter__(self): return LinkIterator(self.first) diff --git a/algs4/nfa.py b/algs4/nfa.py new file mode 100644 index 0000000..5f1ab79 --- /dev/null +++ b/algs4/nfa.py @@ -0,0 +1,88 @@ +""" + * Execution: python nfa.py regexp text + * + * % python nfa.py "(A*B|AC)D" AAAABD + * true + * + * % python nfa.py "(A*B|AC)D" AAAAC + * false + * + * % python nfa.py "(a|(bc)*d)*" abcbcd + * true + * + * % python nfa.py "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd + * true + * + * Remarks + * ----------- + * The following features are not supported: + * - The + operator + * - Multiway or + * - Metacharacters in the text + * - Character classes. 
from algs4.bag import Bag
from algs4.digraph import Digraph
from algs4.directed_dfs import DirectedDFS
from algs4.stack import Stack


class NFA:
    """Nondeterministic finite automaton built from a regular expression.

    The automaton has one state per pattern character plus one accept
    state (index ``M``).  Epsilon transitions are stored in the digraph
    ``G``; match transitions are implicit: a state holding a literal
    character (or ``.``) advances to the next state when it consumes a
    text character.

    Attributes:
        M:  number of characters in the pattern; also the accept state.
        G:  digraph of epsilon transitions over ``M + 1`` vertices.
        re: the pattern string the automaton was built from.
    """

    # Pattern characters that act as operators.  They only ever have
    # epsilon transitions and must never consume a text character.
    _METACHARACTERS = "()|*"

    def __init__(self, regexp):
        """Build the epsilon-transition digraph for ``regexp``.

        No validation of the pattern is performed here; unbalanced
        parentheses are passed straight through to the stack operations.
        At most one ``|`` per parenthesized group is handled.
        """
        ops = Stack()  # indices of pending "(" and "|" operators
        M = len(regexp)
        G = Digraph(M + 1)
        for i in range(M):
            c = regexp[i]
            lp = i  # start of the sub-expression a following "*" applies to
            if c in ("(", "|"):
                ops.push(i)
            elif c == ")":
                op = ops.pop()
                if regexp[op] == "|":
                    # "(A|B)": let "(" jump past "|" into B, and let "|"
                    # (the end of A) jump to the closing ")".
                    lp = ops.pop()
                    G.add_edge(lp, op + 1)
                    G.add_edge(op, i)
                else:
                    lp = op
            # Closure (one-character lookahead): connect the start of the
            # sub-expression and the "*" in both directions.
            if i < M - 1 and regexp[i + 1] == "*":
                G.add_edge(lp, i + 1)
                G.add_edge(i + 1, lp)
            # Operators other than "|" fall through to the next state.
            if c in ("(", "*", ")"):
                G.add_edge(i, i + 1)
        self.M = M
        self.G = G
        self.re = regexp

    def _epsilon_closure(self, sources):
        """Return a Bag of every state reachable from ``sources`` in ``G``."""
        dfs = DirectedDFS(self.G, sources)
        reachable = Bag()
        for v in range(self.G.V):
            if dfs.marked(v):
                reachable.add(v)
        return reachable

    def recognizes(self, txt):
        """Return True if the whole of ``txt`` is matched by the pattern.

        Only literal pattern characters and the ``.`` wildcard consume
        text.  Operator positions in the pattern never do, so a text that
        contains ``(``, ``)``, ``|`` or ``*`` cannot be matched through
        the operator of the same spelling.
        """
        pc = self._epsilon_closure([0])
        for ch in txt:
            match = Bag()
            advanced = False
            for v in pc:
                if v >= self.M:
                    continue  # the accept state has no outgoing match
                state_char = self.re[v]
                if state_char in self._METACHARACTERS:
                    continue  # operators only have epsilon transitions
                if state_char == ch or state_char == ".":
                    match.add(v + 1)
                    advanced = True
            if not advanced:
                # No state can consume this character, so every later
                # state set would be empty as well.
                return False
            pc = self._epsilon_closure(match)
        return any(v == self.M for v in pc)

    def char_at(self, s, d):
        """Return the code point of the character at index ``d`` of ``s``."""
        return ord(s[d])


if __name__ == "__main__":
    import sys
    pattern, txt = sys.argv[1], sys.argv[2]
    nfa = NFA(pattern)
    print(nfa.recognizes(txt))