Skip to content

Commit

Permalink
add kmp
Browse files Browse the repository at this point in the history
  • Loading branch information
shellfly committed Feb 7, 2020
1 parent bdb3dc5 commit 1be234e
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ Try to keep the interface and variable name consistent with the original book wh
* [Quick3string](algs4/quick3_string.py)
* [TrieST](algs4/trie_st.py)
* [TST](algs4/tst.py)
* [KMP](algs4/kmp.py)

## License

This code is released under MIT.
Expand Down
71 changes: 71 additions & 0 deletions algs4/kmp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
* Execution: python kmp.py pattern text
*
* Reads in two strings, the pattern and the input text, and
* searches for the pattern in the input text using the
* KMP algorithm.
*
* % python kmp.py abracadabra abacadabrabracabracadabrabrabracad
* text:    abacadabrabracabracadabrabrabracad
* pattern:               abracadabra
*
* % python kmp.py rab abacadabrabracabracadabrabrabracad
* text:    abacadabrabracabracadabrabrabracad
* pattern:         rab
*
* % python kmp.py bcara abacadabrabracabracadabrabrabracad
* text:    abacadabrabracabracadabrabrabracad
* pattern:                                   bcara
*
* % python kmp.py rabrabracad abacadabrabracabracadabrabrabracad
* text:    abacadabrabracabracadabrabrabracad
* pattern:                        rabrabracad
*
* % python kmp.py abacad abacadabrabracabracadabrabrabracad
* text:    abacadabrabracabracadabrabrabracad
* pattern: abacad
*
"""


class KMP:
    """Knuth-Morris-Pratt substring search driven by a precomputed DFA.

    The constructor builds a deterministic finite automaton for the pattern
    over an alphabet of ``R`` (256) character codes; ``search`` then scans a
    text once, never backing up in the input.

    Attributes:
        pattern: the pattern string the automaton was built for.
        R: alphabet size (number of rows in ``dfa``).
        dfa: ``dfa[c][j]`` is the state reached from state ``j`` on reading
            the character with code ``c``; state ``len(pattern)`` means the
            whole pattern has been matched.
    """

    def __init__(self, pattern):
        """Build the DFA for ``pattern``.

        An empty pattern is accepted and yields an automaton with no states,
        so that ``search`` reports a match at offset 0.

        Raises:
            IndexError: if ``pattern`` contains a character whose code point
                is not below ``R``.
        """
        self.pattern = pattern
        self.R = 256
        M = len(pattern)
        self.dfa = [[0] * M for _ in range(self.R)]
        if M == 0:
            # No states to fill in; pattern[0] below would be out of range.
            return
        self.dfa[self.char_at(pattern, 0)][0] = 1
        # X is the restart state: where the automaton would be after reading
        # pattern[1:j], i.e. the state to fall back to on a mismatch at j.
        X = 0
        for j in range(1, M):
            target = self.char_at(pattern, j)
            # Mismatch transitions: behave as the restart state would.
            for row in self.dfa:
                row[j] = row[X]
            # Match transition: advance to the next state.
            self.dfa[target][j] = j + 1
            # Advance the restart state on the matched character.
            X = self.dfa[target][X]

    def search(self, txt):
        """Return the index of the first occurrence of the pattern in ``txt``.

        Returns ``len(txt)`` when the pattern does not occur. Text characters
        whose code point is outside the alphabet cannot be part of the
        pattern, so they reset the automaton to its start state instead of
        indexing past the end of the table.
        """
        N, M = len(txt), len(self.pattern)
        i, j = 0, 0
        while i < N and j < M:
            c = self.char_at(txt, i)
            j = self.dfa[c][j] if c < self.R else 0
            i += 1
        # Found (hit end of pattern)
        if j == M:
            return i - M
        # Not Found (hit end of text)
        return N

    def char_at(self, s, d):
        """Return the integer code point of the character at index ``d`` of ``s``."""
        return ord(s[d])


if __name__ == "__main__":
    # Command-line driver: python kmp.py pattern text
    # Prints the text, then the pattern shifted so that it sits directly
    # underneath its first occurrence (or past the end of the text when
    # there is no match).
    import sys

    if len(sys.argv) < 3:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit("usage: python kmp.py pattern text")
    pat, txt = sys.argv[1], sys.argv[2]
    kmp = KMP(pat)
    offset = kmp.search(txt)
    # Both labels are padded to the same width so the columns line up.
    print("text:    " + txt)
    print("pattern: " + " " * offset + pat)
2 changes: 1 addition & 1 deletion algs4/multiway.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
* Compilation: python multiway.py
* Execution: python multiway.py
* Data files: https://algs4.cs.princeton.edu/24pq/m1.txt
* https://algs4.cs.princeton.edu/24pq/m2.txt
* https://algs4.cs.princeton.edu/24pq/m3.txt
Expand Down

0 comments on commit 1be234e

Please sign in to comment.