"""
 *  Execution:    python kmp.py pattern text
 *
 *  Reads in two strings, the pattern and the input text, and
 *  searches for the pattern in the input text using the
 *  KMP algorithm.
 *
 *  % python kmp.py abracadabra abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:               abracadabra
 *
 *  % python kmp.py rab abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:         rab
 *
 *  % python kmp.py bcara abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:                                   bcara
 *
 *  % python kmp.py rabrabracad abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern:                        rabrabracad
 *
 *  % python kmp.py abacad abacadabrabracabracadabrabrabracad
 *  text:    abacadabrabracabracadabrabrabracad
 *  pattern: abacad
 *
"""

# NOTE: this module was recovered from a whitespace-collapsed git diff that
# added algs4/kmp.py.  The same diff carried two one-line documentation hunks:
#   - README.md: adds "* [KMP](algs4/kmp.py)" to the list of algorithms.
#   - algs4/multiway.py: the docstring header "Compilation: python multiway.py"
#     becomes "Execution: python multiway.py".


class KMP:
    """Knuth-Morris-Pratt substring search driven by a precomputed DFA.

    ``dfa[c][j]`` is the state to move to when the automaton is in state
    ``j`` (``j`` pattern characters matched so far) and reads the character
    whose code point is ``c``.  State ``len(pattern)`` means "match found".

    Attributes:
        pattern: the pattern string being searched for.
        R: alphabet size; only code points in ``range(R)`` have a DFA row.
        dfa: ``R`` rows of ``len(pattern)`` transition targets each.
    """

    def __init__(self, pattern, R=256):
        """Build the DFA for ``pattern``.

        Args:
            pattern: the string to search for.  An empty pattern is allowed
                and matches at offset 0 of every text.
            R: alphabet size (default 256, i.e. extended ASCII).

        Raises:
            IndexError: if ``pattern`` contains a character whose code point
                is not below ``R``.
        """
        self.pattern = pattern
        self.R = R
        M = len(pattern)

        # Validate up front so the failure names the offending character
        # instead of surfacing as a bare "list index out of range".
        for ch in pattern:
            if ord(ch) >= R:
                raise IndexError(
                    "pattern character %r is outside the alphabet of size %d"
                    % (ch, R)
                )

        self.dfa = [[0] * M for _ in range(R)]
        if M == 0:
            # No states to fill in; search() reports a match at offset 0.
            return

        self.dfa[self.char_at(pattern, 0)][0] = 1
        # X is the restart state: where the DFA would be after reading
        # pattern[1:j], i.e. the state to fall back to on a mismatch at j.
        X = 0
        for j in range(1, M):
            c_match = self.char_at(pattern, j)
            for c in range(R):
                self.dfa[c][j] = self.dfa[c][X]   # mismatch: copy restart column
            self.dfa[c_match][j] = j + 1           # match: advance one state
            X = self.dfa[c_match][X]               # update restart state

    def search(self, txt):
        """Return the offset of the first occurrence of the pattern in ``txt``.

        Returns ``len(txt)`` when the pattern does not occur.  Text characters
        outside the alphabet cannot be part of a match, so they send the
        automaton back to state 0.
        """
        N, M = len(txt), len(self.pattern)
        i, j = 0, 0
        while i < N and j < M:
            c = self.char_at(txt, i)
            # A character with no DFA row never occurs in the pattern, so
            # every state (including the restart state) maps it to 0.
            j = self.dfa[c][j] if c < self.R else 0
            i += 1
        # Found (hit end of pattern)
        if j == M:
            return i - M
        # Not Found (hit end of text)
        return N

    def char_at(self, s, d):
        """Return the code point of the character at index ``d`` of ``s``."""
        return ord(s[d])


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        sys.exit("usage: python kmp.py pattern text")
    pat, txt = sys.argv[1], sys.argv[2]
    kmp = KMP(pat)
    offset = kmp.search(txt)
    # Both labels are 9 characters wide so the pattern is printed directly
    # underneath its match in the text.
    print("text:    " + txt)
    print("pattern: " + " " * offset + pat)