# Reconstructed from a whitespace-collapsed git patch:
#   commit  786ef4c9dda24f4696f93f63a6a6385a70cde43d
#   author  shellfly
#   date    Sun, 9 Jun 2019 16:09:14 +0800
#   subject remove duplication binary search symbol table code
#
# The patch created algs4/binary_search.py (52 insertions) and deleted
# algs4/binary_search_st.py (88 deletions).  Both payloads are reproduced
# below as ordinary Python, with the defects of each one repaired.
"""
* Execution: python binary_search.py whitelist.txt < input.txt
* Data files: https://algs4.cs.princeton.edu/11model/tinyW.txt
*             https://algs4.cs.princeton.edu/11model/tinyT.txt
*             https://algs4.cs.princeton.edu/11model/largeW.txt
*             https://algs4.cs.princeton.edu/11model/largeT.txt
*
* % python binary_search.py tinyW.txt < tinyT.txt
* 50
* 99
* 13
*
* % python binary_search.py largeW.txt < largeT.txt | more
* 499569
* 984875
* 295754
* 207807
* 140925
* 161828
* [367, 966 total values]
*
"""


# ---------------------------------------------------------------------------
# algs4/binary_search.py (added by the patch)
# ---------------------------------------------------------------------------
class BinarySearch:
    """Binary search for a key in a sorted sequence."""

    def index_of(self, arr, key):
        """Return the index of ``key`` in the sorted sequence ``arr``.

        Args:
            arr: A sequence sorted in ascending order.
            key: The value to look for; must be comparable with the
                elements of ``arr``.

        Returns:
            An index ``i`` with ``arr[i] == key``, or ``-1`` when ``key``
            is not present (including when ``arr`` is empty).
        """
        lo, hi = 0, len(arr) - 1
        while lo <= hi:
            # key is in arr[lo..hi] or not present.
            # Floor division keeps the midpoint an exact int; the original
            # int(x / 2) went through a float.
            mid = lo + (hi - lo) // 2
            if key < arr[mid]:
                hi = mid - 1
            elif key > arr[mid]:
                lo = mid + 1
            else:
                return mid
        return -1


# ---------------------------------------------------------------------------
# algs4/binary_search_st.py (deleted by the patch)
# ---------------------------------------------------------------------------
class BinarySearchST:
    """Ordered symbol table backed by two parallel sorted lists.

    ``keys`` is kept in ascending order and ``vals[i]`` is the value
    associated with ``keys[i]``.  ``size`` always equals ``len(keys)``.
    """

    def __init__(self):
        self.keys = []
        self.vals = []
        self.size = 0

    def contains(self, key):
        """Return True if ``key`` is in the table."""
        # The original walked self.first / x.next, attributes this class
        # never defines; rank() is the lookup that matches the list storage.
        i = self.rank(key)
        return i < self.size and self.keys[i] == key

    def rank(self, key):
        """Return the number of keys strictly smaller than ``key``.

        When ``key`` is present this is also its index in ``keys``.
        """
        lo = 0
        hi = self.size - 1
        while lo <= hi:
            # Integer midpoint: true division would produce a float index.
            mid = lo + (hi - lo) // 2
            if key < self.keys[mid]:
                hi = mid - 1
            elif key > self.keys[mid]:
                # Discard the whole lower half, not just one element.
                lo = mid + 1
            else:
                return mid
        return lo

    def get(self, key):
        """Return the value stored for ``key``, or None if it is absent."""
        i = self.rank(key)
        if i < self.size and self.keys[i] == key:
            return self.vals[i]
        return None

    def put(self, key, val):
        """Insert ``key`` with ``val``, overwriting any existing value."""
        i = self.rank(key)
        if i < self.size and self.keys[i] == key:
            self.vals[i] = val
            return

        # list.insert shifts the tail right by one slot; the original
        # hand-written shift loop never decremented its index.
        self.keys.insert(i, key)
        self.vals.insert(i, val)
        self.size += 1

    def delete(self, key):
        """Remove ``key`` and its value; do nothing if ``key`` is absent."""
        i = self.rank(key)
        if i < self.size and self.keys[i] == key:
            # Really remove the slot so len(keys) == size stays true and
            # max() never sees a None placeholder at the tail.
            del self.keys[i]
            del self.vals[i]
            self.size -= 1

    def is_empty(self):
        """Return True if the table holds no keys."""
        return self.size == 0

    def min(self):
        """Return the smallest key; raises IndexError on an empty table."""
        return self.keys[0]

    def max(self):
        """Return the largest key; raises IndexError on an empty table."""
        return self.keys[-1]

    def select(self, k):
        """Return the key at index ``k`` of the sorted key list."""
        return self.keys[k]

    def ceiling(self, key):
        """Return the smallest key >= ``key``, or None if there is none."""
        i = self.rank(key)
        if i == self.size:
            return None
        return self.keys[i]

    def floor(self, key):
        """Return the largest key <= ``key``, or None if there is none."""
        i = self.rank(key)
        # rank() points at key itself when it is present; that is the floor.
        if i < self.size and self.keys[i] == key:
            return self.keys[i]
        if i == 0:
            return None
        return self.keys[i - 1]


if __name__ == '__main__':
    import sys

    # read the integers from a file; blank lines (e.g. a trailing newline)
    # are skipped instead of crashing int()
    with open(sys.argv[1]) as f:
        whitelist = sorted(int(line) for line in f if line.strip())

    # read integer key from standard input; print if not in whitelist
    bs = BinarySearch()
    for line in sys.stdin:
        if not line.strip():
            continue
        key = int(line)
        if bs.index_of(whitelist, key) == -1:
            print(key)