From 42cde7fd3a6d487f7cce4009bd39a1772e3fdf3e Mon Sep 17 00:00:00 2001 From: shellfly Date: Thu, 6 Feb 2020 20:09:34 +0800 Subject: [PATCH] add msd --- README.md | 4 ++-- algs4/lsd.py | 24 +++++++++----------- algs4/msd.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 algs4/msd.py diff --git a/README.md b/README.md index df58731..ed8f62c 100644 --- a/README.md +++ b/README.md @@ -79,9 +79,9 @@ Try to keep the interface and variable name consistent with the original book wh * [BellmanFordSP](algs4/bellman_ford_sp.py) * 5 STRING - * [LSD](algs4/lsd.py) - + * [MSD](algs4/msd.py) + * [Quick3string](algs4/quick3_string.py) ## License This code is released under MIT. diff --git a/algs4/lsd.py b/algs4/lsd.py index 064a210..e53455b 100644 --- a/algs4/lsd.py +++ b/algs4/lsd.py @@ -1,9 +1,9 @@ """ - Execution: python lsd.py input.txt + Execution: python lsd.py < input.txt Data files: https://algs4.cs.princeton.edu/51radix/words3.txt - % python lsd.py words3.txt + % python lsd.py < words3.txt all bad bed @@ -17,17 +17,16 @@ class LSD: - + R = 256 @classmethod def sort(cls, a, w): n = len(a) - R = 256 aux = ['' for _ in range(n)] for d in range(w-1, -1, -1): - count = [0 for _ in range(R+1)] + count = [0 for _ in range(cls.R+1)] for i in range(n): count[ord(a[i][d])+1] += 1 - for r in range(R): + for r in range(cls.R): count[r+1] += count[r] for i in range(n): aux[count[ord(a[i][d])]] = a[i] @@ -35,13 +34,12 @@ def sort(cls, a, w): for i in range(n): a[i] = aux[i] + if __name__ == '__main__': import sys - lst = [] - with open(sys.argv[1]) as fp: - for line in fp: - for x in line.split(' '): - lst.append(x.strip()) - LSD.sort(lst, len(lst[0])) - for item in lst: + words = [] + for line in sys.stdin: + words.extend(line.split()) + LSD.sort(words, len(words[0])) + for item in words: print(item) diff --git a/algs4/msd.py b/algs4/msd.py new file mode 100644 index 0000000..c8d6b5d --- /dev/null +++ 
"""
  Execution:    python msd.py < input.txt

  Data files:   https://algs4.cs.princeton.edu/51radix/words3.txt

  % python msd.py < words3.txt
  all
  bad
  bed
  bug
  dad
  ...
  yes
  yet
  zoo
"""


class MSD:
    """Sort a list of strings in place with MSD (most-significant-digit) radix sort.

    Constructing ``MSD(a)`` sorts ``a`` in ascending code-point order.
    Strings may have different lengths; a string that is a proper prefix
    of another sorts before it.  Subarrays of at most ``CUTOFF + 1``
    elements are finished with insertion sort.

    Characters must have a code point below ``R``; a larger code point
    reaching the counting pass raises ``IndexError``.
    """

    R = 256      # extended ASCII alphabet size
    CUTOFF = 15  # cutoff to insertion sort

    def __init__(self, a):
        """Sort the list of strings ``a`` in place."""
        # Scratch buffer shared by every recursive call of sort().
        self.aux = [""] * len(a)
        self.sort(a, 0, len(a) - 1, 0)

    def sort(self, a, lo, hi, d):
        """Sort ``a[lo..hi]`` (inclusive), assuming the first ``d``
        characters of all those strings are already equal."""
        if hi <= lo + self.CUTOFF:
            self.insertion(a, lo, hi, d)
            return

        # count[c + 2] holds the frequency of key c; key -1 (end of
        # string) lands in count[1], hence the R + 2 slots.
        count = [0] * (self.R + 2)
        for i in range(lo, hi + 1):
            count[self.char_at(a[i], d) + 2] += 1

        # Turn frequencies into start offsets: count[c + 1] becomes the
        # first position (relative to lo) of the bucket for key c.
        for r in range(self.R + 1):
            count[r + 1] += count[r]

        # Distribute into aux; stable because items are scanned in order.
        for i in range(lo, hi + 1):
            c = self.char_at(a[i], d)
            self.aux[count[c + 1]] = a[i]
            count[c + 1] += 1

        # Copy back.
        for i in range(lo, hi + 1):
            a[i] = self.aux[i - lo]

        # After distribution count[r] is the start of character r's
        # bucket and count[r + 1] its end.  The end-of-string bucket
        # (before count[0]) is already in final position and is skipped.
        for r in range(self.R):
            self.sort(a, lo + count[r], lo + count[r + 1] - 1, d + 1)

    def insertion(self, a, lo, hi, d):
        """Insertion-sort ``a[lo..hi]`` (inclusive), comparing the strings
        from character ``d`` onward."""
        for i in range(lo, hi + 1):
            j = i
            # Compare whole suffixes, not just the d-th character:
            # this call must leave the range fully sorted.
            while j > lo and a[j][d:] < a[j - 1][d:]:
                a[j], a[j - 1] = a[j - 1], a[j]
                j -= 1

    def char_at(self, s, d):
        """Return the code point of ``s[d]``, or -1 if ``s`` has no
        character at index ``d`` (end of string)."""
        if d < len(s):
            return ord(s[d])
        return -1


if __name__ == '__main__':
    import sys
    words = []
    for line in sys.stdin:
        words.extend(line.split())
    MSD(words)
    for item in words:
        print(item)