-
Notifications
You must be signed in to change notification settings - Fork 78
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
321 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
""" | ||
* Execution: python binarydump.py n < file | ||
* Data file: https://introcs.cs.princeton.edu/stdlib/abra.txt | ||
* | ||
* Reads in a binary file and writes out the bits, n per line. | ||
* | ||
* % more abra.txt | ||
* ABRACADABRA! | ||
* | ||
* % python binarydump.py 16 < abra.txt | ||
* 0100000101000010 | ||
* 0101001001000001 | ||
* 0100001101000001 | ||
* 0100010001000001 | ||
* 0100001001010010 | ||
* 0100000100100001 | ||
* 96 bits | ||
* | ||
""" | ||
|
||
import sys | ||
from algs4.binarystdin import BinaryStdin | ||
|
||
# Number of bits printed per output line.  A value of 0 suppresses the dump
# entirely so that only the final bit count is reported.
bits_per_line = 16
if len(sys.argv) == 2:
    bits_per_line = int(sys.argv[1])

count = 0
while not BinaryStdin.is_empty():
    bit = BinaryStdin.read_bool()
    if bits_per_line != 0:
        # Start a new row before the first bit of every row except the first.
        if count != 0 and count % bits_per_line == 0:
            print()
        print(1 if bit else 0, end="")
    # Count every consumed bit, including when printing is suppressed;
    # previously the n == 0 path skipped this and always reported "0 bits".
    count += 1
if bits_per_line != 0:
    print()
print(count, "bits")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import sys | ||
|
||
|
||
class BinaryStdin:
    """Read bits, bytes and integers from standard input as a binary stream.

    All state is kept on the class itself, so there is one shared stream.
    The stream is primed lazily: the first call to ``is_empty`` (which every
    reader calls first) loads the first byte from ``sys.stdin.buffer``.
    """

    # Byte currently being consumed, or the ``EOFError`` class itself, which
    # is used as the end-of-stream sentinel.
    buffer = 0
    # Number of bits of ``buffer`` not yet consumed (-1 at end of stream).
    n = 0
    # True once the first byte has been loaded.
    initialized = False

    @classmethod
    def read_str(cls):
        """Read all remaining bytes and return them as a string.

        Each byte becomes the character with that code point.

        Raises:
            Exception: if the stream is already empty.
        """
        if cls.is_empty():
            raise Exception("reading from empty input stream")
        chars = []
        while not cls.is_empty():
            chars.append(chr(cls.read_byte()))
        return "".join(chars)

    @classmethod
    def read_int(cls):
        """Read 4 bytes and return them as a big-endian non-negative int.

        Raises:
            Exception: if the stream runs out before 4 bytes are read.
        """
        if cls.is_empty():
            raise Exception("reading from empty input stream")
        x = 0
        for _ in range(4):
            x = (x << 8) | cls.read_byte()
        return x

    @classmethod
    def read_int_r(cls, r):
        """Read the next ``r`` bits (1 <= r <= 32) as a non-negative int.

        Raises:
            Exception: if ``r`` is out of range or the stream runs out.
        """
        if r < 1 or r > 32:
            raise Exception("invalid r")
        if r == 32:
            return cls.read_int()
        x = 0
        for _ in range(r):
            x <<= 1
            if cls.read_bool():
                x |= 1
        return x

    @classmethod
    def read_byte(cls):
        """Read the next 8 bits and return them as an int in 0..255.

        Raises:
            Exception: if fewer than 8 bits remain in the stream.
        """
        if cls.is_empty():
            raise Exception("reading from empty input stream")
        # Byte-aligned: the buffer is exactly the byte we want.
        if cls.n == 8:
            b = cls.buffer
            cls.fill_buffer()
            return b
        # Unaligned: combine the remaining n bits of the current byte with
        # the first 8 - n bits of the next byte.
        high = cls.buffer << (8 - cls.n)
        remaining = cls.n
        cls.fill_buffer()
        # The next byte may not exist; without this check the sentinel would
        # be shifted below and fail with an unrelated TypeError.
        if cls.is_empty():
            raise Exception("reading from empty input stream")
        # fill_buffer reset n to 8, but n bits of the new byte remain unread.
        cls.n = remaining
        return (high | (cls.buffer >> cls.n)) & 0xff

    @classmethod
    def read_bool(cls):
        """Read the next bit and return it as a bool.

        Raises:
            Exception: if the stream is empty.
        """
        if cls.is_empty():
            raise Exception("reading from empty input stream")
        cls.n -= 1
        bit = ((cls.buffer >> cls.n) & 1) == 1
        if cls.n == 0:
            cls.fill_buffer()
        return bit

    @classmethod
    def fill_buffer(cls):
        """Load the next byte from stdin, or mark the stream as exhausted."""
        byte = sys.stdin.buffer.read(1)
        if byte == b'':
            cls.buffer = EOFError
            cls.n = -1
            return
        cls.n = 8
        cls.buffer = ord(byte)

    @classmethod
    def initialize(cls):
        """Prime the buffer with the first byte of input."""
        cls.fill_buffer()
        cls.initialized = True

    @classmethod
    def is_empty(cls):
        """Return True if no more bits are available."""
        if not cls.initialized:
            cls.initialize()
        return cls.buffer == EOFError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import sys | ||
|
||
|
||
class BinaryStdout:
    """Write bits, bytes and integers to standard output as a binary stream.

    Bits are accumulated on the class in an 8-bit buffer and emitted to
    ``sys.stdout.buffer`` one byte at a time.  Call ``close`` when done so
    that a trailing partial byte is padded with zero bits and written.
    """

    # Bits accumulated so far for the byte under construction.
    buffer = 0
    # Number of bits currently held in ``buffer``.
    n = 0

    @classmethod
    def write_int(cls, x):
        """Write the low 32 bits of ``x``, most significant byte first."""
        for shift in (24, 16, 8, 0):
            cls.write_byte((x >> shift) & 0xff)

    @classmethod
    def write_bits(cls, x, r):
        """Write the ``r`` low-order bits of ``x`` (1 <= r <= 32), MSB first.

        Raises:
            Exception: if ``r`` is out of range.
        """
        if r < 1 or r > 32:
            raise Exception("invalid r")
        if r == 32:
            return cls.write_int(x)
        for i in range(r):
            cls.write_bit(((x >> (r - i - 1)) & 1) == 1)

    @classmethod
    def write_byte(cls, b):
        """Write the 8 low-order bits of ``b``, most significant bit first."""
        for i in range(8):
            cls.write_bit(((b >> (8 - i - 1)) & 1) == 1)

    @classmethod
    def write_bit(cls, bit):
        """Append one bit; emit the buffered byte once 8 bits are collected."""
        cls.buffer <<= 1
        if bit:
            cls.buffer |= 1

        cls.n += 1
        if cls.n == 8:
            cls.clear_buffer()

    @classmethod
    def clear_buffer(cls):
        """Write out any buffered bits as one byte, zero-padded on the right."""
        if cls.n == 0:
            return
        cls.buffer <<= (8 - cls.n)
        sys.stdout.buffer.write(bytes([cls.buffer]))
        cls.n = 0
        cls.buffer = 0

    @classmethod
    def close(cls):
        """Write any pending bits and flush the underlying output stream."""
        cls.clear_buffer()
        # Push the bytes out now rather than relying on interpreter exit.
        sys.stdout.buffer.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
""" | ||
* Execution: python huffman.py - < input.txt (compress) | ||
* Execution: python huffman.py + < input.txt (expand) | ||
* Data files: https://algs4.cs.princeton.edu/55compression/abra.txt | ||
* https://algs4.cs.princeton.edu/55compression/tinytinyTale.txt | ||
* https://algs4.cs.princeton.edu/55compression/medTale.txt | ||
* https://algs4.cs.princeton.edu/55compression/tale.txt | ||
* | ||
* Compress or expand a binary input stream using the Huffman algorithm. | ||
* | ||
* % python huffman.py - < abra.txt | python binarydump.py 60 | ||
* 010100000100101000100010010000110100001101010100101010000100 | ||
* 000000000000000000000000000110001111100101101000111110010100 | ||
* 120 bits | ||
* | ||
* % python huffman.py - < abra.txt | python huffman.py + | ||
* ABRACADABRA! | ||
* | ||
""" | ||
|
||
from algs4.binarystdin import BinaryStdin | ||
from algs4.binarystdout import BinaryStdout | ||
from algs4.min_pq import MinPQ | ||
|
||
|
||
class Node:
    """Huffman trie node, ordered by frequency.

    Attributes:
        ch: character stored at the node.
        freq: frequency used for ordering.
        left: left child, or None.
        right: right child, or None.
    """

    def __init__(self, ch, freq, left, right):
        self.ch = ch
        self.freq = freq
        self.left = left
        self.right = right

    def __str__(self):
        return "%s %d" % (self.ch, self.freq)

    def is_leaf(self):
        """Return True if the node has no children."""
        # Identity check: a child object must never be mistaken for None,
        # whatever its __eq__ does.
        return self.left is None and self.right is None

    def __lt__(self, other):
        return self.freq < other.freq

    def __gt__(self, other):
        return self.freq > other.freq
|
||
|
||
class Huffman:
    """Huffman compression and expansion over binary stdin/stdout.

    Compressed layout, in order: the trie in preorder, the message length
    as a 32-bit integer, then the code bits of every input character.
    """

    # Alphabet size: one table slot per possible byte value.
    R = 256

    @classmethod
    def compress(cls):
        """Read all of stdin, Huffman-encode it and write it to stdout."""
        s = BinaryStdin.read_str()

        # tabulate frequency counts
        freq = [0] * cls.R
        for ch in s:
            freq[ord(ch)] += 1

        # build huffman trie
        root = cls.build_trie(freq)

        # build code table
        st = [None] * cls.R
        cls.build_code(st, root, "")

        # print trie for decoder
        cls.write_trie(root)

        # print number of bytes in original uncompressed message
        BinaryStdout.write_int(len(s))

        # use Huffman code to encode input
        for ch in s:
            for symbol in st[ord(ch)]:
                if symbol == "0":
                    BinaryStdout.write_bit(False)
                elif symbol == "1":
                    BinaryStdout.write_bit(True)
        BinaryStdout.close()

    @classmethod
    def build_trie(cls, freq):
        """Build the Huffman trie for the given frequency table.

        Args:
            freq: list of length ``R``; ``freq[c]`` is the count of char ``c``.

        Returns:
            The root Node of the trie.
        """
        # initialize priority queue with singleton trees
        pq = MinPQ()
        for c in range(cls.R):
            if freq[c] > 0:
                pq.insert(Node(chr(c), freq[c], None, None))
        # repeatedly merge the two smallest trees
        while pq.size() > 1:
            left = pq.del_min()
            right = pq.del_min()
            pq.insert(Node(chr(0), left.freq + right.freq, left, right))
        return pq.del_min()

    @classmethod
    def write_trie(cls, x):
        """Write the trie rooted at ``x`` to stdout in preorder.

        A leaf is written as a 1 bit followed by its 8-bit character; an
        internal node as a 0 bit followed by its left and right subtrees.
        """
        if x.is_leaf():
            BinaryStdout.write_bit(True)
            BinaryStdout.write_byte(ord(x.ch))
            return
        BinaryStdout.write_bit(False)
        cls.write_trie(x.left)
        cls.write_trie(x.right)

    @classmethod
    def build_code(cls, st, x, s):
        """Fill ``st`` with the bit-string code of every leaf under ``x``.

        Args:
            st: code table indexed by character code; filled in place.
            x: current trie node.
            s: code accumulated on the path from the root to ``x``.
        """
        if x.is_leaf():
            st[ord(x.ch)] = s
            return
        cls.build_code(st, x.left, s + "0")
        cls.build_code(st, x.right, s + "1")

    @classmethod
    def expand(cls):
        """Read a Huffman-encoded stream from stdin and write the original."""
        root = read_trie()
        # number of bytes to write
        length = BinaryStdin.read_int()
        for _ in range(length):
            # walk from the root to a leaf, one input bit per edge
            x = root
            while not x.is_leaf():
                x = x.right if BinaryStdin.read_bool() else x.left
            BinaryStdout.write_byte(ord(x.ch))
        # Finish the output stream the same way compress() does.
        BinaryStdout.close()
|
||
|
||
def read_trie():
    """Rebuild a trie from its preorder encoding on binary stdin.

    A 1 bit marks a leaf and is followed by the leaf's 8-bit character; a
    0 bit marks an internal node and is followed by its left subtree and
    then its right subtree.  Frequencies are not stored, so every node is
    created with frequency 0.
    """
    if not BinaryStdin.read_bool():
        # Internal node: the left subtree precedes the right in the stream.
        left_child = read_trie()
        right_child = read_trie()
        return Node(chr(0), 0, left_child, right_child)
    symbol = chr(BinaryStdin.read_byte())
    return Node(symbol, 0, None, None)
|
||
|
||
if __name__ == '__main__':
    import sys
    # Without a mode argument there is nothing sensible to do; report usage
    # instead of failing with a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: python huffman.py -|+ < input  (- compress, + expand)")
    # "-" compresses; any other argument expands.
    if sys.argv[1] == "-":
        Huffman.compress()
    else:
        Huffman.expand()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters