Skip to content

Commit 4a8951a

Browse files
committed
make implementation more modular & more like stdlib compression libs, initialize the default dictionary only once
1 parent 9aa6dbc commit 4a8951a

File tree

4 files changed

+229
-177
lines changed

4 files changed

+229
-177
lines changed

dictionary.go

+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package huffman
2+
3+
import "sort"
4+
5+
const (
6+
huffmanMaxNodes = (HuffmanMaxSymbols)*2 + 1 // +1 for additional EOF symbol
7+
huffmanLookupTableBits = 10
8+
huffmanLookupTableSize = (1 << huffmanLookupTableBits)
9+
huffmanLookupTableMask = (huffmanLookupTableSize - 1)
10+
)
11+
12+
var (
13+
// DefaultDictionary is a huffman dictionary that is used to encode and decode data.
14+
// It is defined as a global variable in order to avoid re-creating it every time, as that is expensive.
15+
// This global value can be changed to a custom dictionary if needed which will then be reused globally.
16+
DefaultDictionary = NewDictionary()
17+
18+
// TeeworldsFrequencyTable is the one used in Teeworlds by default.
19+
// The C++ implementation has an additional frequency on
20+
// the 256th index with the value 1517 which is overwritten
21+
// in the huffman constructor anyway, making it obsolete
22+
TeeworldsFrequencyTable = [HuffmanMaxSymbols]uint32{
23+
1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186,
24+
283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176,
25+
872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19,
26+
16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68,
27+
59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9,
28+
20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5,
29+
15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71,
30+
148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80,
31+
167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60,
32+
136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21,
33+
32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21,
34+
12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18,
35+
9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335,
36+
}
37+
)
38+
39+
// Dictionary is a huffman lookup table/tree that is used to lookup symbols and their corresponding huffman codes.
40+
type Dictionary struct {
41+
nodes [huffmanMaxNodes]node
42+
decodeLut [huffmanLookupTableSize]*node
43+
startNode *node
44+
numNodes uint16
45+
}
46+
47+
// node is a single entry of the huffman tree stored in Dictionary.nodes.
type node struct {
	// Bits is the huffman code of a leaf. Bit k is the branch taken at
	// depth k, so the lowest bit is the first step away from the root.
	Bits uint32

	// NumBits is the code length of a leaf. Internal nodes keep it at 0,
	// which is how the two kinds of node are told apart.
	NumBits uint8

	// Leafs holds the children as indices into Dictionary.nodes instead of
	// pointers: shorts are smaller, so more nodes fit into the cache.
	// Leafs[0] is followed for a 0 bit and Leafs[1] for a 1 bit; 0xffff
	// marks a missing child.
	Leafs [2]uint16

	// Symbol is the byte value that a leaf stands for.
	Symbol byte
}
58+
59+
// NewDictionary returns a initialized lookup table that uses the Teeworlds' default frequency table,
60+
// which can be found as TeeworldsFrequencyTable global variable.
61+
func NewDictionary() *Dictionary {
62+
return NewDictionaryWithFrequencies(TeeworldsFrequencyTable)
63+
}
64+
65+
func NewDictionaryWithFrequencies(frequencyTable [HuffmanMaxSymbols]uint32) *Dictionary {
66+
67+
d := Dictionary{}
68+
d.constructTree(frequencyTable)
69+
70+
// build decode lookup table (LUT)
71+
for i := 0; i < huffmanLookupTableSize; i++ {
72+
var (
73+
bits uint32 = uint32(i)
74+
k int
75+
n = d.startNode
76+
)
77+
78+
for k = 0; k < huffmanLookupTableBits; k++ {
79+
n = &d.nodes[n.Leafs[bits&1]]
80+
bits >>= 1
81+
82+
if n.NumBits > 0 {
83+
d.decodeLut[i] = n
84+
break
85+
}
86+
}
87+
88+
if k == huffmanLookupTableBits {
89+
d.decodeLut[i] = n
90+
}
91+
92+
}
93+
return &d
94+
}
95+
96+
func (d *Dictionary) setBitsR(n *node, bits uint32, depth uint8) {
97+
var (
98+
newBits uint32
99+
left = n.Leafs[0]
100+
right = n.Leafs[1]
101+
)
102+
103+
if right < 0xffff {
104+
newBits = bits | (1 << depth)
105+
d.setBitsR(&d.nodes[right], newBits, depth+1)
106+
}
107+
if left < 0xffff {
108+
newBits = bits
109+
d.setBitsR(&d.nodes[left], newBits, depth+1)
110+
}
111+
112+
if n.NumBits > 0 {
113+
n.Bits = bits
114+
n.NumBits = depth
115+
}
116+
}
117+
118+
func (d *Dictionary) constructTree(frequencyTable [HuffmanMaxSymbols]uint32) {
119+
120+
var (
121+
// +1 for additional EOF symbol
122+
nodesLeftStorage [HuffmanMaxSymbols + 1]constructNode
123+
nodesLeft [HuffmanMaxSymbols + 1]*constructNode
124+
numNodesLeft = HuffmanMaxSymbols + 1
125+
126+
n *node
127+
ns *constructNode
128+
)
129+
130+
// +1 for EOF symbol
131+
for i := uint16(0); i < HuffmanMaxSymbols+1; i++ {
132+
n = &d.nodes[i]
133+
n.NumBits = 0xff
134+
n.Symbol = byte(i)
135+
n.Leafs[0] = 0xffff
136+
n.Leafs[1] = 0xffff
137+
138+
ns = &nodesLeftStorage[i]
139+
if i == HuffmanEOFSymbol {
140+
ns.frequency = 1
141+
} else {
142+
ns.frequency = frequencyTable[i]
143+
}
144+
ns.nodeID = i
145+
nodesLeft[i] = ns
146+
}
147+
148+
d.numNodes = HuffmanMaxSymbols + 1 // +1 for EOF symbol
149+
for numNodesLeft > 1 {
150+
151+
sort.Stable(byFrequencyDesc(nodesLeft[:numNodesLeft]))
152+
153+
n = &d.nodes[d.numNodes]
154+
n1 := numNodesLeft - 1
155+
n2 := numNodesLeft - 2
156+
157+
n.NumBits = 0
158+
n.Leafs[0] = nodesLeft[n1].nodeID
159+
n.Leafs[1] = nodesLeft[n2].nodeID
160+
161+
freq1 := nodesLeft[n1].frequency
162+
freq2 := nodesLeft[n2].frequency
163+
164+
nodesLeft[n2].nodeID = d.numNodes
165+
nodesLeft[n2].frequency = freq1 + freq2
166+
167+
d.numNodes++
168+
numNodesLeft--
169+
}
170+
171+
d.startNode = n
172+
d.setBitsR(n, 0, 0)
173+
}
174+
175+
// constructNode is a work item used while the huffman tree is being built: it
// pairs the index of a tree node with the accumulated frequency of everything
// below that node.
type constructNode struct {
	nodeID    uint16
	frequency uint32
}

// byFrequencyDesc implements sort.Interface and orders construct nodes from
// the highest to the lowest frequency.
type byFrequencyDesc []*constructNode

// Len returns the number of elements.
func (a byFrequencyDesc) Len() int { return len(a) }

// Swap exchanges the elements at i and j. Only the slice entries are swapped;
// the structs they point to are left untouched, so pointers held elsewhere
// keep referring to the same values.
func (a byFrequencyDesc) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether element i has a strictly higher frequency than
// element j.
func (a byFrequencyDesc) Less(i, j int) bool { return a[i].frequency > a[j].frequency }

0 commit comments

Comments
 (0)