|
| 1 | +package huffman |
| 2 | + |
| 3 | +import "sort" |
| 4 | + |
| 5 | +const ( |
| 6 | + huffmanMaxNodes = (HuffmanMaxSymbols)*2 + 1 // +1 for additional EOF symbol |
| 7 | + huffmanLookupTableBits = 10 |
| 8 | + huffmanLookupTableSize = (1 << huffmanLookupTableBits) |
| 9 | + huffmanLookupTableMask = (huffmanLookupTableSize - 1) |
| 10 | +) |
| 11 | + |
| 12 | +var ( |
| 13 | + // DefaultDictionary is a huffman dictionary that is used to encode and decode data. |
| 14 | + // It is defined as a global variable in order to avoid re-creating it every time, as that is expensive. |
| 15 | + // This global value can be changed to a custom dictionary if needed which will then be reused globally. |
| 16 | + DefaultDictionary = NewDictionary() |
| 17 | + |
| 18 | + // TeeworldsFrequencyTable is the one used in Teeworlds by default. |
| 19 | + // The C++ implementation has an additional frequency on |
| 20 | + // the 256th index with the value 1517 which is overwritten |
| 21 | + // in the huffman constructor anyway, making it obsolete |
| 22 | + TeeworldsFrequencyTable = [HuffmanMaxSymbols]uint32{ |
| 23 | + 1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186, |
| 24 | + 283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176, |
| 25 | + 872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19, |
| 26 | + 16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68, |
| 27 | + 59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9, |
| 28 | + 20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5, |
| 29 | + 15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71, |
| 30 | + 148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80, |
| 31 | + 167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60, |
| 32 | + 136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21, |
| 33 | + 32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21, |
| 34 | + 12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18, |
| 35 | + 9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335, |
| 36 | + } |
| 37 | +) |
| 38 | + |
| 39 | +// Dictionary is a huffman lookup table/tree that is used to lookup symbols and their corresponding huffman codes. |
| 40 | +type Dictionary struct { |
| 41 | + nodes [huffmanMaxNodes]node |
| 42 | + decodeLut [huffmanLookupTableSize]*node |
| 43 | + startNode *node |
| 44 | + numNodes uint16 |
| 45 | +} |
| 46 | + |
// node is one entry of the huffman tree: either a leaf that stands for a
// symbol or an inner node that joins two subtrees.
type node struct {
	// Bits is the huffman code of a leaf and NumBits the length of that code
	// in bits. Inner nodes keep NumBits at 0.
	Bits    uint32
	NumBits uint8

	// Leafs holds the indices of both children inside the node array of the
	// dictionary; 0xffff marks a missing child. Indices are used instead of
	// pointers because shorts are smaller, so more data fits into the cache.
	Leafs [2]uint16

	// Symbol is the value a leaf represents.
	Symbol byte
}
| 58 | + |
| 59 | +// NewDictionary returns a initialized lookup table that uses the Teeworlds' default frequency table, |
| 60 | +// which can be found as TeeworldsFrequencyTable global variable. |
| 61 | +func NewDictionary() *Dictionary { |
| 62 | + return NewDictionaryWithFrequencies(TeeworldsFrequencyTable) |
| 63 | +} |
| 64 | + |
| 65 | +func NewDictionaryWithFrequencies(frequencyTable [HuffmanMaxSymbols]uint32) *Dictionary { |
| 66 | + |
| 67 | + d := Dictionary{} |
| 68 | + d.constructTree(frequencyTable) |
| 69 | + |
| 70 | + // build decode lookup table (LUT) |
| 71 | + for i := 0; i < huffmanLookupTableSize; i++ { |
| 72 | + var ( |
| 73 | + bits uint32 = uint32(i) |
| 74 | + k int |
| 75 | + n = d.startNode |
| 76 | + ) |
| 77 | + |
| 78 | + for k = 0; k < huffmanLookupTableBits; k++ { |
| 79 | + n = &d.nodes[n.Leafs[bits&1]] |
| 80 | + bits >>= 1 |
| 81 | + |
| 82 | + if n.NumBits > 0 { |
| 83 | + d.decodeLut[i] = n |
| 84 | + break |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | + if k == huffmanLookupTableBits { |
| 89 | + d.decodeLut[i] = n |
| 90 | + } |
| 91 | + |
| 92 | + } |
| 93 | + return &d |
| 94 | +} |
| 95 | + |
| 96 | +func (d *Dictionary) setBitsR(n *node, bits uint32, depth uint8) { |
| 97 | + var ( |
| 98 | + newBits uint32 |
| 99 | + left = n.Leafs[0] |
| 100 | + right = n.Leafs[1] |
| 101 | + ) |
| 102 | + |
| 103 | + if right < 0xffff { |
| 104 | + newBits = bits | (1 << depth) |
| 105 | + d.setBitsR(&d.nodes[right], newBits, depth+1) |
| 106 | + } |
| 107 | + if left < 0xffff { |
| 108 | + newBits = bits |
| 109 | + d.setBitsR(&d.nodes[left], newBits, depth+1) |
| 110 | + } |
| 111 | + |
| 112 | + if n.NumBits > 0 { |
| 113 | + n.Bits = bits |
| 114 | + n.NumBits = depth |
| 115 | + } |
| 116 | +} |
| 117 | + |
| 118 | +func (d *Dictionary) constructTree(frequencyTable [HuffmanMaxSymbols]uint32) { |
| 119 | + |
| 120 | + var ( |
| 121 | + // +1 for additional EOF symbol |
| 122 | + nodesLeftStorage [HuffmanMaxSymbols + 1]constructNode |
| 123 | + nodesLeft [HuffmanMaxSymbols + 1]*constructNode |
| 124 | + numNodesLeft = HuffmanMaxSymbols + 1 |
| 125 | + |
| 126 | + n *node |
| 127 | + ns *constructNode |
| 128 | + ) |
| 129 | + |
| 130 | + // +1 for EOF symbol |
| 131 | + for i := uint16(0); i < HuffmanMaxSymbols+1; i++ { |
| 132 | + n = &d.nodes[i] |
| 133 | + n.NumBits = 0xff |
| 134 | + n.Symbol = byte(i) |
| 135 | + n.Leafs[0] = 0xffff |
| 136 | + n.Leafs[1] = 0xffff |
| 137 | + |
| 138 | + ns = &nodesLeftStorage[i] |
| 139 | + if i == HuffmanEOFSymbol { |
| 140 | + ns.frequency = 1 |
| 141 | + } else { |
| 142 | + ns.frequency = frequencyTable[i] |
| 143 | + } |
| 144 | + ns.nodeID = i |
| 145 | + nodesLeft[i] = ns |
| 146 | + } |
| 147 | + |
| 148 | + d.numNodes = HuffmanMaxSymbols + 1 // +1 for EOF symbol |
| 149 | + for numNodesLeft > 1 { |
| 150 | + |
| 151 | + sort.Stable(byFrequencyDesc(nodesLeft[:numNodesLeft])) |
| 152 | + |
| 153 | + n = &d.nodes[d.numNodes] |
| 154 | + n1 := numNodesLeft - 1 |
| 155 | + n2 := numNodesLeft - 2 |
| 156 | + |
| 157 | + n.NumBits = 0 |
| 158 | + n.Leafs[0] = nodesLeft[n1].nodeID |
| 159 | + n.Leafs[1] = nodesLeft[n2].nodeID |
| 160 | + |
| 161 | + freq1 := nodesLeft[n1].frequency |
| 162 | + freq2 := nodesLeft[n2].frequency |
| 163 | + |
| 164 | + nodesLeft[n2].nodeID = d.numNodes |
| 165 | + nodesLeft[n2].frequency = freq1 + freq2 |
| 166 | + |
| 167 | + d.numNodes++ |
| 168 | + numNodesLeft-- |
| 169 | + } |
| 170 | + |
| 171 | + d.startNode = n |
| 172 | + d.setBitsR(n, 0, 0) |
| 173 | +} |
| 174 | + |
// constructNode is a subtree that still has to be merged while the huffman
// tree is built: the index of its root node and its accumulated frequency.
type constructNode struct {
	nodeID    uint16
	frequency uint32
}

// byFrequencyDesc implements sort.Interface and orders construct nodes from
// the highest to the lowest frequency.
type byFrequencyDesc []*constructNode

// Len returns the number of construct nodes.
func (a byFrequencyDesc) Len() int {
	return len(a)
}

// Swap exchanges the values the entries i and j point to; the pointers
// themselves stay where they are.
func (a byFrequencyDesc) Swap(i, j int) {
	tmp := *a[i]
	*a[i] = *a[j]
	*a[j] = tmp
}

// Less reports whether entry i has a higher frequency than entry j.
func (a byFrequencyDesc) Less(i, j int) bool {
	return a[j].frequency < a[i].frequency
}
0 commit comments