-
Notifications
You must be signed in to change notification settings - Fork 274
/
lz77.c
362 lines (245 loc) · 8.34 KB
/
lz77.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
//
// lz77.c
// Algorithms - LZ77(Lempel-Ziv-1977)compress
//
// Created by YourtionGuo on 19/05/2017.
// Copyright © 2017 Yourtion. All rights reserved.
//
#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "bit.h"
#include "compress.h"
#pragma mark - Private
/**
确定在前向缓冲区与滑动窗口中匹配的最长短语
@param window 滑动窗口
@param buffer 前向缓冲区
@param offset 滑动窗口中匹配串的位置
@param next 前向缓冲区中匹配串后一位的符号
@return 匹配的最长短语长度
*/
static int compare_win(const unsigned char *window,
const unsigned char *buffer,
int *offset, unsigned char *next)
{
int match, longest, i, j, k;
/// 初始化 offset,尽管只会在找到结果时赋值
*offset = 0;
/// 如果没找到匹配,则返回 0 并将 next 指向前向缓冲区
longest = 0;
*next = buffer[0];
/// 在滑动窗口中找到前向缓冲区的最佳匹配
for (k = 0; k < LZ77_WINDOW_SIZE; k++) {
i = k;
j = 0;
match = 0;
/// 确定有滑动窗口偏移 k 中有多少符号匹配
while (i < LZ77_WINDOW_SIZE && j < LZ77_BUFFER_SIZE - 1) {
if (window[i] != buffer[j]) break;
match++;
i++;
j++;
}
/// 保存偏移量,长度以及最后符号位置
if (match > longest) {
*offset = k;
longest = match;
*next = buffer[j];
}
}
return longest;
}
#pragma mark - Public
int lz77_compress(const unsigned char *original, unsigned char **compressed, int size)
{
unsigned int token;
unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], *comp, *temp, next;
int offset, length, remaining, tbits, hsize, ipos, opos, tpos, i;
/// 初始化最初的压缩结果为空
*compressed = NULL;
/// 写入头部数据
hsize = sizeof(int);
if ((comp = (unsigned char *)malloc(hsize)) == NULL) return -1;
memcpy(comp, &size, sizeof(int));
/// 初始化滑动窗口和前向缓冲区
memset(window, 0, LZ77_WINDOW_SIZE);
memset(buffer, 0, LZ77_BUFFER_SIZE);
/// 加载前向缓冲区内容
ipos = 0;
for (i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++) {
buffer[i] = original[ipos];
ipos++;
}
/// 压缩数据
opos = hsize * 8;
remaining = size;
while (remaining > 0) {
if ((length = compare_win(window, buffer, &offset, &next)) != 0) {
/// 编码短语标记
token = 0x00000001 << (LZ77_PHRASE_BITS - 1);
/// 设置偏移量当在滑动窗口中找到结果
token = token | (offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS -
LZ77_WINOFF_BITS));
/// 设置最大匹配长度
token = token | (length << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS -
LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS));
/// 设置前向缓冲区中匹配后的下一个符号
token = token | next;
/// 设置短语标记中的位长度
tbits = LZ77_PHRASE_BITS;
} else {
/// 编码短语标记
token = 0x00000000;
/// 设置未找到的符号
token = token | next;
/// 设置短语标记中的位长度
tbits = LZ77_SYMBOL_BITS;
}
/// 确保短语标记是以大端字节格式存放
token = htonl(token);
/// 将短语标记写入压缩后数据缓冲区
for (i = 0; i < tbits; i++) {
if (opos % 8 == 0) {
/// 初始化另外的字节以存放新短语标记
if ((temp = (unsigned char *)realloc(comp,(opos / 8) + 1)) == NULL) {
free(comp);
return -1;
}
comp = temp;
}
tpos = (sizeof(unsigned int) * 8) - tbits + i;
bit_set(comp, opos, bit_get((unsigned char *)&token, tpos));
opos++;
}
/// 调整短语长度以描述未匹配符号
length++;
/// 将前向缓冲区中的数据拷贝到滑动窗口
memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);
/// 加载更多数据到前向缓冲区
memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length);
for (i = LZ77_BUFFER_SIZE - length; i<LZ77_BUFFER_SIZE && ipos<size; i++) {
buffer[i] = original[ipos];
ipos++;
}
/// 调整剩余符号的数量
remaining = remaining - length;
}
/// 将缓冲区指向已压缩数据
*compressed = comp;
/// 返回压缩后数据的长度
return ((opos - 1) / 8) + 1;
}
int lz77_uncompress(const unsigned char *compressed, unsigned char **original) {
unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], *orig, *temp, next;
int offset, length, remaining, hsize, size, ipos, opos, tpos, state, i;
/// 初始化最初的解压结果为空
*original = orig = NULL;
/// 获取压缩数据中的头部信息
hsize = sizeof(int);
memcpy(&size, compressed, sizeof(int));
/// 初始化滑动窗口和前向缓冲区
memset(window, 0, LZ77_WINDOW_SIZE);
memset(buffer, 0, LZ77_BUFFER_SIZE);
/// 解压数据
ipos = hsize * 8;
opos = 0;
remaining = size;
while (remaining > 0) {
/// 获取压缩数据的下一位
state = bit_get(compressed, ipos);
ipos++;
if (state == 1) {
/// 处理短语标记
memset(&offset, 0, sizeof(int));
for (i = 0; i < LZ77_WINOFF_BITS; i++) {
tpos = (sizeof(int) * 8) - LZ77_WINOFF_BITS + i;
bit_set((unsigned char *)&offset, tpos, bit_get(compressed, ipos));
ipos++;
}
memset(&length, 0, sizeof(int));
for (i = 0; i < LZ77_BUFLEN_BITS; i++) {
tpos = (sizeof(int) * 8) - LZ77_BUFLEN_BITS + i;
bit_set((unsigned char *)&length, tpos, bit_get(compressed, ipos));
ipos++;
}
next = 0x00;
for (i = 0; i < LZ77_NEXT_BITS; i++) {
tpos = (sizeof(unsigned char) * 8) - LZ77_NEXT_BITS + i;
bit_set((unsigned char *)&next, tpos, bit_get(compressed, ipos));
ipos++;
}
/// 确保当前编码是以大端字节格式读取
offset = ntohl(offset);
length = ntohl(length);
/// 将滑动窗口中的短语写入原始数据缓冲区
i = 0;
if (opos > 0) {
if ((temp = (unsigned char *)realloc(orig, opos+length+1)) == NULL) {
free(orig);
return -1;
}
orig = temp;
} else {
if ((orig = (unsigned char *)malloc(length + 1)) == NULL) return -1;
}
while (i < length && remaining > 0) {
orig[opos] = window[offset + i];
opos++;
/// 记录前向缓冲区中的符号直到更新入滑动窗口
buffer[i] = window[offset + i];
i++;
/// 更新生于的需要处理的符号数量
remaining--;
}
/// 将未匹配的符号写入元素数据缓冲区
if (remaining > 0) {
orig[opos] = next;
opos++;
/// 同时记录当前符号到前向缓冲区中
buffer[i] = next;
/// 调整剩余需要处理的未匹配符号
remaining--;
}
/// 调整未匹配符号的短语长度
length++;
} else {
/// 处理已匹配的短语标记
next = 0x00;
for (i = 0; i < LZ77_NEXT_BITS; i++) {
tpos = (sizeof(unsigned char) * 8) - LZ77_NEXT_BITS + i;
bit_set((unsigned char *)&next, tpos, bit_get(compressed, ipos));
ipos++;
}
/// 将符号写入前向缓冲区
if (opos > 0) {
if ((temp = (unsigned char *)realloc(orig, opos + 1)) == NULL) {
free(orig);
return -1;
}
orig = temp;
} else {
if ((orig = (unsigned char *)malloc(1)) == NULL) return -1;
}
orig[opos] = next;
opos++;
/// 记录前向缓冲区中的符号直到更新入滑动窗口
if (remaining > 0) {
buffer[0] = next;
}
/// 调整剩余需要处理的未匹配符号
remaining--;
/// 调整未匹配符号的短语长度
length = 1;
}
/// 将前向缓冲区中的数据拷贝到滑动窗口
memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);
}
/// 将缓冲区指向已压缩数据
*original = orig;
/// 返回原始数据的长度
return opos;
}