#include<stdio.h>
#include<string.h>
#include "dataStruct.h"
int bit_get(const unsigned char* bits, int pos)
{
unsigned char mask;
int i;
mask = 0x80;
for (i = 0; i < (pos % 8); i++)
mask = mask >> 1;
return (((mask & bits[(int)(pos / 8)]) == mask) ? 1 : 0);
}
void bit_set(unsigned char* bits, int pos, int state)
{
unsigned char mask;
int i;
mask = 0x80;
for (i = 0; i < (pos % 8); i++)
mask = mask >> 1;
if (state)
bits[pos / 8] = bits[pos / 8] | mask;
else
bits[pos / 8] = bits[pos / 8] & (~mask);
return;
}
//最长字符串匹配
//从window中匹配Buffer中最长字符串;offset返回window中匹配首位置;next返回buffer字符串后不匹配第一个字符位置
//返回匹配最长字符串的长度
int compare_win(const unsigned char* window, const unsigned char* buffer, int *offset, unsigned char* next)
{
int match, longest, i, j, k;
/*初始化偏移量*/
*offset = 0 ;
/*如果没有找到匹配,准备在前向缓冲区中返回0和下一个字符*/
longest = 0;
*next = buffer[0];
//最外面循环在window中第1个字母—第n个字母,第2个—第n个,......,第n-1个—第n个
for (k = 0; k < LZ77_WINDOW_SIZE; k++)
{
i = k;
j = 0;
match = 0;
//在最外层循环的一个中找buffer能匹配的最长字符串(从buffer第一个符号开始)
while (i < LZ77_WINDOW_SIZE && j < LZ77_BUFFER_SIZE - 1)
{
if (window[i] != buffer[j])
break;
//match统计目前匹配的长度
match++;
i++;
j++;
}
//保存返回信息,即最佳匹配的便宜、长度和下一个符号
if (match > longest)
{
*offset = k;
longest = match;
*next = buffer[j];
}
}
return longest;
}
//LZ77压缩
int lz77_compress(const unsigned char* original, unsigned char** compressed, int size)
{
u1 window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], * comp, next, compress_buf[4096];
int offset, length, remaining, hsize, ipos, opos, tpos, i;
int token, tbits;
*compressed = NULL;//使指向压缩数据的指针暂时无效
//写入头部信息
hsize = sizeof(int);
comp = compress_buf;
memcpy(comp, &size, sizeof(int));
//初始化滑动窗口和前向缓冲区(用0填充)
memset(window, 0, LZ77_WINDOW_SIZE);
memset(buffer, 0, LZ77_BUFFER_SIZE);
/*加载前向缓冲区*/
ipos = 0;//ipos指向源数据中正在处理的字节
//从源数据中取数据到缓冲区中
for (i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++)
{
buffer[i] = original[ipos];
ipos++;
}
/*压缩数据*/
opos = hsize * 8;//opos是压缩数据位的位置
remaining = size;
while (remaining > 0)
{
//标记包括 type、offset(在window中)、length、next四个参数
//next就是不匹配的字符
//tbits表示生成标记长度
if ((length = compare_win(window, buffer, &offset, &next)) != 0)
{
//能找到,type为1
token = 0x00000001 << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS);//编码短语标记
token = token | (offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS));//设置在滑动窗口找到匹配的偏移量
token = token | (length << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS));//设置匹配串的长度
token = token | next;//设置前向缓冲区中匹配串后面紧邻的字符
tbits = LZ77_PHRASE_BITS;//设置标记的位数
}
else
{
//没找到,标记就是原符号
token = 0x00000000;//编码一个字符标记
token = token | next;//设置为匹配的字符
tbits = LZ77_SYMBOL_BITS;//设置标记的位数
}
//s数据处理为大端模式
//token = htonl(token);
for (i = 0; i < tbits; i++)
{
//根据长度tbits取一位一位压缩
tpos = (sizeof(unsigned long) * 8) - tbits + i;
bit_set(comp, opos, bit_get((unsigned char*)& token, tpos));
opos++;
}
length++;//length是匹配数据字节长度
//从前向缓冲区拷贝数据到滑动窗口中
memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);
memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length);
//向前向缓冲区中读取更多数据
for (i = LZ77_BUFFER_SIZE - length; i < LZ77_BUFFER_SIZE && ipos < size; i++)
{
buffer[i] = original[ipos];
ipos++;
}
//调整剩余未匹配的长度
remaining = remaining - length;//等效于remaining - longest - 1
}
/*指向压缩数据缓冲区*/
*compressed = comp;
return ((opos - 1) / 8) + 1;//返回压缩数据中的字节数
}
int lz77_uncompress(const unsigned char* compressed, unsigned char** original)
{
u1 window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], uncompress_buf[2048];
u1* orig, * temp, next;
int length, offset, remaining, hsize, size, ipos, opos, tpos, state, i;
*original = orig = temp = NULL;//使得指向原始数据的指针暂时无效
//从压缩数据头信息中读取源数据字节数
hsize = sizeof(int);
memcpy(&size, compressed, sizeof(int));
/*初始化滑动窗口和前向缓冲区*/
memset(window, 0, LZ77_WINDOW_SIZE);
memset(buffer, 0, LZ77_BUFFER_SIZE);
/*解压缩数据*/
ipos = hsize * 8;
opos = 0;
remaining = size;
while (remaining > 0)
{
/*先读出type,看是否为window中字符(获取压缩数据中的下一位)*/
state = bit_get(compressed, ipos + hsize * 8 - 1);
if (state == 1)
{
//读出next
next = 0x00;
for (i = 0; i < LZ77_NEXT_BITS; i++)
{
tpos = i;
bit_set((unsigned char*)& next, tpos, bit_get(compressed, ipos));
ipos++;
}
//读length到length中
length = 0x00;
memset(&length, 0, sizeof(int));
for (i = 0; i < LZ77_BUFLEN_BITS; i++)
{
tpos = i;
bit_set((unsigned char*)& length, tpos, bit_get(compressed, ipos));
ipos++;
}
//读出offset到offset中(处理的是短语标记)
offset = 0x0000;
memset(&offset, 0, sizeof(int));
for (i = 0; i < LZ77_WINOFF_BITS; i++)
{
tpos = i;
bit_set((unsigned char*)& offset, tpos, bit_get(compressed, ipos));
ipos++;
}
ipos = ipos + 8;
/*确保偏移和长度对系统有正确的字节排序*/
//offset = ntohl (offset);
//length = ntohl (length);
i = 0;
/*将短语从滑动窗口写入原始数据缓冲区*/
orig = uncompress_buf;
//标记 = type + offset(在window中) + length + next
//解码 offset(在window中) + length对应字串
while (i < length && remaining>0)
{
orig[opos] = window[offset + i];
opos++;
/*在前向缓冲区中记录每个符号,直到准备更新滑动窗口*/
buffer[i] = window[offset + i];
i++;
remaining--;//调整剩余符号总数
}
/*存入next(将不匹配的符号写入原始数据缓冲区)*/
if (remaining > 0)
{
orig[opos] = next;
opos++;
buffer[i] = next;//仍需在前向缓冲区中记录此符号
remaining--;//调整剩余字符总数
}
length++;//调整短语长度
//是源字符
}
else
{
/*处理的是字符标记*/
next = 0x00;
//读出源字符给next
for (i = 0; i < LZ77_NEXT_BITS; i++)
{
tpos = (sizeof(unsigned char) * 8) - LZ77_NEXT_BITS + i;
bit_set((unsigned char*)& next, tpos, bit_get(compressed, ipos));
ipos++;
}
ipos = ipos + hsize * 8 - 8;
/*将字符写入原始数据缓冲区*/
orig = uncompress_buf;
orig[opos] = next;
opos++;
/*在前向缓冲区中记录当前字符*/
if (remaining > 0)
buffer[0] = next;
remaining--;//调整剩余数量
length = 1;//设置短语长度为1
}
//根据buffer更新window,这里是buffer为每次读到的数据,第一次更新一次(复制前向缓冲区的数据到滑动窗口)
memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);
}
*original = orig;//指向原始数据缓冲区
return opos;//返回解压缩的原始数据中的字节数
}