Lempel–Ziv–Storer–Szymanski
一种压缩算法
每八个东西前有个字节标记这8个是啥。对应位是1的话,就是原来的字节。对应位是0,则这个东西占俩字节,引用窗口里的一段东西。
标准LZSS算法
初始窗口指针=0xFEE // 恶心!
初始窗口清空。
读标记
对每一位:
1->原样输出一个字节,附加到窗口之后。
0->读俩字节,取第二字节高四位拼上第一个字节作为窗口内开始位置,第二字节低四位+3(0,1,2不可能……)作为长度,输出窗口内容,同时也附加到窗口之后。
变种
取第一字节拼上第二字节高三位作为窗口内开始位置,第二字节低五位+2作为长度
代码
#include <fcntl.h>
#include <iostream>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
using namespace std;
string dump(char ch)
{
char buf[3];
snprintf(buf, 3, "%02x", (unsigned char)ch);
return string(buf);
}
string dumpb(char ch)
{
char buf[10];
buf[8] = 0;
for (int i=0; i<8; i++)
if (ch & (1 << i))
buf[7-i] = '1';
else
buf[7-i] = '0';
return string(buf);
}
int uncompress(int fin, int fout, int from_bits)
{
// example:
// from_bits = 3
// len_mask = 0x1f : 0001 1111
// from_mask = 0xe0 : 1110 0000
// ^^^ 3
int window_size = 0x100 << from_bits;
unsigned char len_mask = (1 << (8 - from_bits)) - 1;
unsigned char from_mask = 0xFF - len_mask;
cout << hex << window_size << dec << '/' << dump(len_mask) << '/' << dump(from_mask) << endl;
char window[window_size];
int win_pos = from_bits == 4 ? 0xFEE : 0;
int out_len = 0;
int in_len = 0;
int br_cnt = 0;
memset(window, 0, window_size);
while (true)
{
char flag;
int cnt = read(fin, &flag, 1);
if (cnt == 0) break;
int index = 0;
for (int index = 0; index < 8; index++, flag >>= 1)
{
if (flag & 1)
{
out_len++;
char ch;
cnt = read(fin, &ch, 1);
if (cnt == 0) break;
write(fout, &ch, 1);
window[win_pos] = ch;
win_pos = (win_pos + 1) % window_size;
// cout << dump(ch) << " ";
// if (isprint(ch))
// cout << ch << ' ';
in_len++;
} else {
br_cnt++;
unsigned char lead, offset;
cnt = read(fin, &offset, 1);
if (cnt == 0) break;
cnt = read(fin, &lead, 1);
if (cnt == 0) break;
// cout << endl << "backref: pos=" << hex << in_len << ", dpos=" << out_len << ", lead=" << hex << (int)lead << ", offset=" << (int)offset << dec << '\t' << dumpb(lead) << "/" << dumpb(offset) << endl;
int copy_len = (lead & len_mask) + 3;
int copy_from = ((lead & from_mask) << from_bits) | (offset & 0xff);
// cout << "backref: len=" << hex << copy_len << ", from=" << copy_from << dec << endl;
for (int i=0; i<copy_len; i++)
{
int ret = write(fout, &window[copy_from], 1);
if (ret != 1)
cerr << "writeout failed! " << errno << endl;
window[win_pos] = window[copy_from];
copy_from = (copy_from + 1) % window_size;
win_pos = (win_pos + 1) % window_size;
}
out_len += copy_len;
in_len += 2;
}
}
if (cnt == 0) break;
}
return out_len;
}
int main(int argc, char **argv)
{
if (argc < 3)
{
cerr << "usage: " << argv[0] << " <infile> <outfile> [<skip>]" << endl;
return 1;
}
int fin = open(argv[1], O_RDONLY);
int fout = open(argv[2], O_WRONLY | O_CREAT, 0644);
if ((fin < 0) || (fout < 0))
{
cerr << "file open error! " << errno << endl;
return errno;
}
if (argc > 3)
{
int skip = atoi(argv[3]);
char buf[1024];
while (skip > 0)
{
int cnt = read(fin, buf, skip > 1024 ? 1024 : skip);
if (cnt <= 0)
{
cerr << "skip error! " << errno << endl;
return 3;
}
skip -= cnt;
}
}
int from_bits = 4;
if (argc > 4)
from_bits = atoi(argv[4]);
int out_len = uncompress(fin, fout, from_bits);
cout << "result len: " << out_len << endl;
close(fin);
close(fout);
}