(以内容“Lempel–Ziv–Storer–Szymanski 一种压缩算法 每八个东西前有个字节标记这8个是啥。对应位是1的话,就是原来的字节。对应位是0...”创建新页面) |
(→代码) |
||
(未显示同一用户的3个中间版本) | |||
第18行: | 第18行: | ||
0->读俩字节,取第二字节高四位拼上第一个字节作为窗口内开始位置,第二字节低四位+3(0,1,2不可能……)作为长度,输出窗口内容,同时也附加到窗口之后。 | 0->读俩字节,取第二字节高四位拼上第一个字节作为窗口内开始位置,第二字节低四位+3(0,1,2不可能……)作为长度,输出窗口内容,同时也附加到窗口之后。 | ||
+ | |||
+ | 窗口大小0x1000 | ||
== 变种 == | == 变种 == | ||
− | 取第一字节拼上第二字节高'''三'''位作为窗口内开始位置,第二字节低'''五'''位+'''2''' | + | * 取第一字节拼上第二字节高'''三'''位作为窗口内开始位置,第二字节低'''五'''位+'''2'''作为长度,窗口大小0x800 |
+ | |||
+ | == revision == | ||
+ | 第一版用的read和write,系统调用占了大部分时间…… | ||
+ | 现在用fread和fwrite了 | ||
== 代码 == | == 代码 == | ||
第32行: | 第38行: | ||
using namespace std; | using namespace std; | ||
+ | |||
+ | int get_char(FILE *f, unsigned char *ch) | ||
+ | { | ||
+ | int cnt = fread(ch, 1, 1, f); | ||
+ | return cnt; | ||
+ | } | ||
+ | |||
+ | int put_char(FILE *f, unsigned char *ch) | ||
+ | { | ||
+ | int cnt = fwrite(ch, 1, 1, f); | ||
+ | return cnt; | ||
+ | } | ||
string dump(char ch) | string dump(char ch) | ||
第52行: | 第70行: | ||
} | } | ||
− | int uncompress( | + | int uncompress(FILE* fin, FILE* fout, int from_bits) |
{ | { | ||
// example: | // example: | ||
第63行: | 第81行: | ||
unsigned char from_mask = 0xFF - len_mask; | unsigned char from_mask = 0xFF - len_mask; | ||
− | + | // cout << hex << window_size << dec << '/' << dump(len_mask) << '/' << dump(from_mask) << endl; | |
− | char window[window_size]; | + | unsigned char window[window_size]; |
int win_pos = from_bits == 4 ? 0xFEE : 0; | int win_pos = from_bits == 4 ? 0xFEE : 0; | ||
int out_len = 0; | int out_len = 0; | ||
第71行: | 第89行: | ||
int br_cnt = 0; | int br_cnt = 0; | ||
memset(window, 0, window_size); | memset(window, 0, window_size); | ||
+ | unsigned char xbuf[8]; | ||
while (true) | while (true) | ||
{ | { | ||
− | char flag; | + | unsigned char flag; |
− | int cnt = | + | int cnt = get_char(fin, &flag); |
if (cnt == 0) break; | if (cnt == 0) break; | ||
int index = 0; | int index = 0; | ||
+ | |||
for (int index = 0; index < 8; index++, flag >>= 1) | for (int index = 0; index < 8; index++, flag >>= 1) | ||
{ | { | ||
第83行: | 第103行: | ||
{ | { | ||
out_len++; | out_len++; | ||
− | char ch; | + | unsigned char ch; |
− | cnt = | + | cnt = get_char(fin, &ch); |
if (cnt == 0) break; | if (cnt == 0) break; | ||
− | + | put_char(fout, &ch); | |
window[win_pos] = ch; | window[win_pos] = ch; | ||
win_pos = (win_pos + 1) % window_size; | win_pos = (win_pos + 1) % window_size; | ||
第96行: | 第116行: | ||
br_cnt++; | br_cnt++; | ||
unsigned char lead, offset; | unsigned char lead, offset; | ||
− | + | cnt = get_char(fin, &offset); | |
if (cnt == 0) break; | if (cnt == 0) break; | ||
− | cnt = | + | cnt = get_char(fin, &lead); |
if (cnt == 0) break; | if (cnt == 0) break; | ||
// cout << endl << "backref: pos=" << hex << in_len << ", dpos=" << out_len << ", lead=" << hex << (int)lead << ", offset=" << (int)offset << dec << '\t' << dumpb(lead) << "/" << dumpb(offset) << endl; | // cout << endl << "backref: pos=" << hex << in_len << ", dpos=" << out_len << ", lead=" << hex << (int)lead << ", offset=" << (int)offset << dec << '\t' << dumpb(lead) << "/" << dumpb(offset) << endl; | ||
第106行: | 第126行: | ||
for (int i=0; i<copy_len; i++) | for (int i=0; i<copy_len; i++) | ||
{ | { | ||
− | + | int ret = put_char(fout, &window[copy_from]); | |
if (ret != 1) | if (ret != 1) | ||
cerr << "writeout failed! " << errno << endl; | cerr << "writeout failed! " << errno << endl; | ||
第132行: | 第152行: | ||
} | } | ||
− | + | FILE * fin = fopen(argv[1], "rb"); | |
− | + | FILE * fout = fopen(argv[2], "wb"); | |
− | if | + | if (fin == NULL || fout == NULL) |
{ | { | ||
cerr << "file open error! " << errno << endl; | cerr << "file open error! " << errno << endl; | ||
第146行: | 第166行: | ||
while (skip > 0) | while (skip > 0) | ||
{ | { | ||
− | int cnt = | + | int cnt = fread(buf, 1, skip > 1024 ? 1024 : skip, fin); |
if (cnt <= 0) | if (cnt <= 0) | ||
{ | { | ||
第162行: | 第182行: | ||
int out_len = uncompress(fin, fout, from_bits); | int out_len = uncompress(fin, fout, from_bits); | ||
cout << "result len: " << out_len << endl; | cout << "result len: " << out_len << endl; | ||
− | + | fclose(fin); | |
− | + | fclose(fout); | |
} | } | ||
</source> | </source> |
2012年1月10日 (二) 02:25的最新版本
Lempel–Ziv–Storer–Szymanski
一种压缩算法
每八个东西前有个字节标记这8个是啥。对应位是1的话,就是原来的字节。对应位是0,则这个东西占俩字节,引用窗口里的一段东西。
标准LZSS算法
初始窗口指针=0xFEE // 恶心!
初始窗口清空。
读标记
对每一位:
1->原样输出一个字节,附加到窗口之后。
0->读俩字节,取第二字节高四位拼上第一个字节作为窗口内开始位置,第二字节低四位+3(0,1,2不可能……)作为长度,输出窗口内容,同时也附加到窗口之后。
窗口大小0x1000
变种
- 取第一字节拼上第二字节高三位作为窗口内开始位置,第二字节低五位+2作为长度,窗口大小0x800
revision
第一版用的read和write,系统调用占了大部分时间…… 现在用fread和fwrite了
代码
#include <fcntl.h>
#include <iostream>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
using namespace std;
int get_char(FILE *f, unsigned char *ch)
{
int cnt = fread(ch, 1, 1, f);
return cnt;
}
int put_char(FILE *f, unsigned char *ch)
{
int cnt = fwrite(ch, 1, 1, f);
return cnt;
}
string dump(char ch)
{
char buf[3];
snprintf(buf, 3, "%02x", (unsigned char)ch);
return string(buf);
}
string dumpb(char ch)
{
char buf[10];
buf[8] = 0;
for (int i=0; i<8; i++)
if (ch & (1 << i))
buf[7-i] = '1';
else
buf[7-i] = '0';
return string(buf);
}
int uncompress(FILE* fin, FILE* fout, int from_bits)
{
// example:
// from_bits = 3
// len_mask = 0x1f : 0001 1111
// from_mask = 0xe0 : 1110 0000
// ^^^ 3
int window_size = 0x100 << from_bits;
unsigned char len_mask = (1 << (8 - from_bits)) - 1;
unsigned char from_mask = 0xFF - len_mask;
// cout << hex << window_size << dec << '/' << dump(len_mask) << '/' << dump(from_mask) << endl;
unsigned char window[window_size];
int win_pos = from_bits == 4 ? 0xFEE : 0;
int out_len = 0;
int in_len = 0;
int br_cnt = 0;
memset(window, 0, window_size);
unsigned char xbuf[8];
while (true)
{
unsigned char flag;
int cnt = get_char(fin, &flag);
if (cnt == 0) break;
int index = 0;
for (int index = 0; index < 8; index++, flag >>= 1)
{
if (flag & 1)
{
out_len++;
unsigned char ch;
cnt = get_char(fin, &ch);
if (cnt == 0) break;
put_char(fout, &ch);
window[win_pos] = ch;
win_pos = (win_pos + 1) % window_size;
// cout << dump(ch) << " ";
// if (isprint(ch))
// cout << ch << ' ';
in_len++;
} else {
br_cnt++;
unsigned char lead, offset;
cnt = get_char(fin, &offset);
if (cnt == 0) break;
cnt = get_char(fin, &lead);
if (cnt == 0) break;
// cout << endl << "backref: pos=" << hex << in_len << ", dpos=" << out_len << ", lead=" << hex << (int)lead << ", offset=" << (int)offset << dec << '\t' << dumpb(lead) << "/" << dumpb(offset) << endl;
int copy_len = (lead & len_mask) + 3;
int copy_from = ((lead & from_mask) << from_bits) | (offset & 0xff);
// cout << "backref: len=" << hex << copy_len << ", from=" << copy_from << dec << endl;
for (int i=0; i<copy_len; i++)
{
int ret = put_char(fout, &window[copy_from]);
if (ret != 1)
cerr << "writeout failed! " << errno << endl;
window[win_pos] = window[copy_from];
copy_from = (copy_from + 1) % window_size;
win_pos = (win_pos + 1) % window_size;
}
out_len += copy_len;
in_len += 2;
}
}
if (cnt == 0) break;
}
return out_len;
}
int main(int argc, char **argv)
{
if (argc < 3)
{
cerr << "usage: " << argv[0] << " <infile> <outfile> [<skip>]" << endl;
return 1;
}
FILE * fin = fopen(argv[1], "rb");
FILE * fout = fopen(argv[2], "wb");
if (fin == NULL || fout == NULL)
{
cerr << "file open error! " << errno << endl;
return errno;
}
if (argc > 3)
{
int skip = atoi(argv[3]);
char buf[1024];
while (skip > 0)
{
int cnt = fread(buf, 1, skip > 1024 ? 1024 : skip, fin);
if (cnt <= 0)
{
cerr << "skip error! " << errno << endl;
return 3;
}
skip -= cnt;
}
}
int from_bits = 4;
if (argc > 4)
from_bits = atoi(argv[4]);
int out_len = uncompress(fin, fout, from_bits);
cout << "result len: " << out_len << endl;
fclose(fin);
fclose(fout);
}