LZW编解码算法实现与分析实验报告

374 阅读 0 评论 247 点赞

我是靠谱客的博主爱笑小鸽子，这篇文章主要介绍LZW编解码算法实现与分析实验报告，现在分享给大家，希望可以做个参考。

一：编解码原理

1.词典树的结构

尾缀字符（suffix）
母节点（parent）
第一个孩子节点（firstchild）
下一个兄弟节点（nextsibling）

2.编码原理
编码流程图

LZW的编码思想是不断地从字符流中提取新的字符串，通俗地理解为新“词条”，然后用“代号”也就是码字表示这个“词条”。这样一来，对字符流的编码就变成了用码字去替换字符流，生成码字流，从而达到压缩数据的目的。LZW编码是围绕称为词典的转换表来完成的。LZW编码器通过管理这个词典完成输入与输出之间的转换。LZW编码器的输入是字符流，字符流可以是用8位ASCII字符组成的字符串，而输出是用n位(例如12位)表示的码字流。LZW编码算法的步骤如下：
步骤1：将词典初始化为包含所有可能的单字符，当前前缀P初始化为空。
步骤2：当前字符C=字符流中的下一个字符。
步骤3：判断P＋C是否在词典中
（1）如果“是”，则用C扩展P，即让P=P＋C，返回到步骤2。
（2）如果“否”，则
输出与当前前缀P相对应的码字W；
将P＋C添加到词典中；
令P=C，并返回到步骤2。
步骤4：当字符流中不再有字符时，输出与当前前缀P相对应的码字W，编码结束。

3.解码原理
解码流程图

LZW解码算法开始时，译码词典和编码词典相同，包含所有可能的前缀根。具体解码算法如下：
步骤1：在开始译码时词典包含所有可能的前缀根。
步骤2：令CW：=码字流中的第一个码字。
步骤3：输出当前缀-符串string.CW到字符流。
步骤4：先前码字PW：=当前码字CW。
步骤5：当前码字CW：=码字流的下一个码字。
步骤6：判断当前缀-符串string.CW 是否在词典中。
（1）如果”是”，则把当前缀-符串string.CW输出到字符流。
当前前缀P：=先前缀-符串string.PW。
当前字符C：=当前前缀-符串string.CW的第一个字符。
把缀-符串P+C添加到词典。
（2）如果”否”，则当前前缀P：=先前缀-符串string.PW。
当前字符C：=先前缀-符串string.PW的第一个字符（此时string.CW尚未加入词典，它的第一个字符与string.PW的第一个字符相同）。
输出缀-符串P+C到字符流,然后把它添加到词典中。
步骤7：判断码字流中是否还有码字要译。
（1）如果”是”，就返回步骤4。
（2）如果”否”，结束。

二：代码部分

bitio.h

#pragma once
/*
 * Declaration for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */
#ifndef __BITIO__
#define __BITIO__

#include <stdio.h>

/*
 * State for reading or writing a stdio stream one bit at a time.
 * Bits are processed from the most significant bit of each byte downwards.
 */
typedef struct {
	// underlying byte stream
	FILE *fp;
	// single-bit mask selecting the next bit inside rack; 0x80 marks a byte boundary
	unsigned char mask;
	// current byte: being consumed when reading, being assembled when writing
	int rack;
}BITFILE;

// Open filename for bit input ("rb"); a NULL filename selects stdin. Returns NULL on failure.
BITFILE *OpenBitFileInput(char *filename);
// Open filename for bit output ("wb"); a NULL filename selects stdout. Returns NULL on failure.
BITFILE *OpenBitFileOutput(char *filename);
// Close the stream and free the handle.
void CloseBitFileInput(BITFILE *bf);
// Write out any partially filled byte, then close the stream and free the handle.
void CloseBitFileOutput(BITFILE *bf);
// Read one bit (0 or 1); terminates the process if the file is exhausted.
int BitInput(BITFILE *bf);
// Read count bits, first bit read becoming the most significant bit of the result.
unsigned long BitsInput(BITFILE *bf, int count);
// Append one bit (any non-zero value counts as 1).
void BitOutput(BITFILE *bf, int bit);
// Append the low count bits of code, most significant of them first.
void BitsOutput(BITFILE *bf, unsigned long code, int count);
#endif	// __BITIO__

bitio.cpp

/*
 * Definitions for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */

#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"

/*
 * Open a file for bit-level reading.
 *
 * filename: path of the file to open in binary mode, or NULL to read stdin.
 * Returns a newly allocated BITFILE positioned at a byte boundary, or NULL
 * when the allocation or the open fails. Release with CloseBitFileInput().
 */
BITFILE *OpenBitFileInput(char *filename) {
	BITFILE *bf;
	bf = (BITFILE *)malloc(sizeof(BITFILE));
	if (NULL == bf) return NULL;
	bf->fp = NULL;	// start from a known value before the open attempt
	if (NULL == filename) {
		bf->fp = stdin;
	}
	else if (0 != fopen_s(&(bf->fp), filename, "rb")) {
		bf->fp = NULL;
	}
	if (NULL == bf->fp) {
		free(bf);	// do not leak the handle when the open fails
		return NULL;
	}
	bf->mask = 0x80;
	bf->rack = 0;
	return bf;
}

/*
 * Open a file for bit-level writing.
 *
 * filename: path of the file to create/truncate in binary mode, or NULL to
 * write to stdout.
 * Returns a newly allocated BITFILE with an empty bit buffer, or NULL when
 * the allocation or the open fails. Release with CloseBitFileOutput().
 */
BITFILE *OpenBitFileOutput(char *filename) {
	BITFILE *bf;
	bf = (BITFILE *)malloc(sizeof(BITFILE));
	if (NULL == bf) return NULL;
	bf->fp = NULL;	// start from a known value before the open attempt
	if (NULL == filename) {
		bf->fp = stdout;
	}
	else if (0 != fopen_s(&(bf->fp), filename, "wb")) {
		bf->fp = NULL;
	}
	if (NULL == bf->fp) {
		free(bf);	// do not leak the handle when the open fails
		return NULL;
	}
	bf->mask = 0x80;
	bf->rack = 0;
	return bf;
}

/*
 * Close a BITFILE used for reading and free the handle.
 * Passing NULL is a no-op.
 */
void CloseBitFileInput(BITFILE *bf) {
	if (NULL == bf) return;
	fclose(bf->fp);
	free(bf);
}

/*
 * Flush and close a BITFILE used for writing, then free the handle.
 *
 * If the last byte is only partly filled it is written as is; the bits that
 * were never set remain zero. Passing NULL is a no-op.
 */
void CloseBitFileOutput(BITFILE *bf) {
	if (NULL == bf) return;
	// mask != 0x80 means some bits are pending in rack
	if (0x80 != bf->mask) fputc(bf->rack, bf->fp);
	fclose(bf->fp);
	free(bf);
}

/*
 * Read the next bit of the stream, most significant bit of each byte first.
 *
 * Returns 0 or 1. When a new byte is needed and the file is exhausted, an
 * error is printed on stderr and the process terminates with exit(-1).
 */
int BitInput(BITFILE *bf) {
	int value;

	if (0x80 == bf->mask) {	// byte boundary: fetch the next byte
		bf->rack = fgetc(bf->fp);
		if (EOF == bf->rack) {
			fprintf(stderr, "Read after the end of file reached\n");
			exit(-1);
		}
	}
	value = bf->mask & bf->rack;
	bf->mask >>= 1;
	if (0 == bf->mask) bf->mask = 0x80;	// byte consumed, wrap to the next one
	return((0 == value) ? 0 : 1);
}

/*
 * Read count bits and return them as an unsigned integer; the first bit read
 * becomes the most significant of the count bits.
 *
 * The value is built by shifting an unsigned accumulator, so no signed shift
 * is involved (the previous 1L << (count - 1) was undefined for count == 32
 * with a 32-bit long). A count of zero or less reads nothing and returns 0.
 */
unsigned long BitsInput(BITFILE *bf, int count) {
	unsigned long value = 0UL;

	while (0 < count) {
		value = (value << 1) | (unsigned long)BitInput(bf);
		count--;
	}
	return value;
}

/*
 * Append one bit to the stream. Any non-zero bit is stored as 1.
 * Bits fill each byte from the most significant position downwards; the byte
 * is written to the file as soon as its eighth bit has been placed.
 */
void BitOutput(BITFILE *bf, int bit) {
	if (bit) {
		bf->rack |= bf->mask;
	}
	bf->mask >>= 1;
	if (bf->mask != 0) {
		return;	// byte not complete yet
	}
	fputc(bf->rack, bf->fp);
	bf->rack = 0;
	bf->mask = 0x80;
}

/*
 * Append the low count bits of code to the stream, most significant of those
 * bits first.
 *
 * Uses an unsigned right shift instead of the former 1L << (count - 1) mask,
 * which was undefined for count == 32 with a 32-bit long. A count of zero or
 * less writes nothing. count must not exceed the width of unsigned long.
 */
void BitsOutput(BITFILE *bf, unsigned long code, int count) {
	while (0 < count) {
		count--;
		BitOutput(bf, (int)((code >> count) & 1UL));
	}
}
#if 0
/*
 * Disabled self-test for the bit I/O layer: copies the input to the output
 * one bit at a time and echoes every bit to stderr in groups of eight.
 *
 * argv[1]: input file (stdin when absent), argv[2]: output file (stdout when
 * absent). The copy loop has no exit of its own; it ends when BitInput()
 * terminates the process at end of input.
 */
int main(int argc, char **argv) {
	BITFILE *bfi, *bfo;
	int bit;
	int count = 0;

	if (1 < argc) {
		if (NULL == (bfi = OpenBitFileInput(argv[1]))) {
			fprintf(stderr, "fail open the file\n");
			return -1;
		}
	}
	else {
		if (NULL == (bfi = OpenBitFileInput(NULL))) {
			fprintf(stderr, "fail open stdin\n");
			return -2;
		}
	}
	if (2 < argc) {
		if (NULL == (bfo = OpenBitFileOutput(argv[2]))) {
			fprintf(stderr, "fail open file for output\n");
			return -3;
		}
	}
	else {
		if (NULL == (bfo = OpenBitFileOutput(NULL))) {
			fprintf(stderr, "fail open stdout\n");
			return -4;
		}
	}
	while (1) {
		bit = BitInput(bfi);
		fprintf(stderr, "%d", bit);
		count++;
		if (0 == (count & 7))fprintf(stderr, " ");
		BitOutput(bfo, bit);
	}
	return 0;
}
#endif

LZW_E.cpp

/*
 * Definition for LZW coding
 *
 * vim: ts=4 sw=4 cindent nowrap
 */
#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
// Upper bound on dictionary codes; codes are transferred as 16-bit values (see input/output below)
#define MAX_CODE 65535

/*
 * LZW dictionary stored as a tree in an array, indexed by code.
 * Every link field holds an index into this array, or -1 for "none".
 */
struct 
{
	// last character of the phrase this node represents
	int suffix;
	// parent: node of the phrase without its last character
	// firstchild: first node extending this phrase by one character
	// nextsibling: next node sharing the same parent
	int parent, firstchild, nextsibling;
} dictionary[MAX_CODE + 1];
// index at which the next new phrase will be stored
int next_code;
// stack for decoding a phrase; DecodeString fills it last character first
int d_stack[MAX_CODE];

// read one 16-bit code from a BITFILE
#define input(f) ((int)BitsInput( f, 16))
// write x as one 16-bit code to a BITFILE
#define output(f, x) BitsOutput( f, (unsigned long)(x), 16)

int DecodeString(int start, int code);
void InitDictionary(void);
/*
 * Debug helper: print every phrase stored after the 256 single-character
 * roots, one "code->phrase" line per entry.
 */
void PrintDictionary(void) 
{
	int n;
	int count;
	for (n = 256; n < next_code; n++) {
		count = DecodeString(0, n);
		printf("%4d->", n);
		// d_stack holds the phrase last character first, so print it top-down
		while (0 < count--) printf("%c", (char)(d_stack[count]));
		printf("\n");
	}
}

/*
 * Push the phrase identified by code onto d_stack, beginning at index start.
 * The tree is walked from the node up to its root, so the characters land in
 * reverse order (last character of the phrase at d_stack[start]).
 *
 * Returns the index one past the last slot written; a negative code writes
 * nothing and returns start.
 */
int DecodeString(int start, int code) {
	int top = start;

	for (; code >= 0; code = dictionary[code].parent) {
		d_stack[top++] = dictionary[code].suffix;
	}
	return top;
}

/*
 * Reset the dictionary to the 256 single-character roots.
 * Root i has suffix i, no parent and no children; the roots are chained as
 * siblings 0 -> 1 -> ... -> 255. New phrases will be stored from code 256.
 */
void InitDictionary(void) {
	int root = 0;

	while (root < 256) {
		dictionary[root].suffix = root;
		dictionary[root].parent = -1;
		dictionary[root].firstchild = -1;
		dictionary[root].nextsibling = (root < 255) ? root + 1 : -1;
		root++;
	}
	next_code = 256;     // position of the first new phrase
}
/*
 * Look up the phrase string+character in the dictionary.
 *
 * character:   the character appended to the phrase
 * string_code: code of the existing phrase, or negative for the empty phrase
 * Returns the code of string+character, or -1 if it is not in the dictionary.
 * With an empty phrase the result is the character itself.
 */
int InDictionary(int character, int string_code) {
	int node;

	if (string_code < 0) {
		return character;
	}
	// scan the children of string_code for one whose suffix matches
	for (node = dictionary[string_code].firstchild; node >= 0; node = dictionary[node].nextsibling) {
		if (dictionary[node].suffix == character) {
			return node;
		}
	}
	return -1;
}

/*
 * Store the phrase string+character at index next_code and advance next_code.
 * The new node is appended to the end of string_code's child list.
 * A negative string_code (empty phrase) adds nothing. The caller must ensure
 * that next_code is still a valid index.
 */
void AddToDictionary(int character, int string_code)
{
	int tail;

	if (string_code < 0) {
		return;
	}
	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].firstchild = -1;
	dictionary[next_code].nextsibling = -1;

	tail = dictionary[string_code].firstchild;
	if (tail < 0) {
		// parent had no children: the new node becomes the first
		dictionary[string_code].firstchild = next_code;
	}
	else {
		// walk to the last sibling and link the new node after it
		while (dictionary[tail].nextsibling >= 0) {
			tail = dictionary[tail].nextsibling;
		}
		dictionary[tail].nextsibling = next_code;
	}
	next_code++;
}

/*
 * LZW-compress the whole of fp into bf.
 *
 * Output layout: the source length as a 32-bit value (only the low 32 bits
 * are stored), followed by one 16-bit code per emitted phrase.
 *
 * fp: seekable source stream; it is rewound before encoding.
 * bf: destination bit stream; the caller closes it to flush the last byte.
 */
void LZWEncode(FILE *fp, BITFILE *bf) {
	int character;
	int string_code;
	int index;
	unsigned long file_length;

	fseek(fp, 0, SEEK_END);
	file_length = (unsigned long)ftell(fp);   // a failed ftell (-1) is stored as all ones
	fseek(fp, 0, SEEK_SET);                   // back to the start of the source
	BitsOutput(bf, file_length, 4 * 8);       // write the length header
	InitDictionary();
	string_code = -1;                         // empty phrase
	while (EOF != (character = fgetc(fp))) 
	{
		index = InDictionary(character, string_code);   // -1 when string+character is unknown
		if (0 <= index) {	// string+character in dictionary: keep extending
			string_code = index;
		}
		else {	// string+character not in dictionary
			output(bf, string_code);
			if (MAX_CODE > next_code) {	// free space in dictionary
				AddToDictionary(character, string_code);
			}
			string_code = character;    // start the next phrase with this character
		}
	}
	// Emit the pending phrase; for empty input there is none, and writing
	// string_code == -1 would put the bogus code 0xFFFF into the output.
	if (0 <= string_code) output(bf, string_code);
}

/*
 * Decompress an LZW stream from bf into fp.
 *
 * Expects a 32-bit original length followed by 16-bit codes, and stops once
 * that many bytes have been written.
 *
 * bf: source bit stream positioned at the length header.
 * fp: destination stream opened for binary writing.
 * If the first code is not a single-character code the stream is corrupt; an
 * error is printed on stderr and decoding stops.
 */
void LZWDecode(BITFILE *bf, FILE *fp) {
	int character = 0;	// first character of the previously decoded phrase
	int new_code, last_code;
	int phrase_length;
	unsigned long file_length;

	file_length = BitsInput(bf, 4 * 8);   // read the length header
	// All ones is what a failed ftell() leaves in the header. Compare with the
	// 32-bit pattern: "-1 == file_length" never matches a 64-bit unsigned long.
	if (0xFFFFFFFFUL == file_length) file_length = 0;
	InitDictionary();
	last_code = -1;
	while (0 < file_length) 
	{
		new_code = input(bf);
		if (new_code >= next_code)
		{ // code not in the dictionary yet: phrase = previous phrase + its first character
			if (0 > last_code) {
				// no previous phrase exists, so the code cannot be resolved
				fprintf(stderr, "invalid first code %d in compressed stream\n", new_code);
				return;
			}
			d_stack[0] = character;
			phrase_length = DecodeString(1, last_code);
		}
		else
		{
			phrase_length = DecodeString(0, new_code);
		}
		character = d_stack[phrase_length - 1];   // top of stack = first character of the phrase
		// Never write more than the header announced, so a corrupt stream
		// cannot make file_length wrap around below zero.
		while (0 < phrase_length && 0 < file_length)
		{
			phrase_length--;
			fputc(d_stack[phrase_length], fp);
			file_length--;
		}
		if (MAX_CODE > next_code)
		{	// add previous phrase + first character of this one
			AddToDictionary(character, last_code);
		}
		last_code = new_code;
	}
}




/*
 * Command-line driver: <program> <o> <ifile> <ofile>
 *
 * argv[1]: operation, first letter 'E' to encode or 'D' to decode
 * argv[2]: input file name
 * argv[3]: output file name
 *
 * The input is opened before the output, so a missing input no longer
 * truncates the output file, and whichever file is already open is closed on
 * every error path.
 * Returns 0 on success and -1 on a usage error, an unsupported operation or
 * a file that cannot be opened.
 */
int main(int argc, char **argv) {
	FILE *fp = NULL;
	BITFILE *bf = NULL;

	if (4 > argc) 
	{
		fprintf(stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
		fprintf(stdout, "\t<o>: E or D reffers encode or decode\n");
		fprintf(stdout, "\t<ifile>: input file name\n");
		fprintf(stdout, "\t<ofile>: output file name\n");
		return -1;
	}

	if ('E' == argv[1][0])
	{	// do encoding: fp is the plain source, bf the compressed destination
		if (0 != fopen_s(&fp, argv[2], "rb")) fp = NULL;
		if (fp == NULL)
		{
			printf("pf is NULL");
			return -1;
		}
		bf = OpenBitFileOutput(argv[3]);
		if (bf == NULL)
		{
			printf("bf is NULL");
			fclose(fp);
			return -1;
		}

		printf("encoding\n");
		LZWEncode(fp, bf);
		fclose(fp);
		CloseBitFileOutput(bf);
		fprintf(stdout, "encoding done\n");
	}
	else if ('D' == argv[1][0]) 
	{	// do decoding: bf is the compressed source, fp the plain destination
		bf = OpenBitFileInput(argv[2]);
		if (bf == NULL)
		{
			printf("bf is NULL");
			return -1;
		}
		if (0 != fopen_s(&fp, argv[3], "wb")) fp = NULL;
		if (fp == NULL)
		{
			printf("pf is NULL");
			CloseBitFileInput(bf);
			return -1;
		}

		printf("decoding\n");
		LZWDecode(bf, fp);
		fclose(fp);
		CloseBitFileInput(bf);
		fprintf(stdout, "decoding done\n");
	}
	else {	// otherwise
		fprintf(stderr, "not supported operation\n");
		return -1;
	}
	return 0;
}

三：运行结果

对十种不同格式的文件分别进行编解码
原始文件：
在这里插入图片描述
编码后生成文件

解码后生成文件
在这里插入图片描述

原始文件格式	原始文件大小	编码后文件大小	压缩比
doc	316KB	319KB	0.991
qcif	891KB	553KB	1.611
yuv	732KB	96KB	7.625
jpg	463KB	518KB	0.894
txt	100KB	60KB	1.667
tga	1201KB	1387KB	0.866
pdf	2785KB	3288KB	0.847
xls	140KB	89KB	1.573
pptx	208KB	267KB	0.779
png	129KB	174KB	0.741