赫夫曼编码 压缩解压文件

简介 赫夫曼编码是利用wpl最小原则创建的二叉树然后对统计的数据进行压缩的一种编码方式,笔者这里提供了一种,java压缩解压文件的一种实现,当然这里的效率不是太高,只作为学习参考,如需使用呢,还需要改进



下面直接就给出了胆码实现

package huffman;


import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.InputStream;

import java.io.ObjectInputStream;

import java.io.ObjectOutputStream;

import java.io.OutputStream;

import java.util.ArrayList;

import java.util.Collections;

import java.util.HashMap;

import java.util.List;

import java.util.Map.Entry;


/*

 * 时间:2019 10 19

 * 作者:latefly

 * 功能:

 * 一个霍夫曼编码的示例演示包括霍夫曼树的创建以及

 * 霍夫曼编码的过程

 * 包含文件压缩解压的过程

 * 

 * 

 * 

 * 

 */


class HuffmanCode {

static HashMap<Byte, String> words = new HashMap<>();

static StringBuilder stringBuilder = new StringBuilder();


public static void main(String[] args) {

// TODO Auto-generated method stub

String srcFile1 = "src/testFolder/StringOperation.h";

String dstFile1 = "src/testFolder/StringOperation.package";

zipFile(srcFile1, dstFile1);

System.out.println("zip file finishend !!!");


String srcFile2 = "src/testFolder/StringOperation.package";

String dstFile2 = "src/testFolder/StringOperation_test.h";

unzipFile(srcFile2, dstFile2);

System.out.println("unzip file finishend !!!");


// String content = "i like like like java do you like a java";

// String content = "this is a huffman coding and decoding test1234";

// byte[] contents = content.getBytes();

// List<Nodes> array = getNode(contents);

// Nodes node = array.get(0);

// node.preOder(node);

// System.out.println(contents.length);

//

// getCode(node, "", stringBuilder);

// System.out.println(words.toString());

/*

* byte bytes[] = encoding(contents); System.out.println("data:" +

* content); System.out.println("encodeing result:" + bytes.length);

* for (int i = 0; i < bytes.length; i++) { System.out.print(bytes[i] +

* " "); } System.out.println(""); String decode = decoding(words,

* bytes);

*/

// System.out.println(decode);

}


public static List<Nodes> getNode(byte bytes[]) {

List<Nodes> array = new ArrayList<Nodes>();

// 统计字符里面的值

HashMap<Byte, Integer> map = new HashMap<Byte, Integer>();

for (byte value : bytes) {

Integer count = map.get(value);

if (count == null) {

map.put(value, 1);

} else {

map.put(value, count + 1);

}

}

// 取出结果构成链表

for (Entry<Byte, Integer> entry : map.entrySet()) {

array.add(new Nodes(entry.getValue(), entry.getKey()));

}

// 构建霍夫曼树

while (array.size() > 1) {

Collections.sort(array);

Nodes leftNode = array.get(0);

Nodes rightNode = array.get(1);


Nodes parent = new Nodes(leftNode.weight + rightNode.weight, null);

parent.left = leftNode;

parent.right = rightNode;


array.remove(leftNode);

array.remove(rightNode);


array.add(parent);

}

return array;

}


public static void getCode(Nodes node, String code, StringBuilder stringBuilder) {

StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);

stringBuilder2.append(code);

if (node.value == null) {

getCode(node.left, "0", stringBuilder2);

getCode(node.right, "1", stringBuilder2);

}

if (node.value != null)

words.put(node.value, stringBuilder2.toString());


}


// 生成霍夫曼编码的数组

public static byte[] encoding(byte[] content) {

// HashMap<Byte, String> words;


// 创建赫夫曼树

List<Nodes> array = getNode(content);

// 创建赫夫曼编码表根据赫夫曼树

Nodes node = array.get(0);

getCode(node, "", stringBuilder);


String str = "";

// 创建转换数据


for (int i = 0; i < content.length; i++) {


str += words.get(content[i]);

}


// formatString(str);


// 转换数据

int counter = 0;

String temp;

byte[] bytes;

if (str.length() % 8 == 0) {

bytes = new byte[str.length() / 8];

} else {

bytes = new byte[str.length() / 8 + 1];

}


for (int i = 0; i < str.length(); i += 8) {


if (i + 8 < str.length()) {

temp = str.substring(i, i + 8);

} else {

temp = str.substring(i);

}


bytes[counter++] = (byte) Integer.parseInt(temp, 2);


}

// System.out.println(code.length() + "\nCode:" + code);

// 数据转换然后输出


return bytes;

}


public static byte[] decoding(HashMap<Byte, String> huffmancode, byte bytes[]) {

String decode = "";

byte temp;


// 1将得到的bytes转换成字符串

// 2将字符串匹配码表得到原先的字符串

for (int i = 0; i < bytes.length; i++) {

temp = bytes[i];

// 最后一位的处理可能由于位数(原先转换时)不够转换的有问题

// 解决方法就是直接处理最后一个字节让它满足要求

// 对于最后一位的处理

if (i == bytes.length - 1) {

int thisTemp = bytes[i];

String str = Integer.toBinaryString(thisTemp);

decode += str;// 最后的一个位需要特殊处理以确保不会补位

break;

}

for (int j = 0; j < 8; j++) {

if ((temp >> 7 & 0x01) == 0x01) {

decode += "1";

} else {

decode += "0";

}

temp <<= 1;


}


}

// formatString(decode);


// 将原有的霍夫曼表键值与值反过来 然后进行查找

HashMap<String, Byte> referChart = new HashMap<>();

for (HashMap.Entry<Byte, String> item : huffmancode.entrySet()) {

referChart.put(item.getValue(), item.getKey());

}


// 查找解析过程

int counter = 1;

String substr = "";

boolean flag = true;

List<Byte> list = new ArrayList<>();

Byte key = null;

for (int i = 0; i < decode.length();) {

flag = true;

counter = 1;

while (flag) {


substr = decode.substring(i, i + counter);


key = referChart.get(substr);

if (key == null) {

counter++;

} else {

flag = false;

}

}

// 防止解析出错可能的死循环

if (i + counter > decode.length()) {

break;

}

list.add(key);

i += counter;

}

byte[] decodebuff = new byte[list.size()];

decode = "";


for (int i = 0; i < list.size(); i++) {

decodebuff[i] = list.get(i);

decode += (char) list.get(i).byteValue() + "";

}

// 此处编码有问题不能够还原原本的数据

// 最后的两个数据出错

// System.out.println(decode + "\nDecodeSize:" + decode.length());

return decodebuff;

}


public static void formatString(String str) {

for (int i = 0; i < str.length(); i += 8) {

for (int j = 0; j < 8; j++) {

if (i + j < str.length()) {

System.out.print(str.charAt(i + j));

} else {

break;

}

}


System.out.println();

}

}


/**

* @param srcFile

*            原文件路径全名

* @param dstFile

*            压缩文件路径全名

*/

public static void zipFile(String srcFile, String dstFile) {

// 输入流

InputStream is = null;

OutputStream os = null;

ObjectOutputStream obs = null;

try {

is = new FileInputStream(srcFile);

byte bytes[] = new byte[is.available()];// 创建与文件大小一样的数组然后储存数据

is.read(bytes);

// 压缩数据

byte[] encode = encoding(bytes);

os = new FileOutputStream(dstFile);

obs = new ObjectOutputStream(os);

// 首先储存字节码

obs.writeObject(encode);

// 然后呢储存霍夫曼编码

obs.writeObject(words);


} catch (Exception e) {

// TODO: handle exception

System.out.println(e.getMessage());

} finally {

try {

is.close();

obs.close();

os.close();

} catch (Exception e) {

// TODO: handle exception

System.out.println(e.getMessage());

}

}

}


/**

* 解压文件

* @param srcFile

*            源文件的全名

* @param dstFile

*            目的文件路径全名

*/

public static void unzipFile(String srcFile, String dstFile) {

InputStream in = null;

ObjectInputStream ois = null;

OutputStream os = null;


try {

in = new FileInputStream(srcFile);

// byte bytes[] = new byte[in.available()];

// in.read(bytes);

ois = new ObjectInputStream(in);

byte encode[] = (byte[]) ois.readObject();

HashMap<Byte, String> huffmancode = (HashMap<Byte, String>) ois.readObject();

byte decode[] = decoding(huffmancode, encode);

os = new FileOutputStream(dstFile);

os.write(decode);


} catch (Exception e) {

// TODO: handle exception

System.out.println(e.getMessage());

} finally {

try {

in.close();

ois.close();

os.close();

} catch (Exception e) {

System.out.println(e.getMessage());

}

}


}

}


class Nodes implements Comparable<Nodes> {

int weight;

Byte value;

Nodes left;

Nodes right;


public Nodes(int weight, Byte value) {

this.weight = weight;

this.value = value;

}


public void preOder(Nodes node) {

if (node != null) {

System.out.println("weight:" + node.weight + "  value:" + node.value);

preOder(node.left);

preOder(node.right);

}


}


public int compareTo(Nodes o) {

// TODO Auto-generated method stub

return this.weight - o.weight;

}


}


文章评论

Top