昨天只完成了赫夫曼编码表的生成,今天补充了一些之前没有学过的知识,给出了一个压缩的实例,并将赫夫曼编码压缩的各个步骤封装成了一个方法。
import java.util.*;
/**
 * Demonstrates Huffman-coding compression of a byte array.
 *
 * <p>The pipeline is: count byte frequencies ({@link #getNode}), build the Huffman tree
 * ({@link #createHuffmanTree}), derive the per-byte bit strings ({@link #getCodes(Node)}),
 * and pack the concatenated bit string into bytes ({@link #zip}).
 *
 * <p>The code table lives in a shared static map, so this class is not safe for concurrent use.
 */
public class HuffmanCode {

    /**
     * Shared Huffman code table: maps each source byte to its bit string, where '0' means
     * "went to the left child" and '1' means "went to the right child". It is cleared and
     * refilled on every call to {@link #getCodes(Node)}.
     */
    static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();

    /** Empty path prefix kept for compatibility; this class no longer reads it. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Compresses a sample sentence and prints the original text, its byte length, the
     * compressed bytes and the compressed length.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "i love love a a man man and and monocle monocle";
        System.out.println("原来的字符串为" + str);
        // Pin the charset so the byte content does not depend on the platform default.
        byte[] strBytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        System.out.println("原来字符串长度为" + strBytes.length);
        byte[] huffmanCodesBytes = huffmanZip(strBytes);
        System.out.println("压缩后的结果为" + Arrays.toString(huffmanCodesBytes));
        System.out.println("压缩后的长度为" + huffmanCodesBytes.length);
    }

    /**
     * Runs the whole compression pipeline on {@code bytes}.
     *
     * @param bytes the raw bytes to compress; must not be null
     * @return the packed Huffman-encoded bytes; an empty array when {@code bytes} is empty
     * @throws NullPointerException if {@code bytes} is null
     */
    private static byte[] huffmanZip(byte[] bytes) {
        Objects.requireNonNull(bytes, "bytes");
        if (bytes.length == 0) {
            // Nothing to encode; building a tree from zero nodes is impossible.
            return new byte[0];
        }
        List<Node> nodes = getNode(bytes);
        Node huffmanTreeRoot = createHuffmanTree(nodes);
        Map<Byte, String> codes = getCodes(huffmanTreeRoot);
        return zip(bytes, codes);
    }

    /**
     * Encodes {@code bytes} with the given code table and packs the resulting bit string
     * into bytes, eight bits per byte.
     *
     * <p>Each 8-bit group is parsed as an unsigned binary number and narrowed to
     * {@code byte}, so groups of 128 and above appear as negative values. A trailing group
     * shorter than eight bits is parsed as-is (right-aligned), so the number of bits in the
     * last group is not recorded in the output.
     *
     * @param bytes        the raw bytes to encode
     * @param huffmanCodes code table mapping every byte in {@code bytes} to its bit string
     * @return the packed bytes; empty when the encoded bit string is empty
     * @throws IllegalArgumentException if a byte in {@code bytes} has no entry in the table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would silently insert the text "null" into the bit string.
                throw new IllegalArgumentException("No Huffman code for byte " + b);
            }
            bits.append(code);
        }

        int bitCount = bits.length();
        // Round up so a trailing partial group still gets its own byte.
        byte[] huffmanCodeBytes = new byte[(bitCount + 7) / 8];
        int index = 0;
        for (int start = 0; start < bitCount; start += 8) {
            int end = Math.min(start + 8, bitCount);
            huffmanCodeBytes[index++] = (byte) Integer.parseInt(bits.substring(start, end), 2);
        }
        return huffmanCodeBytes;
    }

    /**
     * Walks the subtree rooted at {@code node} and stores the code of every leaf in
     * {@link #huffmanCodes}.
     *
     * @param node          the subtree root; {@code null} is ignored
     * @param code          the branch taken to reach {@code node}: "0" for left, "1" for right
     * @param stringBuilder the path from the tree root to the parent of {@code node}; it is
     *                      copied, never modified
     */
    public static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Copy the prefix so sibling branches do not see each other's suffix.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: keep descending.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is this byte's code.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Builds the code table for the whole tree.
     *
     * <p>The shared table is cleared first so codes from a previous tree do not leak into
     * this one. A tree made of a single leaf (input with one distinct byte) gets the
     * one-bit code "0", because an empty code could not be packed by {@link #zip}.
     *
     * @param root the Huffman tree root; may be null
     * @return the shared {@link #huffmanCodes} map (empty when {@code root} is null)
     */
    private static Map<Byte, String> getCodes(Node root) {
        huffmanCodes.clear();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        StringBuilder emptyPath = new StringBuilder();
        getCodes(root.left, "0", emptyPath);
        getCodes(root.right, "1", emptyPath);
        return huffmanCodes;
    }

    /**
     * Prints the tree in pre-order, or a notice when the tree is empty.
     *
     * @param root the tree root; may be null
     */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("赫夫曼树为空");
        }
    }

    /**
     * Counts how often each byte occurs and wraps every (byte, count) pair in a {@link Node}.
     *
     * @param bytes the raw bytes to analyse
     * @return one node per distinct byte, weighted by its occurrence count
     */
    private static List<Node> getNode(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            counts.merge(b, 1, Integer::sum);
        }
        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes under a new parent
     * whose weight is their sum. The given list is consumed in the process.
     *
     * @param nodes the leaf nodes; modified by this method
     * @return the tree root, or {@code null} when {@code nodes} is null or empty
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Re-sort ascending by weight so the two lightest nodes sit at the front.
            Collections.sort(nodes);
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            // The parent carries no data, only the combined weight.
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }
}
/**
 * A binary-tree node that stores a byte value and a weight, ordered by ascending weight.
 */
class Node implements Comparable<Node> {
    /** The stored byte (e.g. 'a' is 97); may be null for nodes created without data. */
    Byte data;
    /** The weight, i.e. how many times the byte occurs. */
    int weight;
    Node left;
    Node right;

    /**
     * Creates a node without children.
     *
     * @param data   the byte to store; may be null
     * @param weight the node's weight
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare with
     * @return a negative value, zero, or a positive value when this node's weight is less
     *         than, equal to, or greater than the other node's weight
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare avoids the overflow that plain subtraction can produce.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node [data=" + data + ", weight=" + weight + "]";
    }

    public Byte getData() {
        return data;
    }

    public void setData(Byte data) {
        this.data = data;
    }

    public int getWeight() {
        return weight;
    }

    public void setWeight(int weight) {
        this.weight = weight;
    }

    public Node getLeft() {
        return left;
    }

    public void setLeft(Node left) {
        this.left = left;
    }

    public Node getRight() {
        return right;
    }

    public void setRight(Node right) {
        this.right = right;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}
运行结果如下: