霍夫曼树又称最优二叉树,是一种带权路径长度最短的二叉树。所谓树的带权路径长度,就是树中所有的叶结点的权值乘上其到根结点的路径长度(若根结点为0层,叶结点到根结点的路径长度为叶结点的层数)。树的路径长度是从树根到每一结点的路径长度之和。
赫夫曼树节点
/** * 创建Node,包含数据和权值 */ public class Node implements Comparable<Node> { //存放字符串本身 Byte data; //存放权值 字符出现的次数 int weight; Node left; Node right; public Node(int weight) { this.weight = weight; } public Node(Byte data, int weight) { this.data = data; this.weight = weight; } /** * 前序遍历 */ public void preOrder() { System.out.println(this); if (this.left != null) { this.left.preOrder(); } if (this.right != null) { this.right.preOrder(); } } @Override public int compareTo(Node o) { return this.weight - o.weight; } @Override public String toString() { return "Node{" + "date=" + data + ", weight=" + weight + '}'; } }
压缩、解压、测试代码
import java.io.*; import java.util.*; public class HuffmanCode { // 赫夫曼编码 32=01, 97=100, 100=11000, 117=11001, 101=1110, 118=11011, 105=101, 121=11010, 106=0010, 107=1111, 108=000, 111=0011 static Map<Byte, String> huffmanCodes = new HashMap<>(); public static void main(String[] args) { String content = "i like like like java do you like a java"; byte[] strByte = content.getBytes(); System.out.println("压缩前数组" + Arrays.toString(strByte)); System.out.println("压缩前数组长度" + strByte.length); //压缩 byte[] huffmanCodeBytes = huffmanZip(strByte); System.out.println("压缩后数组" + Arrays.toString(huffmanCodeBytes)); System.out.println("压缩后数组长度" + huffmanCodeBytes.length); //解压 byte[] sourceByte = decode(huffmanCodes, huffmanCodeBytes); System.out.println("解压后数组" + Arrays.toString(sourceByte)); System.out.println("解压后数组长度" + sourceByte.length); //解压后数据 System.out.println(new String(sourceByte)); //String srcFile = "D:\\demo.png"; //String zipFile = "D:\\demo.zip"; //zipFile(srcFile, zipFile); //System.out.println("压缩文件成功"); String zipFile = "D:\\demo.zip"; String dstFile = "D:\\demo2.png"; unZipFile(zipFile,dstFile); } /** * 赫夫曼解压 * * @param zipFile 准备解压的文件 * @param dstFile 解压后的文件存放目录 */ public static void unZipFile(String zipFile, String dstFile) { //定义文件输入流 InputStream is = null; //定义一个对象输入流 ObjectInputStream ois = null; //定义文件输出流 OutputStream os = null; try { //创建文件输入流 is = new FileInputStream(zipFile); //创建一个和is关联的对象输入流 ois = new ObjectInputStream(is); //读取byte数组,huffmanBytes byte[] huffmanBytes = (byte[]) ois.readObject(); //读取赫夫曼编码表 Map<Byte, String> huffmanCode = (Map<Byte, String>) ois.readObject(); // 解码 byte[] bytes = decode(huffmanCode, huffmanBytes); // 将解码后字节流写入目标文件 os = new FileOutputStream(dstFile); os.write(bytes); } catch (Exception e) { throw new RuntimeException(e); } finally { try { os.close(); ois.close(); is.close(); } catch (IOException e) { throw new RuntimeException(e); } } } /** * 编写方法,将文件进行压缩 * * @param srcFile 压缩文件全路径 * @param dstFile 压缩后文件输出路径 */ public static void zipFile(String srcFile, String dstFile) { //创建输入流 FileInputStream inputStream = null; //创建输出流 FileOutputStream outputStream = null; ObjectOutputStream oos = null; try { //创建文件输出流 inputStream = new FileInputStream(srcFile); // 创建一个和源文件大小一样的byte[] int inputSize = inputStream.available(); byte[] bytes = new byte[inputSize]; //读取文件 inputStream.read(bytes); //压缩文件 byte[] zipBytes = huffmanZip(bytes); //创建文件输出流,存放压缩文件 outputStream = new FileOutputStream(dstFile); //创建一个和文件输出流相关的ObjectOutputStream oos = new ObjectOutputStream(outputStream); // 将赫夫曼编码后的字节数组写入压缩文件 oos.writeObject(zipBytes); // 以对象流写入赫夫曼编码, oos.writeObject(huffmanCodes); } catch (IOException e) { throw new RuntimeException(e); } finally { try { inputStream.close(); outputStream.close(); oos.close(); } catch (IOException e) { System.out.println(e.getMessage()); } } } /** * 对压缩数据的解码 * * @param huffmanCodes * @param huffmanByte * @return */ private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanByte) { //将赫夫曼编码数组转换为二进制字符串 StringBuffer stringBuffer = new StringBuffer(); for (int i = 0; i < huffmanByte.length; i++) { //最后一位不需要补高位 if (i != huffmanByte.length - 1) { stringBuffer.append(byteToBitString(true, huffmanByte[i])); } else { stringBuffer.append(byteToBitString(false, huffmanByte[i])); } } //根据赫夫曼编码表,反写解码表 Map<String, Byte> decodeMap = new HashMap<>(); for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) { decodeMap.put(entry.getValue(), entry.getKey()); } //创建要给的集合,存放byte List<Byte> list = new ArrayList<>(); StringBuffer key = new StringBuffer(); for (int i = 0; i < stringBuffer.length(); i++) { key.append(stringBuffer.substring(i, i + 1)); Byte aByte = decodeMap.get(key.toString()); if (aByte != null) { list.add(aByte); key = new StringBuffer(); } } //构造返回数据 byte[] res = new byte[list.size()]; for (int i = 0; i < list.size(); i++) { res[i] = list.get(i); } return res; } /** * 将一个byte转成一个二进制的字符串 * * @param b 传入的byte * @param flag 标志是否需要补高位,如果为true,表示需要补高位 * @return byte对应的二进制字符串(按照补码返回) */ private static String byteToBitString(boolean flag, byte b) { // 使用变量保存b 将b转换为int int temp = b; //如果是正数,需要补高位 1 0000 0000 | 0000 0001= 10000 0001 if (flag) { temp |= 256; } //返回对应二进制的补码 String str = Integer.toBinaryString(temp); if (flag) { return str.substring(str.length() - 8); } else { return str; } } /** * 将字节数组压缩为赫夫曼数组 * * @param bytes * @return */ private static byte[] huffmanZip(byte[] bytes) { //获取赫夫曼树节点 List<Node> nodes = getNodes(bytes); //创建霍夫曼树 Node huffmanTree = createHuffmanTree(nodes); //生成霍夫曼编码 getCodes(huffmanTree); //压缩数据 return zip(bytes, huffmanCodes); } /** * 将字符串对应的byte[]数组,通过生成的赫夫曼编码表,返回一个赫夫曼编码压缩后的byte[] * * @param bytes 这是原始的字符串对应的byte[] * @param huffmanCodes 生成的赫夫曼编码map * @return 返回赫夫曼处理后的byte * 10101000补码 反码10101000-1=10100111 原码11011000 -(24+64) */ private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) { StringBuffer temp = new StringBuffer(); for (byte b : bytes) { temp.append(huffmanCodes.get(b)); } //计算新数组长度 int len=(tem.length()+7)/8 int len; if (temp.length() % 8 == 0) { len = temp.length() / 8; } else { len = temp.length() / 8 + 1; } //创建存储压缩后的byte数组; byte[] huffmanCodeBytes = new byte[len]; //记录第几个byte int index = 0; for (int i = 0; i < temp.length(); i += 8) { String strByte; if (i + 8 > temp.length()) { strByte = temp.substring(i); } else { strByte = temp.substring(i, i + 8); } //将字符串二进制转换为byte huffmanCodeBytes[index] = (byte) Integer.parseInt(strByte, 2); index++; } return huffmanCodeBytes; } /** * 重载 * * @param root */ private static void getCodes(Node root) { if (root != null) { getCodes(root.left, "0", new StringBuffer()); getCodes(root.right, "1", new StringBuffer()); } } /** * 将传入的node节点的所有叶子节点的赫夫曼编码得到,放到huffmanCodes集合 * * @param node 传入节点 * @param code 路径:左子节点0,右子节点1 * @param stringBuffer 用于路径拼接 */ private static void getCodes(Node node, String code, StringBuffer stringBuffer) { StringBuffer temp = new StringBuffer(stringBuffer); temp.append(code); if (code != null) { //飞叶子结点 if (node.data == null) { //左递归 getCodes(node.left, "0", temp); //右递归 getCodes(node.right, "1", temp); } else { //处理叶子节点 huffmanCodes.put(node.data, temp.toString()); } } } /** * 前序打印 * * @param root */ private static void preOrder(Node root) { if (root != null) { root.preOrder(); } else { System.out.println("赫夫曼树为空"); } } /** * 接受数组返回节点集合 * * @param bytes * @return */ private static List<Node> getNodes(byte[] bytes) { // 创建一个ArrayList ArrayList<Node> nodes = new ArrayList<>(); //创建一个map保存byte出现的次数 HashMap<Byte, Integer> counts = new HashMap<>(); for (byte b : bytes) { Integer num = counts.get(b); if (num == null) { counts.put(b, 1); } else { counts.put(b, ++num); } } //构建集合 for (Map.Entry<Byte, Integer> entry : counts.entrySet()) { nodes.add(new Node(entry.getKey(), entry.getValue())); } return nodes; } /** * 创建赫夫曼树 * * @param nodes * @return */ private static Node createHuffmanTree(List<Node> nodes) { while (nodes.size() > 1) { //排序从小到大 Collections.sort(nodes); // 取出前两个 Node leftNode = nodes.get(0); Node rightNode = nodes.get(1); // 创建父节点 Node parent = new Node(null, leftNode.weight + rightNode.weight); parent.left = leftNode; parent.right = rightNode; //删除已使用节点,增加新创建的节点 nodes.remove(leftNode); nodes.remove(rightNode); nodes.add(parent); } return nodes.get(0); } }
压缩前数据长度为40,压缩后数据长度为17.
压缩前数组[105, 32, 108, 105, 107, 101, 32, 108, 105, 107, 101, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97, 32, 100, 111, 32, 121, 111, 117, 32, 108, 105, 107, 101, 32, 97, 32, 106, 97, 118, 97] 压缩前数组长度40 压缩后数组[-88, -65, -56, -65, -56, -65, -55, 77, -57, 6, -24, -14, -117, -4, -60, -90, 28] 压缩后数组长度17 解压后数组[105, 32, 108, 105, 107, 101, 32, 108, 105, 107, 101, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97, 32, 100, 111, 32, 121, 111, 117, 32, 108, 105, 107, 101, 32, 97, 32, 106, 97, 118, 97] 解压后数组长度40 i like like like java do you like a java