/**
 * Huffman-coding demo: compresses a byte array (or a whole file) and restores it.
 *
 * <p>Packed layout produced by {@code zip} and consumed by {@code decode}:
 * the code bits are written most-significant-bit first, eight per byte; the last
 * data byte is zero-padded on its low end; one extra trailer byte follows that
 * holds how many bits of the last data byte are valid (1-8, or 0 when there is
 * no data at all). The trailer is what lets the decoder recover leading zero
 * bits of the final chunk and ignore the padding.
 *
 * <p>Archive layout written by {@link #zipFile}: a Java-serialized {@code byte[]}
 * (the packed data above) followed by a Java-serialized {@code Map<Byte, String>}
 * (the code table).
 */
public class HuffmanCode {

    /**
     * Code table (byte value -> bit string of '0'/'1') built by the most recent
     * compression. It is replaced, not appended to, on every compression so codes
     * from an earlier input can never leak into a later table.
     */
    static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** No longer used by this class; kept only so existing references still compile. */
    static StringBuilder stringBuilder = new StringBuilder();

    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(contentBytes.length); // 40

        byte[] huffmanCodeBytes = huffmanZip(contentBytes);
        System.out.println("压缩后数组=" + Arrays.toString(huffmanCodeBytes));
        System.out.println("长度=" + huffmanCodeBytes.length);

        byte b = -1;
        System.out.println(byteToBit(true, b));

        byte[] decodeBytes = decode(huffmanCodes, huffmanCodeBytes);
        System.out.println("decodeBytes=" + new String(decodeBytes, java.nio.charset.StandardCharsets.UTF_8));

        // Compress a file. Backslashes must be escaped inside Java string literals.
        String srcFile = "f:\\src.png";
        String dstFile = "f:\\dst.png";
        zipFile(srcFile, dstFile);
        System.out.println("压缩文件成功");

        // Restore the file that was just compressed.
        String zipFile = "f:\\dst.png";
        String dstFile2 = "f:\\src2.png";
        unzipFile(zipFile, dstFile2);
        System.out.println("解压文件成功");
    }

    /**
     * Restores a file previously written by {@link #zipFile}.
     *
     * <p>Failures (missing file, malformed archive, I/O error) are reported on
     * {@code System.err}; this method does not throw. The destination file is
     * opened only after decoding succeeded.
     *
     * @param zipFile path of the compressed archive to read
     * @param dstFile path the restored bytes are written to
     */
    public static void unzipFile(String zipFile, String dstFile) {
        try {
            byte[] huffmanBytes;
            Map<Byte, String> codes;
            // NOTE(review): this uses Java native deserialization; only open
            // archives from a trusted source.
            try (InputStream is = new FileInputStream(zipFile);
                 ObjectInputStream ois = new ObjectInputStream(is)) {
                huffmanBytes = (byte[]) ois.readObject();
                // The archive stores the table as Map<Byte, String>; generic
                // type arguments cannot be checked at run time.
                @SuppressWarnings("unchecked")
                Map<Byte, String> table = (Map<Byte, String>) ois.readObject();
                codes = table;
            }

            byte[] restored = decode(codes, huffmanBytes);

            try (OutputStream os = new FileOutputStream(dstFile)) {
                os.write(restored);
            }
        } catch (IOException | ClassNotFoundException | RuntimeException e) {
            // Callers rely on this method never throwing, so report and return.
            System.err.println("unzipFile failed for " + zipFile + ": " + e);
        }
    }

    /**
     * Compresses a file with Huffman coding.
     *
     * <p>The archive holds the packed data followed by the code table, both as
     * serialized objects; the table is required to restore the file. Failures are
     * reported on {@code System.err}; this method does not throw.
     *
     * @param srcFile path of the file to compress
     * @param dstFile path of the archive to create
     */
    public static void zipFile(String srcFile, String dstFile) {
        try {
            // Reads the whole file; a single InputStream.read call is not
            // guaranteed to fill the buffer.
            byte[] original = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(srcFile));

            Map<Byte, String> codes = getCodes(createHuffmanTree(getNodes(original)));
            byte[] huffmanBytes = zip(original, codes);

            try (OutputStream os = new FileOutputStream(dstFile);
                 ObjectOutputStream oos = new ObjectOutputStream(os)) {
                oos.writeObject(huffmanBytes);
                oos.writeObject(codes);
            }
        } catch (IOException | RuntimeException e) {
            // Callers rely on this method never throwing, so report and return.
            System.err.println("zipFile failed for " + srcFile + ": " + e);
        }
    }

    /**
     * Decodes packed Huffman data back into the original bytes.
     *
     * @param huffmanCodes code table (byte value -> bit string)
     * @param huffmanBytes packed data in the layout described on the class,
     *                     i.e. data bytes followed by the one-byte trailer
     * @return the original bytes; an empty array when there is no data
     * @throws IllegalArgumentException if the trailer is out of range or the bit
     *                                  stream does not consist of codes from the table
     */
    private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        if (huffmanBytes == null || huffmanBytes.length <= 1) {
            return new byte[0];
        }

        int dataLength = huffmanBytes.length - 1;
        int validBitsInLast = huffmanBytes[dataLength];
        if (validBitsInLast < 1 || validBitsInLast > 8) {
            throw new IllegalArgumentException(
                    "Corrupt Huffman data: invalid trailer byte " + validBitsInLast);
        }
        long totalBits = (dataLength - 1) * 8L + validBitsInLast;

        // Reverse the table so a bit string can be looked up directly.
        Map<String, Byte> symbols = new HashMap<>();
        int longestCode = 0;
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            symbols.put(entry.getValue(), entry.getKey());
            longestCode = Math.max(longestCode, entry.getValue().length());
        }

        java.io.ByteArrayOutputStream decoded = new java.io.ByteArrayOutputStream();
        StringBuilder pending = new StringBuilder();
        for (long bit = 0; bit < totalBits; bit++) {
            int octet = huffmanBytes[(int) (bit >>> 3)];
            boolean isOne = (octet & (0x80 >>> (int) (bit & 7))) != 0;
            pending.append(isOne ? '1' : '0');

            Byte symbol = symbols.get(pending.toString());
            if (symbol != null) {
                decoded.write(symbol.byteValue());
                pending.setLength(0);
            } else if (pending.length() >= longestCode) {
                // No code is longer than this, so the stream cannot be valid.
                throw new IllegalArgumentException(
                        "Corrupt Huffman data: bit sequence matches no code");
            }
        }
        if (pending.length() != 0) {
            throw new IllegalArgumentException(
                    "Corrupt Huffman data: trailing bits match no code");
        }
        return decoded.toByteArray();
    }

    /**
     * Renders a byte as a binary string (two's complement).
     *
     * @param flag {@code true} to always return exactly eight characters;
     *             {@code false} to return {@code Integer.toBinaryString} of the
     *             value unchanged (no leading zeros for non-negative values,
     *             32 characters for negative ones)
     * @param b    the byte to render
     * @return the binary string for {@code b}
     */
    private static String byteToBit(boolean flag, byte b) {
        int temp = b;
        if (flag) {
            // Setting bit 8 guarantees at least nine digits, so the last eight
            // always include the leading zeros of a small positive value.
            temp |= 256;
        }
        String str = Integer.toBinaryString(temp);
        return flag ? str.substring(str.length() - 8) : str;
    }

    /**
     * Convenience wrapper: builds the Huffman tree and code table for
     * {@code bytes}, publishes the table in {@link #huffmanCodes}, and returns
     * the packed data.
     *
     * @param bytes the bytes to compress
     * @return packed data in the layout described on the class
     */
    private static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        Node huffmanTreeRoot = createHuffmanTree(nodes);
        Map<Byte, String> codes = getCodes(huffmanTreeRoot);
        return zip(bytes, codes);
    }

    /**
     * Packs {@code bytes} using the given code table.
     *
     * @param bytes        the bytes to compress
     * @param huffmanCodes code table covering every value that occurs in {@code bytes}
     * @return data bytes (bits MSB-first, last byte zero-padded on the low end)
     *         followed by one trailer byte with the number of valid bits in the
     *         last data byte (0 when {@code bytes} is empty)
     * @throws IllegalArgumentException if a byte has no code in the table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        long totalBits = 0;
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("No Huffman code for byte " + b);
            }
            totalBits += code.length();
        }

        int dataLength = (int) ((totalBits + 7) / 8);
        byte[] huffmanCodeBytes = new byte[dataLength + 1];

        long bitPosition = 0;
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            for (int k = 0; k < code.length(); k++) {
                if (code.charAt(k) == '1') {
                    huffmanCodeBytes[(int) (bitPosition >>> 3)] |=
                            (byte) (0x80 >>> (int) (bitPosition & 7));
                }
                bitPosition++;
            }
        }

        int remainder = (int) (totalBits % 8);
        int validBitsInLast = totalBits == 0 ? 0 : (remainder == 0 ? 8 : remainder);
        huffmanCodeBytes[dataLength] = (byte) validBitsInLast;
        return huffmanCodeBytes;
    }

    /**
     * Builds the code table for a Huffman tree and publishes it in
     * {@link #huffmanCodes}.
     *
     * <p>A tree consisting of a single leaf (input with one distinct byte value)
     * gets the one-bit code {@code "0"} so that every symbol still occupies at
     * least one bit.
     *
     * @param root root of the Huffman tree, or {@code null} for empty input
     * @return a fresh table (byte value -> bit string); empty when {@code root} is null
     */
    private static Map<Byte, String> getCodes(Node root) {
        Map<Byte, String> codes = new HashMap<>();
        if (root != null) {
            if (root.data != null) {
                codes.put(root.data, "0");
            } else {
                collectCodes(root, new StringBuilder(), codes);
            }
        }
        huffmanCodes = codes;
        return codes;
    }

    /**
     * Walks the tree depth-first, recording the path to every leaf
     * (left edge = '0', right edge = '1').
     *
     * @param node  current node; {@code null} is ignored
     * @param path  bits on the path from the root to {@code node}; restored to
     *              its incoming content before returning
     * @param codes table that receives one entry per leaf
     */
    private static void collectCodes(Node node, StringBuilder path, Map<Byte, String> codes) {
        if (node == null) {
            return;
        }
        if (node.data != null) {
            // Leaves are the only nodes that carry a byte value.
            codes.put(node.data, path.toString());
            return;
        }
        int mark = path.length();
        path.append('0');
        collectCodes(node.left, path, codes);
        path.setLength(mark);
        path.append('1');
        collectCodes(node.right, path, codes);
        path.setLength(mark);
    }

    /**
     * Counts how often each byte value occurs and wraps every (value, count)
     * pair in a leaf {@link Node}.
     *
     * @param bytes the input bytes
     * @return one leaf per distinct byte value; empty for empty input
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }

        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest trees.
     *
     * <p>The list is modified: on return it contains only the root (or is left
     * untouched when it was empty).
     *
     * @param nodes the leaves to combine; must be a modifiable list
     * @return the root of the tree, or {@code null} when {@code nodes} is null or empty
     */
    public static Node createHuffmanTree(List<Node> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest trees are at the front.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            // Internal nodes carry only a weight, never a byte value.
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Prints the tree in pre-order, or a notice when the tree is empty.
     *
     * @param root root of the tree; may be {@code null}
     */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("赫夫曼树为空");
        }
    }
}

/**
 * Huffman tree node. Leaves hold a byte value; internal nodes have
 * {@code data == null}. Nodes order by ascending weight.
 */
class Node implements Comparable<Node> {
    /** The byte value for a leaf (e.g. 'a' -> 97); {@code null} for internal nodes. */
    Byte data;
    /** Number of occurrences represented by this subtree. */
    int weight;
    Node left;
    Node right;

    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public String toString() {
        return "Node{" + "data=" + data + ", weight=" + weight + '}';
    }

    @Override
    public int compareTo(Node o) {
        // Integer.compare avoids the overflow that plain subtraction can produce.
        return Integer.compare(this.weight, o.weight);
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}