zoukankan      html  css  js  c++  java
  • huffman编解码英文文本[Python]

    对英文文本中的字符进行 Huffman 编码,使用 heapq 优先队列构建 Huffman 树。

    用法:python huffman.py source.txt result.txt
     1 import sys
     2 import heapq
     3 import collections
     4 
     5 class Node(object):
     6     def __init__(self,value = None,count = 1,left = None,right = None, code = ''):
     7         self.value = value
     8         self.count = count
     9         self.left = left
    10         self.right = right
    11         self.code = code
    12 
    13     def isleaf(self):
    14         if self.left != None:
    15             return False
    16         elif self.right != None:
    17             return False
    18         else:
    19             return True
    20 
    21     def __repr__(self):
    22         return "Node(%r,%r)"%(self.value,self.count)
    23     #for sort or priority queue
    24     def __lt__(self,other):
    25         return self.count < other.count
    26     #for operator +
    27     def __add__(self,other):
    28         self.code = 0
    29         other.code = 1
    30         # only leaf node value is need
    31         return Node(self.value+other.value,self.count+other.count,self,other)
    32 
    33 def getTreeRoot(text):
    34     Counter = collections.Counter(text)
    35     head = [Node(k,v) for (k,v) in Counter.items()]
    36     heapq.heapify(head)
    37 
    38     while len(head) >= 2:
    39         heapq.heappush(head, heapq.heappop(head) + heapq.heappop(head))
    40 
    41     root = head[0]
    42     return root
    43 
    44 def huffman(root, prefix = []):
    45     code = {}
    46     if root is None:
    47         return code
    48     prefix = prefix + [root.code]
    49     if root.isleaf():
    50         code[root.value] = prefix
    51     else:
    52         code.update(huffman(root.left,prefix))
    53         code.update(huffman(root.right,prefix))
    54     return code
    55 
    56 def gethuffmantext(text):
    57     root = getTreeRoot(text)
    58     codebook = huffman(root)
    59     for k,v in codebook.items():
    60         newv = "".join(str(char) for char in v)
    61         codebook[k] = newv
    62     print (codebook)
    63     print("The original text size is {}".format(len(text) * 8))
    64     huffmantext = []
    65     lenhuffman = 0
    66     for char in text:
    67         lenhuffman += len(codebook[char])
    68         huffmantext.append(codebook[char])
    69     print ("The huffman code text size is {}".format(lenhuffman))
    70 
    71     return huffmantext, codebook, lenhuffman
    72 
    73 def gettextfromhuffmancode(huffmantext,codebook):
    74 
    75     reversecodebook = {value:key for (key,value) in codebook.items()}
    76 
    77     text = []
    78     for huffmancode in huffmantext:
    79         text.append(reversecodebook[huffmancode])
    80 
    81     return text
    82 
    83 
    84 if __name__ == "__main__":
    85     text = open(sys.argv[1],"rb").read()
    86     newfile = sys.argv[2]
    87     #huffmantext, codebook, lenhuffman = gethuffmantext("hello world")
    88     huffmantext, codebook, lenhuffman = gethuffmantext(text)
    89     text1 = gettextfromhuffmancode(huffmantext,codebook)
    90     text1 = "".join(str(v) for v in text1)
    91 
    92     lenoftext = len(text)*8.0
    93     lenofhuffman = lenhuffman
    94 
    95     print ("The compression ratio is %lf  
    " % (1.0 * lenhuffman / lenoftext ))
    96 
    97     fp = open(newfile,"wb")
    98     fp.write(text1)
    99     fp.close()
  • 相关阅读:
    Linux下安装破解JIRA 6.3.6 并连接MYSQL5
    centos7 系统安装问题汇总
    CentOS7安装iptables防火墙
    Vue全家桶实战 从零独立开发企业级电商系统
    小米笔记本pro充电10秒断开
    mac电脑的使用
    autojs解决方案
    auto.js连接vscode
    小米6手机刷机亲测详解
    #002前端基础-JS-浏览器中堆栈内存的底层处理
  • 原文地址:https://www.cnblogs.com/demian/p/7884753.html
Copyright © 2011-2022 走看看