zoukankan      html  css  js  c++  java
  • Python : 计算大文件MD5值

    buffer = 8192 这个取值来自网络,但我不清楚它是如何通过实践得出的。

    from hashlib import md5
    import time
    import os 
    
    def calMD5(str):
        """Return the hexadecimal MD5 digest of an in-memory string.

        Args:
            str: The data to hash. Byte strings are hashed as-is; text
                (unicode) strings are encoded as UTF-8 first. The parameter
                name shadows the builtin but is kept so existing callers
                are unaffected.

        Returns:
            The digest as a lowercase hexadecimal string.
        """
        data = str
        # ``type(u"")`` is ``unicode`` on Python 2 and the text type on
        # Python 3; the builtin ``str`` cannot be referenced here because the
        # parameter shadows it.
        if isinstance(data, type(u"")):
            data = data.encode("utf-8")
        return md5(data).hexdigest()
    
    def calMD5ForFile(file):
        """Return the hexadecimal MD5 digest of the file at path ``file``.

        Files of 1000 MiB or more are delegated to ``calMD5ForBigFile``;
        smaller files are read into memory with a single ``read()`` call.

        Args:
            file: Path of the file to hash.

        Returns:
            The digest as a lowercase hexadecimal string.

        Raises:
            OSError: If the file cannot be stat'ed or opened (``open``
                failures surface as ``IOError`` on Python 2).
        """
        # Same cut-off as "size / (1024 * 1024) >= 1000", expressed in bytes
        # so the result does not depend on integer vs. true division.
        big_file_threshold = 1000 * 1024 * 1024

        if os.stat(file).st_size >= big_file_threshold:
            # Single-argument form prints identically on Python 2 and 3.
            print("File size > 1000, move to big file...")
            return calMD5ForBigFile(file)

        # The context manager closes the handle even if read() raises
        # (for example MemoryError or an I/O error).
        with open(file, 'rb') as f:
            return md5(f.read()).hexdigest()
    
    def calMD5ForFolder(dir, MD5File):
        """Write an MD5 manifest for every file found under ``dir``.

        Walks ``dir`` recursively with ``os.walk`` and writes one line per
        file to ``MD5File`` in the form
        ``<path relative to dir> <md5 hex digest>``.

        Args:
            dir: Root directory to walk.
            MD5File: Path of the manifest to write. It is opened with mode
                ``'w'``, so any existing content is replaced.

        Returns:
            None.
        """
        # ``with`` guarantees the manifest is closed even when hashing one of
        # the files raises part-way through the walk.
        with open(MD5File, 'w') as outfile:
            for root, _subdirs, filenames in os.walk(dir):
                for filename in filenames:
                    fullpath = os.path.join(root, filename)
                    relpath = os.path.relpath(fullpath, dir)
                    # Named ``digest`` so the imported ``md5`` constructor is
                    # not shadowed inside this function.
                    digest = calMD5ForFile(fullpath)
                    outfile.write(relpath + ' ' + digest + "\n")
    
    def calMD5ForBigFile(file, chunk_size=8192):
        """Return the hexadecimal MD5 digest of ``file``, reading it in chunks.

        The file is fed to the hash one chunk at a time, so at most
        ``chunk_size`` bytes of file data are requested per read regardless
        of the file's size.

        Args:
            file: Path of the file to hash.
            chunk_size: Number of bytes requested per ``read()`` call.
                Defaults to 8192, the value previously hard-coded here (the
                original author noted it ran faster than 2048).

        Returns:
            The digest as a lowercase hexadecimal string.

        Raises:
            ValueError: If ``chunk_size`` is not positive. ``read(0)`` would
                look like end-of-file and yield the digest of empty input.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        m = md5()
        # The context manager closes the handle even if a read fails midway.
        with open(file, 'rb') as f:
            # Two-argument iter() keeps calling read() until it returns the
            # empty byte string that signals end of file.
            for chunk in iter(lambda: f.read(chunk_size), b""):
                m.update(chunk)
        return m.hexdigest()
        
        
    
    
    if __name__ == "__main__":
        # Benchmark driver: for each sample file, time calMD5ForFile and then
        # calMD5ForBigFile, printing each digest followed by the elapsed
        # seconds.
        sample_paths = (
            "E:\\OS\\ubuntu-11.04-desktop-i386.iso",
            "E:\\OS\\ubuntu-12.04-desktop-amd64.iso",
            "D:\\Virtual Machines\\Ubuntu 64-bit\\Ubuntu 64-bit-s001.vmdk",
        )

        for sample in sample_paths:
            started = time.time()
            print(calMD5ForFile(sample))
            print(time.time() - started)

            started = time.time()
            print(calMD5ForBigFile(sample))
            # The trailing " \n" leaves a blank line between samples, exactly
            # as printing the elapsed time followed by "\n" did.
            print("%s \n" % (time.time() - started))
        
    
        
    #output    
    #8b1085bed498b82ef1485ef19074c281
    #2.57500004768
    #8b1085bed498b82ef1485ef19074c281
    #3.34100008011 
    #
    #128f0c16f4734c420b0185a492d92e52
    #2.632999897
    #128f0c16f4734c420b0185a492d92e52
    #3.39100003242 
    #
    #File size > 1000, move to big file...
    #ec1fa4dc1b32569e9da7b4744548a9ef
    #5.40100002289
    #ec1fa4dc1b32569e9da7b4744548a9ef
    #5.42100000381 
    

      

    PS: 纪念下我直接计算3G+文件时的内存使用率

  • 相关阅读:
    JNI概述
    Android shape的使用
    全局对象Application的使用,以及如何在任何地方得到Application全局对象
    EditText中禁止输入中文的方法
    利用Selenium实现图片文件上传的两种方式介绍
    LoadRunner结果分析 – TPS
    详解 Spotlight on MySQL监控MySQL服务器
    Linux 服务器运行健康状况监控利器 Spotlight on Unix 的安装与使用
    资源监控工具Spotlight-使用说明
    RobotFrameWork+APPIUM实现对安卓APK的自动化测试----第七篇【元素定位介绍】
  • 原文地址:https://www.cnblogs.com/cstudio/p/2670276.html
Copyright © 2011-2022 走看看