zoukankan      html  css  js  c++  java
  • python写的多线程下载工具

    其实只是想练习一下threading的用法。

    写完后发现其实下载速度也没增加多少,略显尴尬,汗

    # -*- coding: cp936 -*-
    import urllib2
    import threading
    import os,os.path
    import sys
    import time,datetime
    
    # URL of the file currently being downloaded; assigned by main().
    url = ''
    # Directory the downloaded file is written to (the working directory).
    path = os.getcwd()
    
    # Name of the local output file; filled in by file_write_init().
    file_name = ''
    # Shared output file object; an empty-string placeholder until
    # file_write_init() opens the real file.
    file_fp=''
    # Size of the remote file in bytes, taken from Content-Length.
    net_filesize = 0
    # Running count of bytes handed to file_write().
    size_download = 0
    # Guards file_fp and size_download, which all worker threads share.
    threadlock = threading.Lock()
    
    # Number of byte ranges (worker threads) download() splits the file into.
    Thread_Num = 10
    # Number of bytes each worker requests per read() call.
    Block_Size = 16384
    
    def get_filename_from_url(url):
        """Return the last path component of *url* for use as a file name.

        The fragment ('#...') and the query string ('?...') are removed
        before looking for the last '/', so the query never ends up in the
        file name and a '/' inside a query value is not mistaken for a path
        separator.

        Returns '' when the URL path ends with '/', and the whole (stripped)
        string when it contains no '/' at all.
        """
        path_part = url.split('#', 1)[0].split('?', 1)[0]
        return path_part[path_part.rfind('/')+1:]
    
    def get_net_file_info(url):
        """Fetch the size and suggested file name of the resource at *url*.

        Opens *url* and reads the response headers only; the body is not
        consumed.

        Returns a ``(size, name)`` tuple: *size* is the Content-Length as an
        int, *name* comes from the Content-Disposition ``filename=``
        parameter when one is present and otherwise from the URL itself.

        Raises KeyError if the server sends no Content-Length header, plus
        whatever urllib2.urlopen() raises for network/HTTP errors.
        """
        r = urllib2.urlopen(url)
        try:
            headers = r.info()
            fs = int(headers['Content-Length'])
            fn = ''
            disposition = headers.get('Content-Disposition')
            if disposition and 'filename=' in disposition:
                # Keep only this parameter's value: anything after the next
                # ';' belongs to another parameter of the header.
                fn = disposition.split('filename=', 1)[1].split(';', 1)[0]
                fn = fn.replace('"', '').replace("'", "").strip()
                # The name is chosen by the server, so drop any directory
                # part to keep the download inside the target directory.
                fn = os.path.basename(fn.replace('\\', '/'))
            if not fn:
                fn = get_filename_from_url(url)
        finally:
            # Release the connection even when header parsing fails.
            r.close()
        return fs,fn
    
    def file_write_init():
        """Look up the remote file and create the local output file.

        Stores the remote size and name in the ``net_filesize`` and
        ``file_name`` globals, then opens ``path/file_name`` for binary
        writing as ``file_fp`` and sizes it with truncate(net_filesize).

        Returns False, without creating anything, when that file already
        exists; returns True once the output file is open.
        """
        global file_fp,net_filesize,file_name
        remote_size,remote_name = get_net_file_info(url)
        net_filesize = remote_size
        file_name = remote_name
        target = os.path.join(path,file_name)
        if os.path.isfile(target):
            return False
        file_fp = open(target,"wb")
        file_fp.truncate(net_filesize)
        return True
        
    def file_write(pos,dat):
        """Write *dat* into the shared output file at byte offset *pos*.

        Called by the mpdown worker threads. The seek and the write are done
        under ``threadlock`` so two threads cannot interleave them, and the
        ``size_download`` counter is advanced by ``len(dat)``.
        """
        global size_download
        # "with" releases the lock even if seek()/write() raises; a bare
        # acquire()/release() pair would leave it held and block every other
        # worker thread forever.
        with threadlock:
            file_fp.seek(pos)
            file_fp.write(dat)
            # Updated after the write so only stored bytes are counted.
            size_download += len(dat)
        
    def file_write_finish():
        """Close the shared output file held in the ``file_fp`` global."""
        file_fp.close()
    
    class mpdown(threading.Thread):
        """Worker thread that downloads one inclusive byte range of ``url``.

        range_start / range_end are absolute byte offsets in the remote
        file; the received data is stored at the same offsets of the local
        file through file_write().
        """
        def __init__(self,range_start,range_end):
            threading.Thread.__init__(self)
            self.range_start = range_start
            self.range_end = range_end

        def run(self):
            """Request the byte range and copy it into the output file.

            Raises IOError if the connection ends before the whole range has
            been received.
            """
            req = urllib2.Request(url)
            req.add_header('Range','bytes=%d-%d'%(self.range_start,self.range_end))
            r = urllib2.urlopen(req)
            # NOTE(review): nothing here checks that the server honoured the
            # Range header (HTTP 206). If it answers with the whole file, the
            # wrong bytes are written at this offset -- confirm and handle.
            try:
                pos_base = self.range_start
                pos = 0
                size_to_get = self.range_end - self.range_start +1
                while pos<size_to_get:
                    dat = r.read(Block_Size)
                    if not dat:
                        # An empty read means the peer closed the connection;
                        # without this check pos never advances and the loop
                        # spins forever.
                        raise IOError('connection closed after %d of %d bytes (range %d-%d)'
                                      %(pos,size_to_get,self.range_start,self.range_end))
                    # Never write past the end of this thread's range.
                    if pos+len(dat)>size_to_get:
                        dat = dat[:size_to_get-pos]
                    file_write(pos+pos_base,dat)
                    pos += len(dat)
            finally:
                r.close()
    
    def download_one_thread():
        """Placeholder for a single-threaded download path; does nothing."""
        return None
    
    def download():
        #check_range_acceptable()
        #if file_length<thread_num  use single thread
        #download_one_thread()
        #block size of each thread
    
        time_start = datetime.datetime.now()
        if file_write_init() == False:
            return 
        print 'file name:%s'%(file_name)
        print 'net_filesize:%dbyte'%(net_filesize)
        print 'multi process downloading...'
        threads = []
        thread_size = net_filesize / Thread_Num
        left_size = net_filesize % Thread_Num
        pos = 0
        for i in xrange(0,Thread_Num):
            range_start = pos
            pos += thread_size-1
            if i==0:pos += left_size
            range_end = pos
            pos += 1
            t = mpdown(range_start,range_end)
            threads.append(t)
            #print 'range=%d-%d'%(range_start,range_end)
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        file_write_finish()
    
        time_end = datetime.datetime.now()
        s = (time_end-time_start).seconds
        if s<1:s=1
        print 'time passed:%dh %dm %ds'%(s/3600,s/60,s%60)
        print 'speed:%dkb/s'%(net_filesize/1000.0/s)
        
    
    def main():
        """Download every URL given on the command line, or prompt for one.

        Each URL is stored in the global ``url`` before download() is called.
        """
        global url
        targets = sys.argv[1:]
        if not targets:
            url = raw_input("input download url:")
            download()
            return
        for target in targets:
            url = target
            download()
    
    def test():
        req = urllib2.Request(url)
        req.add_header('Range','bytes=%d-%d'%(1,500))
        print req
        r = urllib2.urlopen(req)
        print r.info()
    
    # Start the downloader only when the file is run as a script.
    if __name__ == '__main__':
        main()
    

      

  • 相关阅读:
    (二)ABP+MetroNic--- 用户管理及角色管理的实现
    BootstrapValidator 自定义服务端验证
    ASP.Net Web API 的参数绑定[翻译]
    ABP webapi IDInput传参
    JqueryTable ServerSide Ajax 数据加载及样式设置
    JqueryTable踩过的坑
    Lambda 表达式中 动态解析OrderbyLinQ语句的实现
    jenkins问题整理
    maven profile实现多环境配置
    Spring 发送内嵌图片的邮件 遇到的问题
  • 原文地址:https://www.cnblogs.com/fwindpeak/p/4060759.html
Copyright © 2011-2022 走看看