zoukankan      html  css  js  c++  java
  • Python实现多线程下载

    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    # filename: paxel.py
    
    '''A multi-threaded download tool.
    
        It was developed following the design of axel.
            Author: volans
            E-mail: volansw [at] gmail.com
    '''
    
    import sys
    import os
    import time
    import urllib
    from threading import Thread
    
    # Default proxy mapping (URL scheme -> proxy URL); it is the default value
    # of the `proxies` argument of paxel().
    local_proxies = {'http': 'http://131.139.58.200:8080'}
    
    class AxelPython(Thread, urllib.FancyURLopener):
        '''Worker thread that downloads one byte range of a URL into a part file.

        Each instance is both a Thread and a urllib.FancyURLopener: the opener
        side issues the ranged request, and run() (the Thread entry point)
        streams the response into ``filename``.  ``downloaded`` is updated as
        data arrives, so other threads can poll it to report progress.
        '''

        def __init__(self, threadname, url, filename, ranges=0, proxies={}):
            '''Set up the worker.

            threadname -- name of the thread (shown in the log output).
            url        -- resource to download.
            filename   -- part file receiving this worker's bytes; an existing
                          file is treated as a partial download and resumed.
            ranges     -- (first_byte, last_byte) pair, both inclusive.  The
                          default of 0 is kept for compatibility only; run()
                          needs a real pair.
            proxies    -- proxy mapping handed to FancyURLopener (an empty
                          mapping means no proxy).
            '''
            Thread.__init__(self, name=threadname)
            # Give the opener its own copy so the shared default dict (or the
            # caller's dict) is never stored on an instance.
            if proxies is not None:
                proxies = dict(proxies)
            urllib.FancyURLopener.__init__(self, proxies)
            self.name = threadname
            self.url = url
            self.filename = filename
            self.ranges = ranges
            self.downloaded = 0

        def run(self):
            '''Download the assigned range, resuming from an existing part file.'''
            try:
                self.downloaded = os.path.getsize(self.filename)
            except OSError:
                # No part file yet: nothing has been downloaded so far.
                self.downloaded = 0

            # Resume right after the bytes that are already on disk.
            self.startpoint = self.ranges[0] + self.downloaded

            # ranges[1] is sent below as the last byte of the Range header,
            # which is inclusive, so the part is complete only once the start
            # point has moved past it.
            if self.startpoint > self.ranges[1]:
                print('Part %s has been downloaded over.' % self.filename)
                return

            self.oneTimeSize = 16384  # bytes read per iteration (16 KiB)
            print('task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1]))

            self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))

            self.urlhandle = self.open(self.url)
            try:
                # Open the part file once instead of once per chunk.
                with open(self.filename, 'ab+') as filehandle:
                    data = self.urlhandle.read(self.oneTimeSize)
                    while data:
                        filehandle.write(data)
                        # Keep the on-disk size in step with the counter so an
                        # interrupted download can be resumed from the file.
                        filehandle.flush()
                        self.downloaded += len(data)
                        data = self.urlhandle.read(self.oneTimeSize)
            finally:
                self.urlhandle.close()
            
    def GetUrlFileSize(url, proxies={}):
        '''Return the size in bytes that the server reports for ``url``.

        url     -- resource to query.
        proxies -- proxy mapping passed to urllib.urlopen.  The default dict is
                   only read, never modified.

        Returns the integer value of the Content-Length response header, or 0
        when the server does not send one.  Raises ValueError if the header is
        present but not an integer.
        '''
        urlHandler = urllib.urlopen(url, proxies=proxies)
        try:
            # Look the header up by its exact (case-insensitive) name rather
            # than matching any header line that merely contains 'Length'.
            length = urlHandler.info().getheader('Content-Length')
        finally:
            # Only the headers are needed; release the connection.
            urlHandler.close()

        if length is None:
            return 0
        return int(length.strip())
    
    def SpliteBlocks(totalsize, blocknumber):
        '''Split ``totalsize`` bytes into ``blocknumber`` contiguous byte ranges.

        Returns a list of (first_byte, last_byte) tuples with inclusive bounds.
        Every block has totalsize // blocknumber bytes, except the last one,
        which also takes the remainder and always ends at totalsize - 1.
        Raises ZeroDivisionError when blocknumber is 0.
        '''
        # Floor division keeps the offsets integral even where '/' is true
        # division.
        blocksize = totalsize // blocknumber
        ranges = [(i * blocksize, (i + 1) * blocksize - 1)
                  for i in range(blocknumber - 1)]
        ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
        return ranges
    def islive(tasks):
        '''Return True if at least one thread in ``tasks`` is still running.

        tasks -- iterable of threading.Thread objects (an empty iterable gives
                 False).
        '''
        # is_alive() is the supported spelling; the camelCase isAlive() alias
        # is deprecated and no longer exists in recent Python versions.
        return any(task.is_alive() for task in tasks)
    
    def paxel(url, output, blocks=6, proxies=local_proxies):
        '''Download ``url`` to ``output`` using ``blocks`` parallel threads.

        url     -- resource to download.
        output  -- path of the final file.
        blocks  -- number of byte ranges, each fetched by its own thread.
        proxies -- proxy mapping used for the size query and for every worker.

        Each worker writes its range to a temporary file "tmpfile_<i>" in the
        current directory; once all workers have stopped, the parts are
        concatenated into ``output`` and removed.  Progress is written to
        stdout while the workers run.

        Raises IOError when the server does not report a usable size, since
        the byte ranges cannot be computed without it.
        '''
        size = GetUrlFileSize(url, proxies)
        if size <= 0:
            raise IOError('cannot determine the size of %s' % url)
        ranges = SpliteBlocks(size, blocks)

        threadname = ["thread_%d" % i for i in range(0, blocks)]
        filename = ["tmpfile_%d" % i for i in range(0, blocks)]

        tasks = []
        for i in range(0, blocks):
            # Workers must use the same proxies as the size query above.
            task = AxelPython(threadname[i], url, filename[i], ranges[i],
                              proxies=proxies)
            # Daemon threads do not keep the process alive on exit.
            task.setDaemon(True)
            task.start()
            tasks.append(task)

        time.sleep(2)
        while islive(tasks):
            downloaded = sum([task.downloaded for task in tasks])
            process = downloaded / float(size) * 100
            # The leading '\r' moves the cursor back to the start of the line,
            # so each report overwrites the previous one.
            show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
            sys.stdout.write(show)
            sys.stdout.flush()
            time.sleep(0.5)

        # Concatenate the parts in order, then drop each temporary file.
        with open(output, 'wb+') as filehandle:
            for part in filename:
                with open(part, 'rb') as f:
                    filehandle.write(f.read())
                try:
                    os.remove(part)
                except OSError:
                    # Best effort: a leftover part file does not affect output.
                    pass
    
    if __name__ == '__main__':
        # Demo run: fetch the PyGTK tutorial PDF in four parts, without a proxy.
        url, output = "http://www.pygtk.org/dist/pygtk2-tut.pdf", 'pygtk2.pdf'
        paxel(url, output, proxies={}, blocks=4)
  • 相关阅读:
    《MySQL是怎样运行的:从根儿上理解MySQL》笔记4
    《MySQL是怎样运行的:从根儿上理解MySQL》笔记3
    推荐一个对比jar包依赖的工具
    《MySQL是怎样运行的:从根儿上理解MySQL》笔记2
    《MySQL是怎样运行的:从根儿上理解MySQL》笔记1
    查询异步更新状态的一种思路
    springAop:Aop(Xml)配置,Aop注解配置,spring_Aop综合案例,Aop底层原理分析
    java知识点总结
    Maven基础&&Spring框架阶段常用工具类整理
    Idea快捷键整理
  • 原文地址:https://www.cnblogs.com/lanzhi/p/6468455.html
Copyright © 2011-2022 走看看