zoukankan      html  css  js  c++  java
  • python单线程,多线程和协程速度对比

            在某些应用场景下,想要提高python的并发能力,可以使用多线程,或者协程。比如网络爬虫,数据库操作等一些IO密集型的操作。下面对比python单线程,多线程和协程在网络爬虫场景下的速度。

    一,单线程。

      单线程代码

    #!/usr/bin/env python
    # coding:utf8
    # Author: hz_oracle

    import MySQLdb
    import gevent
    import requests
    import time


    class DbHandler(object):
        """Small wrapper around a MySQLdb connection that lists picture URLs.

        The connection is opened lazily by ``db_conn``; until then ``conn`` and
        ``cursor`` are ``None`` so that ``db_close`` is always safe to call.
        """

        def __init__(self, host, port, user, pwd, dbname):
            self.host = host
            self.port = port
            self.user = user
            self.pwd = pwd
            self.db = dbname
            self.conn = None
            self.cursor = None

        def db_conn(self):
            """Open the connection and a cursor. Return 1 on success, 0 on failure."""
            try:
                self.conn = MySQLdb.connect(
                    host=self.host, port=self.port, user=self.user,
                    passwd=self.pwd, db=self.db, charset="utf8")
                self.cursor = self.conn.cursor()
            except Exception as e:
                # Report the reason instead of failing silently.
                print(u"数据库连接失败:%s" % e)
                return 0
            return 1

        def get_urls(self, limitation):
            """Return up to ``limitation`` picture URLs, or [] if the query fails."""
            sql = """select pic from picurltable limit %s"""
            try:
                # Let the driver substitute the value instead of interpolating
                # it into the SQL text; int() keeps LIMIT numeric.
                self.cursor.execute(sql, (int(limitation),))
                urls_list = [line[0] for line in self.cursor.fetchall()]
                print(len(urls_list))
            except Exception as e:
                print(u"数据库查询失败:%s" % e)
                return []
            return urls_list

        def db_close(self):
            """Close the cursor and connection if they are open."""
            if self.cursor is not None:
                self.cursor.close()
                self.cursor = None
            if self.conn is not None:
                self.conn.close()
                self.conn = None


    def get_pic(url):
        """Download ``url`` into ./picture/<second-to-last path segment>.jpg.

        Returns "ok" on success and "" when the HTTP request fails.
        """
        try:
            pic_obj = requests.get(url).content
        except Exception:
            print(u"图片出错")
            return ""
        filename = url.split('/')[-2]
        file_path = "./picture/" + filename + '.jpg'
        # The with-block closes the file even if the write raises.
        with open(file_path, 'wb') as fp:
            fp.write(pic_obj)
        return "ok"


    def main():
        """Download 100 pictures one after another and print the elapsed seconds."""
        start_time = time.time()
        db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
        connected = db_obj.db_conn()
        url_list = db_obj.get_urls(100)
        if connected:
            db_obj.db_close()
        # Plain loop: the downloads are side effects, not a mapped result.
        for url in url_list:
            get_pic(url)
        costtime = time.time() - start_time
        print(costtime)
        print("download END")


    if __name__ == "__main__":
        main()

      运行结果

    100
    45.1282339096
    download END

    单线程情况下,下载100张图片花了45秒。

    再来看多线程的情况。

    #!/usr/bin/env python
    # coding:utf8
    # Author: hz_oracle
    
    import MySQLdb
    import gevent
    import requests
    import time
    import threading
    import Queue
    
    # Re-entrant lock; not referenced anywhere else in this listing.
    lock1 = threading.RLock()
    # Shared work queue of picture URLs: filled by DbHandler.get_urls and
    # consumed by MyThread.run (one URL per thread).
    url_queue = Queue.Queue()
    # Not referenced anywhere else in this listing.
    urls_list = list()
    
    
    class DbHandler(object):
        """Small wrapper around a MySQLdb connection that queues picture URLs.

        The connection is opened lazily by ``db_conn``; until then ``conn`` and
        ``cursor`` are ``None`` so that ``db_close`` is always safe to call.
        """

        def __init__(self, host, port, user, pwd, dbname):
            self.host = host
            self.port = port
            self.user = user
            self.pwd = pwd
            self.db = dbname
            self.conn = None
            self.cursor = None

        def db_conn(self):
            """Open the connection and a cursor. Return 1 on success, 0 on failure."""
            try:
                self.conn = MySQLdb.connect(
                    host=self.host, port=self.port, user=self.user,
                    passwd=self.pwd, db=self.db, charset="utf8")
                self.cursor = self.conn.cursor()
            except Exception as e:
                # Report the reason instead of failing silently.
                print(u"数据库连接失败:%s" % e)
                return 0
            return 1

        def get_urls(self, limitation):
            """Put up to ``limitation`` picture URLs on the module-level ``url_queue``.

            Returns 1 on success and 0 if the query fails (including when no
            connection has been opened).
            """
            sql = """select pic  from  picurltable limit  %s"""
            try:
                # Let the driver substitute the value instead of interpolating
                # it into the SQL text; int() keeps LIMIT numeric.
                self.cursor.execute(sql, (int(limitation),))
                for line in self.cursor.fetchall():
                    url_queue.put(line[0])
            except Exception as e:
                print(u"数据库查询失败:%s"  % e)
                return 0
            return 1

        def db_close(self):
            """Close the cursor and connection if they are open."""
            if self.cursor is not None:
                self.cursor.close()
                self.cursor = None
            if self.conn is not None:
                self.conn.close()
                self.conn = None
    
    
    class MyThread(threading.Thread):
        """Worker thread that downloads a single picture URL from ``url_queue``."""

        def __init__(self):
            super(MyThread, self).__init__()

        def run(self):
            """Take one URL off the queue and save it under ./picture/.

            Exits immediately when the queue is empty or the HTTP request fails.
            """
            try:
                # Non-blocking get: a blocking get() would hang this thread
                # forever when there are more threads than queued URLs.
                url = url_queue.get(block=False)
            except Queue.Empty:
                return
            try:
                pic_obj = requests.get(url).content
            except Exception:
                print(u"图片出错")
                return
            filename = url.split('/')[-2]
            file_path = "./picture/" + filename + '.jpg'
            # The with-block closes the file even if the write raises.
            with open(file_path, 'wb') as fp:
                fp.write(pic_obj)
    
    
    def main():
        """Download the queued pictures with one thread per URL and print the elapsed seconds."""
        start_time = time.time()
        db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
        connected = db_obj.db_conn()
        db_obj.get_urls(100)
        if connected:
            db_obj.db_close()
        # One worker per URL actually queued; a fixed count of 100 leaves
        # surplus threads with nothing to fetch when the table is smaller.
        workers = [MyThread() for _ in range(url_queue.qsize())]
        for worker in workers:
            worker.start()
        # join() sleeps until each worker ends, instead of spinning on
        # threading.active_count() and competing with the workers for CPU.
        for worker in workers:
            worker.join()
        costtime = time.time() - start_time
        print(costtime)
        print("download END")
    
    # Run the threaded benchmark only when executed as a script.
    if __name__ == "__main__":
        main()

    运行结果

    15.408192873
    download END

    启用100个线程发现只要花15秒即可完成任务,100个线程可能不是最优的方案,但较单线程有很明显的提升。接着再来看协程。

    协程代码

    #!/usr/bin/env python
    # coding:utf8
    # Author: hz_oracle
    
    import MySQLdb
    import requests
    import time
    import threading
    import Queue
    
    from gevent import monkey; monkey.patch_all()
    import gevent
    
    
    class DbHandler(object):
        """Small wrapper around a MySQLdb connection that lists picture URLs.

        The connection is opened lazily by ``db_conn``; until then ``conn`` and
        ``cursor`` are ``None`` so that ``db_close`` is always safe to call.
        """

        def __init__(self, host, port, user, pwd, dbname):
            self.host = host
            self.port = port
            self.user = user
            self.pwd = pwd
            self.db = dbname
            self.conn = None
            self.cursor = None

        def db_conn(self):
            """Open the connection and a cursor. Return 1 on success, 0 on failure."""
            try:
                self.conn = MySQLdb.connect(
                    host=self.host, port=self.port, user=self.user,
                    passwd=self.pwd, db=self.db, charset="utf8")
                self.cursor = self.conn.cursor()
            except Exception as e:
                # Report the reason instead of failing silently.
                print(u"数据库连接失败:%s" % e)
                return 0
            return 1

        def get_urls(self, limitation):
            """Return up to ``limitation`` picture URLs, or [] if the query fails."""
            sql = """select pic  from  picurltable limit  %s"""
            try:
                # Let the driver substitute the value instead of interpolating
                # it into the SQL text; int() keeps LIMIT numeric.
                self.cursor.execute(sql, (int(limitation),))
                urls_list = [line[0] for line in self.cursor.fetchall()]
            except Exception as e:
                print(u"数据库查询失败:%s"  % e)
                return []
            return urls_list

        def db_close(self):
            """Close the cursor and connection if they are open."""
            if self.cursor is not None:
                self.cursor.close()
                self.cursor = None
            if self.conn is not None:
                self.conn.close()
                self.conn = None
    
    
    def get_pic(url):
        """Download ``url`` into ./picture/<second-to-last path segment>.jpg.

        Returns "ok" on success and "" when the HTTP request fails.
        """
        try:
            pic_obj = requests.get(url).content
        except Exception:
            print(u"图片出错")
            return ""
        filename = url.split('/')[-2]
        file_path = "./picture/" + filename + '.jpg'
        # The with-block closes the file even if the write raises.
        with open(file_path, 'wb') as fp:
            fp.write(pic_obj)
        return "ok"
    
    
    def main():
        """Download 100 pictures with one greenlet per URL and print the elapsed seconds."""
        start_time = time.time()
        db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
        connected = db_obj.db_conn()
        url_list = db_obj.get_urls(100)
        # The URLs are already in memory, so release the connection now.
        if connected:
            db_obj.db_close()
        jobs = [gevent.spawn(get_pic, url) for url in url_list]
        gevent.joinall(jobs)

        costtime = time.time() - start_time
        print(costtime)
        print("download END")
    
    # Run the gevent benchmark only when executed as a script.
    if __name__ == "__main__":
        main()

    运行结果

    10.6234440804
    download END

    使用协程发现只花了10秒多,也就是三种方法中最快的。

    总结:

            三种方法中,单线程最慢,多线程次之,而协程最快。 不过如果对多线程进行优化,也可能变快,这里不讨论。

  • 相关阅读:
    Andrew Ng机器学习 五:Regularized Linear Regression and Bias v.s. Variance
    cadence时各种焊盘层的定义
    cadence 关于PCB封装的设计
    candence 关于焊盘的描述
    cadence 关于DRC设置
    cadence的关于封装层叠
    cadence 学习
    stm32的一些参数
    无刷电机的FOC理解
    wait()、notify、notifyAll()的使用
  • 原文地址:https://www.cnblogs.com/hzpythoner/p/7777910.html
Copyright © 2011-2022 走看看