zoukankan      html  css  js  c++  java
  • 单线程爬虫VS多线程爬虫的效率对比

    单线程爬虫:

    import re
    import requests
    import time
    
    # Browser-like User-Agent value sent with every request below.
    _USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/50.0.2661.86 Safari/537.36')

    # One (url, headers) pair per store listed in `name` below.
    # NOTE(review): presumably the `me=` query parameter selects the seller -- confirm.
    url_EB = 'http://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A22XNR713HGDVG&rh=n%3A9063592011%2Ck%3Aprojector&bbn=9063592011&keywords=projector&pickerToList=brandtextbin&ie=UTF8&qid=1461902521'
    headers_EB = {'User-Agent': _USER_AGENT}

    # Built from adjacent string literals rather than a triple-quoted string, so
    # that no newline character ends up inside the query string.
    url_AML = ('https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A3UJI9WWE6PRP5&rh=i%3Amerchant-items'
               '&pickerToList=brandtextbin&ie=UTF8&qid=1461899728')
    headers_AML = {'User-Agent': _USER_AGENT}

    url_DL = 'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=AS7ZU4MN0FPOY&rh=i%3Amerchant-items&pickerToList=brandtextbin&ie=UTF8&qid=1461901862'
    headers_DL = {'User-Agent': _USER_AGENT}

    # Display names used in the progress banners, keyed by a short letter.
    name = {'a':'ExclusiveBulbs',
            'b':'Amazing Lamps',
            'c':'Dynamic Lamps'}
    
    # listing_count = re.findall('<span class="narrowValue">(.*?)</span',data.text)
    # f = dict(map(lambda x,y:[x,y],store_name,listing_count))
    #
    # for k,v in f.items():
    #     print(k,v)
    
    
    
    
    
    
    
    def foo_one(url,headers,name):
        """Fetch ``url`` and print every (link text, count text) pair found in it.

        A start banner containing ``name`` and the current time is printed first.
        The page is then requested with ``headers``, each regex match tuple is
        printed on its own line, an end banner with the current time follows,
        and finally the function sleeps for one second.
        """
        start_banner = '--------------------------开始爬去{0}at{1}---------------------------'
        print(start_banner.format(name,time.ctime()))

        page = requests.get(url,headers=headers)
        # Two capture groups: the "refinementLink" span text and the
        # "narrowValue" span text that immediately follows it.
        pair_pattern = re.compile('<span class="refinementLink">(.*?)</span><span class="narrowValue">(.*?)</span')
        for pair in pair_pattern.findall(page.text):
            print(pair)

        end_banner = '--------------------------爬去完毕at{}----------------------------'
        print(end_banner.format(time.ctime()))
        time.sleep(1)
    
    
    
    
    
    
    if __name__ == '__main__':
        # Scrape the three stores one after another, in the original order.
        jobs = (
            (url_EB, headers_EB, 'a'),
            (url_AML, headers_AML, 'b'),
            (url_DL, headers_DL, 'c'),
        )
        for job_url, job_headers, store_key in jobs:
            foo_one(job_url, job_headers, name[store_key])
    

    输出:00:25:33开始,00:26:02结束 耗时29秒

    --------------------------开始爬去ExclusiveBulbsatSat Apr 30 00:25:33 2016---------------------------
    ('A.Shine', ' (97)')
    ('AmpacElectronics', ' (1,644)')
    ('AuraBeam', ' (33,084)')
    ('AWO', ' (1,206)')
    ('Battery1inc', ' (694)')
    ('Comoze Lamps', ' (6,172)')
    ('Compatible Lamp', ' (317)')
    ('Corgi Lamps', ' (2,124)')
    ('CTLAMP', ' (3,499)')
    ('Dell', ' (191)')
    ('Diamond Lamps', ' (966)')
    ('Dynamic', ' (4)')
    ('Eiki', ' (460)')
    ('ePharos', ' (2,592)')
    ('Epson', ' (1,456)')
    ('EREPLACEMENT', ' (115)')
    ('eReplacements', ' (814)')
    ('eWo's', ' (120)')
    ('eWorldlamp', ' (354)')
    ('FI Lamps', ' (5,707)')
    ('FL Projector Lamp For Mitsubishi', ' (1)')
    ('For Epson', ' (3)')
    ('Generic', ' (9,769)')
    ('Good Lamp', ' (819)')
    ('HCDZ', ' (2,746)')
    ('Hitachi', ' (935)')
    ('IET Lamps', ' (2,144)')
    ('InFocus', ' (44)')
    ('JVC', ' (326)')
    ('KCL', ' (3,781)')
    ('Lampedia', ' (618)')
    ('Lutema', ' (1,956)')
    ('Mitsubishi', ' (1,006)')
    ('Mogobe', ' (1,335)')
    ('MyProjectorLamps', ' (473)')
    ('NEC', ' (446)')
    ('Nec Computers', ' (13)')
    ('Optoma', ' (956)')
    ('Osram Sylvania', ' (78)')
    ('Panasonic', ' (820)')
    ('Philips', ' (7,502)')
    ('Powerwarehouse', ' (9,971)')
    ('Projector Lamps World', ' (112)')
    ('Pureglare', ' (369)')
    ('Samsung', ' (1,078)')
    ('Sharp', ' (426)')
    ('Shopforbattery', ' (2,510)')
    ('SMART BOARD', ' (66)')
    ('Sony', ' (990)')
    ('TVLampsforless', ' (14)')
    ('Unknown', ' (722)')
    --------------------------爬去完毕atSat Apr 30 00:25:57 2016----------------------------
    --------------------------开始爬去Amazing LampsatSat Apr 30 00:25:58 2016---------------------------
    ('AWO', ' (1)')
    ('Comoze Lamps', ' (2)')
    ('DNGO', ' (8)')
    ('Electrified', ' (9)')
    ('ELECTRIFIED', ' (10)')
    ('Electrified Discounters', ' (5)')
    ('ELECTRIFIED LAMPS', ' (1,177)')
    ('ELECTRIFIED PRINTHEAD', ' (24)')
    ('ELECTRIFIED PRINTHEADS', ' (2)')
    ('FI Lamps', ' (2)')
    ('Generic', ' (34)')
    ('GloWatt', ' (1)')
    ('KCL', ' (1)')
    ('OEM', ' (1)')
    ('Powerwarehouse', ' (7)')
    ('SKU', ' (5)')
    ('Top Lamp', ' (1)')
    ('Unknown', ' (1)')
    ('USOM', ' (3)')
    --------------------------爬去完毕atSat Apr 30 00:26:00 2016----------------------------
    --------------------------开始爬去Dynamic LampsatSat Apr 30 00:26:01 2016---------------------------
    ('Battery1inc', ' (85)')
    ('BenQ', ' (237)')
    ('Buslink', ' (31)')
    ('Calumet', ' (2)')
    ('Comoze Lamps', ' (405)')
    ('CTLAMP', ' (615)')
    ('Dell', ' (82)')
    ('Divine Lighting', ' (36)')
    ('DNGO', ' (63)')
    ('Dynamic', ' (4)')
    ('Eiko', ' (140)')
    ('Electrified', ' (2)')
    ('ELECTRIFIED LAMPS', ' (24)')
    ('Electronix Xpress', ' (418)')
    ('ePharos', ' (502)')
    ('Epson', ' (631)')
    ('eReplacements', ' (119)')
    ('FI Lamps', ' (505)')
    ('FL Projector Lamp For Mitsubishi', ' (1)')
    ('G-lamps', ' (43)')
    ('GE', ' (248)')
    ('GE Lighting', ' (152)')
    ('General Electric', ' (53)')
    ('Generic', ' (1,671)')
    ('Genie', ' (101)')
    ('GLAMPS', ' (2)')
    ('Impact', ' (7)')
    ('Industrial Lighting Solutions', ' (9)')
    ('KCL', ' (280)')
    ('Kodak', ' (1)')
    ('Lampedia', ' (63)')
    ('M-Wave', ' (830)')
    ('Mitsubishi', ' (406)')
    ('Mitsubishi DLP TV Bulbs', ' (29)')
    ('Mocpinc', ' (10)')
    ('MyProjectorLamps', ' (344)')
    ('Nec', ' (19)')
    ('Optoma', ' (161)')
    ('Osram', ' (1,295)')
    ('Panasonic', ' (245)')
    ('Philips', ' (988)')
    ('Powerwarehouse', ' (239)')
    ('Projector Lamps World', ' (45)')
    ('Pureglare', ' (107)')
    ('Samsung', ' (323)')
    ('ShopJimmy', ' (3)')
    ('Sony', ' (141)')
    ('Sylvania', ' (115)')
    ('Technical Precision', ' (10)')
    ('Unknown', ' (167)')
    ('Welch Allyn Compatible', ' (1)')
    --------------------------爬去完毕atSat Apr 30 00:26:02 2016----------------------------
    

    多线程:00:32:37开始,00:32:39结束 耗时2秒

    import re
    import requests
    
    import threading
    import time
    from time import ctime,sleep
    
    # Browser-like User-Agent value sent with every request below.
    _USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/50.0.2661.86 Safari/537.36')

    # One (url, headers) pair per store listed in `name` below.
    # NOTE(review): presumably the `me=` query parameter selects the seller -- confirm.
    url_EB = 'http://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A22XNR713HGDVG&rh=n%3A9063592011%2Ck%3Aprojector&bbn=9063592011&keywords=projector&pickerToList=brandtextbin&ie=UTF8&qid=1461902521'
    headers_EB = {'User-Agent': _USER_AGENT}

    # Built from adjacent string literals rather than a triple-quoted string, so
    # that no newline character ends up inside the query string.
    url_AML = ('https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=A3UJI9WWE6PRP5&rh=i%3Amerchant-items'
               '&pickerToList=brandtextbin&ie=UTF8&qid=1461899728')
    headers_AML = {'User-Agent': _USER_AGENT}

    url_DL = 'https://www.amazon.com/gp/search/other/ref=sr_sa_p_4?me=AS7ZU4MN0FPOY&rh=i%3Amerchant-items&pickerToList=brandtextbin&ie=UTF8&qid=1461901862'
    headers_DL = {'User-Agent': _USER_AGENT}

    # Display names used in the progress banners, keyed by a short letter.
    name = {'a':'ExclusiveBulbs',
            'b':'Amazing Lamps',
            'c':'Dynamic Lamps'}
    
    # listing_count = re.findall('<span class="narrowValue">(.*?)</span',data.text)
    # f = dict(map(lambda x,y:[x,y],store_name,listing_count))
    #
    # for k,v in f.items():
    #     print(k,v)
    
    
    
    
    
    
    
    def foo_one(url,headers,name):
        """Thread target: fetch ``url`` and print each (link text, count text) pair.

        Prints a start banner with ``name`` and the current time, requests the
        page with ``headers``, prints every regex match tuple on its own line,
        then prints an end banner that again carries ``name`` and the time.
        """
        started_at = time.ctime()
        print('--------------------------开始爬去{0}at{1}---------------------------'.format(name,started_at))

        html = requests.get(url,headers=headers).text
        # Each hit is a 2-tuple: "refinementLink" span text, "narrowValue" span text.
        hits = re.findall('<span class="refinementLink">(.*?)</span><span class="narrowValue">(.*?)</span',html)
        for hit in hits:
            print(hit)

        finished_at = time.ctime()
        print('--------------------------爬去完毕{0}at{1}----------------------------'.format(name,finished_at))
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    # One worker thread per store; the threads are only created here and are
    # started inside the __main__ guard below.
    threads = []
    t1 = threading.Thread(target=foo_one,args=(url_EB,headers_EB,name['a']))
    threads.append(t1)
    t2 = threading.Thread(target=foo_one,args=(url_AML,headers_AML,name['b']))
    threads.append(t2)
    t3 = threading.Thread(target=foo_one,args=(url_DL,headers_DL,name['c']))
    threads.append(t3)

    if __name__ == '__main__':
        for t in threads:
            # The `daemon` attribute replaces the deprecated setDaemon() call.
            t.daemon = True
            t.start()

        # Join every worker, not only the last one started: daemon threads are
        # abandoned when the main thread exits, so a worker that is slower than
        # the last thread would otherwise be cut off before printing its results.
        for t in threads:
            t.join()

        print ("all over %s" %ctime())
    

    输出(注意:下面的输出中没有 Amazing Lamps 的抓取结果,该线程在程序退出前没有打印出结果,因此 2 秒的耗时并不包含全部三个任务):

    --------------------------开始爬去ExclusiveBulbsatSat Apr 30 00:32:37 2016---------------------------
    --------------------------开始爬去Amazing LampsatSat Apr 30 00:32:37 2016---------------------------
    --------------------------开始爬去Dynamic LampsatSat Apr 30 00:32:37 2016---------------------------
    ('A.Shine', ' (97)')
    ('AmpacElectronics', ' (1,645)')
    ('AuraBeam', ' (33,088)')
    ('AWO', ' (1,209)')
    ('Battery1inc', ' (694)')
    ('Comoze Lamps', ' (6,172)')
    ('Compatible Lamp', ' (317)')
    ('Corgi Lamps', ' (2,123)')
    ('CTLAMP', ' (3,501)')
    ('Dell', ' (191)')
    ('Diamond Lamps', ' (966)')
    ('Dynamic', ' (4)')
    ('Eiki', ' (457)')
    ('ePharos', ' (2,592)')
    ('Epson', ' (1,456)')
    ('EREPLACEMENT', ' (115)')
    ('eReplacements', ' (813)')
    ('eWo's', ' (120)')
    ('eWorldlamp', ' (354)')
    ('FI Lamps', ' (5,710)')
    ('FL Projector Lamp For Mitsubishi', ' (1)')
    ('For Epson', ' (3)')
    ('Generic', ' (9,771)')
    ('Good Lamp', ' (819)')
    ('HCDZ', ' (2,748)')
    ('Hitachi', ' (935)')
    ('IET Lamps', ' (2,137)')
    ('InFocus', ' (44)')
    ('JVC', ' (326)')
    ('KCL', ' (3,783)')
    ('Lampedia', ' (618)')
    ('Lutema', ' (1,955)')
    ('Mitsubishi', ' (1,006)')
    ('Mogobe', ' (1,336)')
    ('MyProjectorLamps', ' (473)')
    ('NEC', ' (450)')
    ('Nec Computers', ' (13)')
    ('Optoma', ' (956)')
    ('Osram Sylvania', ' (78)')
    ('Panasonic', ' (820)')
    ('Philips', ' (7,502)')
    ('Powerwarehouse', ' (9,972)')
    ('Projector Lamps World', ' (112)')
    ('Pureglare', ' (369)')
    ('Samsung', ' (1,078)')
    ('Sharp', ' (426)')
    ('Shopforbattery', ' (2,511)')
    ('SMART BOARD', ' (66)')
    ('Sony', ' (990)')
    ('TVLampsforless', ' (14)')
    ('Unknown', ' (722)')
    --------------------------爬去完毕ExclusiveBulbsatSat Apr 30 00:32:38 2016----------------------------
    ('Battery1inc', ' (85)')
    ('BenQ', ' (237)')
    ('Buslink', ' (31)')
    ('Calumet', ' (2)')
    ('Comoze Lamps', ' (405)')
    ('CTLAMP', ' (615)')
    ('Dell', ' (82)')
    ('Divine Lighting', ' (36)')
    ('DNGO', ' (63)')
    ('Dynamic', ' (4)')
    ('Eiko', ' (140)')
    ('Electrified', ' (2)')
    ('ELECTRIFIED LAMPS', ' (24)')
    ('Electronix Xpress', ' (418)')
    ('ePharos', ' (502)')
    ('Epson', ' (631)')
    ('eReplacements', ' (119)')
    ('FI Lamps', ' (505)')
    ('FL Projector Lamp For Mitsubishi', ' (1)')
    ('G-lamps', ' (43)')
    ('GE', ' (248)')
    ('GE Lighting', ' (152)')
    ('General Electric', ' (53)')
    ('Generic', ' (1,671)')
    ('Genie', ' (101)')
    ('GLAMPS', ' (2)')
    ('Impact', ' (7)')
    ('Industrial Lighting Solutions', ' (9)')
    ('KCL', ' (280)')
    ('Kodak', ' (1)')
    ('Lampedia', ' (63)')
    ('M-Wave', ' (830)')
    ('Mitsubishi', ' (406)')
    ('Mitsubishi DLP TV Bulbs', ' (29)')
    ('Mocpinc', ' (10)')
    ('MyProjectorLamps', ' (344)')
    ('Nec', ' (19)')
    ('Optoma', ' (161)')
    ('Osram', ' (1,295)')
    ('Panasonic', ' (245)')
    ('Philips', ' (988)')
    ('Powerwarehouse', ' (239)')
    ('Projector Lamps World', ' (45)')
    ('Pureglare', ' (107)')
    ('Samsung', ' (323)')
    ('ShopJimmy', ' (3)')
    ('Sony', ' (141)')
    ('Sylvania', ' (115)')
    ('Technical Precision', ' (10)')
    ('Unknown', ' (167)')
    ('Welch Allyn Compatible', ' (1)')
    --------------------------爬去完毕Dynamic LampsatSat Apr 30 00:32:39 2016----------------------------
    all over Sat Apr 30 00:32:39 2016
    

      

  • 相关阅读:
    C++文件(夹)选择对话框
    BCB中选择文件对话框TOpenDialog过滤后缀名使用方法
    pjlib深入剖析和使用详解
    PJNATH介绍 -- 开源的用于NAT穿透的ICE, STUN和TURN
    STUN, TURN, ICE介绍
    一个boost底下的线程池
    在Windows下编译WebRTC
    FEC(Forward Error Correction)前向纠错 UDPRTP 中使用用于改善无线等网络丢包等问题--转
    FEC之我见四
    FEC之异或运算应用
  • 原文地址:https://www.cnblogs.com/alan-babyblog/p/5447946.html
Copyright © 2011-2022 走看看