zoukankan      html  css  js  c++  java
  • python多线程爬虫:亚马逊价格

    import re
    import requests
    
    import threading
    import time
    from time import ctime,sleep
    
    from queue import Queue
    
    
    # Search keywords, split into five batches. Each batch is handed to its own
    # worker thread further down in this script.
    # NOTE(review): the entries look like product part numbers - confirm.
    keywords_a = [
        'ELPLP80',
        'ELPLP23',
        'ELPLP29',
        'NP14LP',
        'POA-LMP126',
        'ELPLP66',
    ]

    # Second batch.
    keywords_b = [
        'VIP230W0.8E20.8',
        'VIP240W0.8E20.9N',
        'NP30LP',
        'LMP-C162',
        'VT70LP',
    ]

    # Third batch.
    keywords_c = [
        'TLPLV4',
        'POA-LMP131',
        'BL-FP240A',
        'VLT-XD3200LP',
        'ET-LAD35',
        'BL-FU240A',
        '20-01032-20',
    ]

    # Fourth batch.
    keywords_d = [
        'ELPLP76',
        'VLT-HC3800LP',
        'BL-FP240C',
        '5811116765-S',
        'ELPLP69',
        'BL-FP200H',
    ]

    # Fifth batch.
    keywords_e = [
        '5100MP',
        'RLC-057',
        'ELPLP71',
        'ELPLP64',
        'BL-FS300B',
    ]
    
    
    # Regular expression that captures the price text inside the price <span>
    # of a search-result page (non-greedy, so each span is matched separately).
    Re_rule = r'<span class="a-size-base a-color-price s-price a-text-bold">(.*?)</span>'

    # Request headers that present the client as a desktop browser.
    headers_am = {
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/50.0.2661.86 Safari/537.36'
        ),
    }
    def _first_price(url, timeout):
        """Fetch *url* and return the first price matched by ``Re_rule``.

        Returns ``None`` when the request fails or the page contains no match,
        so a single bad keyword cannot abort the calling worker thread.
        """
        try:
            response = requests.get(url, headers=headers_am, timeout=timeout)
        except requests.RequestException as exc:
            print('request failed for {}: {}'.format(url, exc))
            return None
        matches = re.findall(Re_rule, response.text)
        return matches[0] if matches else None


    def Scraper(kw, timeout=10):
        """Look up and print two prices for every keyword in *kw*.

        For each keyword two search pages are fetched: the general Amazon
        search and the store-restricted search. The first price found on each
        page is printed together with the keyword and the current time.

        Args:
            kw: iterable of search keyword strings.
            timeout: seconds to wait for each HTTP request before giving up.
                Without a timeout a stalled connection would block the thread
                (and the final ``join``) indefinitely.

        A price that could not be retrieved is printed as ``None`` instead of
        raising ``IndexError``.
        """
        for i in kw:
            # Amazon search page for the keyword.
            url_keyword = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords={}'.format(i)
            # Store-restricted search page for the same keyword.
            url_Epharos = 'http://www.amazon.com/s/ref=nb_sb_noss?url=srs%3D9143518011%26search-alias%3Dspecialty-aps&field-keywords={}'.format(i)

            price = _first_price(url_keyword, timeout)
            price_e = _first_price(url_Epharos, timeout)

            print('--------------------------{0}爬去完毕at{1}j结果: 市场价:{2} Epharos:{3}'.format(i, time.ctime(), price, price_e))
            # Pause between keywords to avoid hammering the site.
            time.sleep(1)


    # One worker thread per keyword batch; args must be a tuple.
    threads = [
        threading.Thread(target=Scraper, args=(keywords,))
        for keywords in (keywords_a, keywords_b, keywords_c, keywords_d, keywords_e)
    ]
    # Keep the original per-thread names available.
    t1, t2, t3, t4, t5 = threads

    if __name__ == '__main__':
        for t in threads:
            # Daemon threads do not keep the interpreter alive on their own.
            t.daemon = True
            t.start()

        # Wait for every worker to finish before reporting completion.
        for t in threads:
            t.join()

        print("all over %s" % ctime())

      

  • 相关阅读:
    java环境基础步骤 svn
    java环境基础步骤 jdk tomcat eclipse
    @ModelAttribute 注解及 POJO入参过程
    cookie小记
    jquery指index
    Js文件中文乱码
    Redis基础(转)
    eclipse导出jar包的方法
    简单谈谈如何利用h5实现音频的播放
    yii2 GridView 下拉搜索实现案例教程
  • 原文地址:https://www.cnblogs.com/alan-babyblog/p/5456875.html
Copyright © 2011-2022 走看看