zoukankan      html  css  js  c++  java
  • ----爬虫(某东)----

    import urllib.request as eq
    import re
    # Scrape product thumbnails from one JD search-results page and save them
    # to a local directory as sequentially numbered .jpg files.
    url='https://search.jd.com/Search?keyword=%E5%85%85%E6%B0%94%E5%A8%83%E5%A8%83&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E5%85%85%E6%B0%94%E5%A8%83%E5%A8%83&page=3&s=63&click=0'
    # Use a context manager so the HTTP response is always closed.
    with eq.urlopen(url) as response:
        # Decode the bytes to text; str(bytes) would yield the "b'...'" repr
        # with escape sequences rather than the page markup. The request asks
        # for enc=utf-8; undecodable bytes are dropped instead of aborting.
        html1 = response.read().decode('utf-8', errors='ignore')
    # Raw string with an escaped dot so only a literal ".jpg" suffix matches.
    # The capture group is the protocol-relative image address without "//".
    pat2 = r'<img width="220" height="220" class="err-product" data-img="1" src="//(.+?\.jpg)" />'
    imagelist = re.findall(pat2, html1)
    print(imagelist)
    # Number files by position in the match list, so a failed download leaves
    # exactly one predictable gap instead of shifting later numbers.
    for x, imageurl in enumerate(imagelist, start=1):
        imagename = 'D:/python36/pachong/img1/' + str(x) + '.jpg'
        full_url = 'http://' + imageurl
        print(full_url)
        try:
            eq.urlretrieve(full_url, filename=imagename)
        except eq.URLError as e:
            # Best-effort download: report the failure and move on.
            # HTTPError (a URLError subclass) carries `code`; plain URLError
            # carries `reason`.
            detail = getattr(e, 'code', None) or getattr(e, 'reason', e)
            print('download failed for', full_url, '->', detail)
    

      

  • 相关阅读:
    问题2017S03
    问题2017S02
    高等代数问题1
    无穷积分换元法的严格解释
    线性空间的同构理论
    问题2017S01
    朴素贝叶斯分类
    决策树
    温习MATLAB
    感知机
  • 原文地址:https://www.cnblogs.com/w-s-l123/p/10046537.html
Copyright © 2011-2022 走看看