zoukankan      html  css  js  c++  java
  • 斗图网

    import os
    import re
    from urllib.parse import urljoin, urlsplit

    import requests
    from lxml import etree
    # Crawler for the doutula.com article listing: walks the listing pages,
    # opens every linked article and saves each image found in the article
    # body into OUTPUT_DIR, named after the image's alt text.

    START_URL = 'https://www.doutula.com/article/list/?page={}'
    HEADERS = {
        'user-agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/72.0.3626.121 Safari/537.36'
        ),
    }
    OUTPUT_DIR = '斗图资源'
    FIRST_PAGE = 1
    LAST_PAGE = 604
    REQUEST_TIMEOUT = 10  # seconds; without it a stalled socket blocks forever


    def image_filename(title, url):
        """Return a safe file name for an image.

        The name is built from *title* (the image's alt text) plus the
        extension found in the path component of *url*.

        * Characters that act as path separators or are rejected by common
          file systems are replaced with ``_`` so the title cannot escape the
          output directory.
        * The extension is read from the URL path only (query string ignored)
          and cut at the first non-alphanumeric character, so ``.jpeg`` and
          suffixed URLs such as ``.jpg!dta`` are handled.
        * If the title is empty the URL's base name is used, then ``image``.
        """
        stem, ext = os.path.splitext(os.path.basename(urlsplit(url).path))
        ext_match = re.match(r'\.[A-Za-z0-9]+', ext)
        suffix = ext_match.group(0) if ext_match else ''
        safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', title).strip()
        return (safe_title or stem or 'image') + suffix


    def fetch_tree(url):
        """Download *url* and return it parsed by ``etree.HTML``.

        Raises ``requests.RequestException`` on network errors, timeouts and
        HTTP error statuses. The parse result is returned as is and may be
        ``None``; callers check for that.
        """
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return etree.HTML(response.text)


    def article_links(page_url):
        """Return the absolute URLs of all articles linked from a listing page."""
        tree = fetch_tree(page_url)
        if tree is None:
            return []
        hrefs = tree.xpath('//div[@ class="col-sm-9"]/a/@href')
        return [urljoin(page_url, href) for href in hrefs]


    def article_images(article_url):
        """Return ``(alt_text, absolute_src)`` pairs for an article's images.

        Both attributes are read from the same ``<img>`` element so that an
        image without ``alt`` cannot shift the pairing of later images.
        Images without ``src`` are ignored.
        """
        tree = fetch_tree(article_url)
        if tree is None:
            return []
        pairs = []
        for img in tree.xpath(
                '//div[@ class="artile_des"]/table/tbody/tr/td/a/img'):
            src = img.get('src')
            if src:
                pairs.append((img.get('alt') or '', urljoin(article_url, src)))
        return pairs


    def save_image(title, url, directory=OUTPUT_DIR):
        """Download the image at *url* into *directory*.

        Raises ``requests.RequestException`` if the download fails and
        ``OSError`` if the file cannot be written.
        """
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        target = os.path.join(directory, image_filename(title, url))
        with open(target, 'wb') as f:
            f.write(response.content)


    def report_skip(url, exc):
        """Tell the user which URL was skipped and why."""
        print('跳过: {} ({})'.format(url, exc))


    def crawl(first_page=FIRST_PAGE, last_page=LAST_PAGE):
        """Download every article image from the listing pages.

        Pages ``first_page`` to ``last_page`` (inclusive) are visited. A
        failure on a page, an article or a single image is reported and
        skipped; the crawl carries on with the next item.

        Returns the number of images saved.
        """
        # Built here rather than at module level so that importing this file
        # does not need requests/lxml to be resolved yet.
        skippable = (requests.RequestException, OSError, etree.LxmlError)

        os.makedirs(OUTPUT_DIR, exist_ok=True)
        downloaded = 0
        for page in range(first_page, last_page + 1):
            page_url = START_URL.format(page)
            try:
                articles = article_links(page_url)
            except skippable as exc:
                report_skip(page_url, exc)
                continue

            for article_url in articles:
                try:
                    images = article_images(article_url)
                except skippable as exc:
                    report_skip(article_url, exc)
                    continue

                for title, image_url in images:
                    try:
                        save_image(title, image_url)
                    except skippable as exc:
                        report_skip(image_url, exc)
                        continue
                    downloaded += 1
                    print("》》》{}下载已完成".format(title))
                    print("》》》已经下载了{}".format(downloaded)+"张")
        return downloaded


    if __name__ == '__main__':
        crawl()
  • 相关阅读:
    我該怎麼辦
    這麼多年興許從來沒有釋放過
    我想做一個快樂的孩子
    纠结
    如何克服心煩氣躁
    如何樹立威嚴
    第零次作业
    第三周作业
    第二周作业,
    第二周作业
  • 原文地址:https://www.cnblogs.com/LQ970811/p/10558933.html
Copyright © 2011-2022 走看看