zoukankan      html  css  js  c++  java
  • 抓取 URL 中的图片并保存到本地(demo)

    import requests
    from lxml import etree
    from furl import furl
    
    # Fetch the page and parse it into an lxml element tree.
    url = 'https://dsd.com'
    html = requests.get(url).text
    element = etree.HTML(html)

    # Alternative (regex instead of XPath):
    #   re.findall('"objURL":"(.*?)",', html, re.S)
    #
    # XPath notes kept for reference:
    #   //div/img/@src
    #   li[contains(@title, 'province')]
    #   [@href and @lmv='TV series']
    #   [@href|@lmv]
    #   item[@company_name='" + strArray[0] + "' and @already_sent='0']
    #   xpath('//div[contains(@class,"a") and contains(@class,"b")]')
    #   //div[contains(concat(' ', @class, ' '), 'demo')]

    # One list of text nodes per <img> found under the reader container.
    # NOTE(review): <img> elements normally carry no text children, so these
    # lists are presumably empty, and `imgs` is not used again in the visible
    # script — confirm whether './@src' was intended.
    imgs = list(map(
        lambda node: node.xpath('./text()'),
        element.xpath('//div[@class="reader-container"]/div//img'),
    ))
    
    
    # Inline sample markup: the first <img> uses `src`, the second uses
    # `data-src`.
    html = '''<div class="mod flow-ppt-mod">
    <div class="page-1 ppt-page-item  batch-50-1" id="pageNo-1">
    <div class="ppt-image-wrap ppt-16-9">
    <img src="https://sdsd.com?pn=1" alt="">
    </div>
    </div>
    <div class="page-2 ppt-page-item  batch-50-1" id="pageNo-2">
    <div class="ppt-image-wrap ppt-16-9">
    <img data-src="https://sdsd.com?pn=2">
    </div>
    </div>'''

    # Re-parse, this time from the inline sample instead of the fetched page.
    element = etree.HTML(html)

    # XPath notes kept for reference:
    #   //div/img/@src
    #   li[contains(@title, 'province')]
    #   [@href and @lmv='TV series']
    #   [@href|@lmv]
    #   item[@company_name='" + strArray[0] + "' and @already_sent='0']
    #   xpath('//div[contains(@class,"a") and contains(@class,"b")]')
    #   //div[contains(concat(' ', @class, ' '), 'demo')]

    # Collect every image address, taking `src` values before `data-src`
    # values for each <img> so both attribute styles are picked up.
    urls = []
    for img in element.xpath('//div//img'):
        urls.extend(img.xpath('./@src') + img.xpath('./@data-src'))
    
    
    def download(url):
        """Download one image and save it as ``<pn>.jpg`` in the ppt folder.

        The file name is taken from the ``pn`` query parameter of *url*.

        Args:
            url: Address of the image; must carry a ``pn`` query parameter
                (presumably a missing ``pn`` raises ``KeyError`` from
                ``furl`` — confirm against the furl documentation).

        Returns:
            None. If the request fails with a connection error or a timeout,
            a message is printed and nothing is written.
        """
        try:
            pic = requests.get(url, timeout=5)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            print('图片无法下载')
            # Nothing was fetched; returning here avoids touching the
            # unbound `pic` below.
            return

        # Build the destination path from the `pn` query parameter.
        # Alternative without furl:
        #   kv = dict(s.split('=') for s in url.split('?')[1].split('&'))
        f = furl(url)
        path = r'C:\Users\Semi-Luy\Desktop\ppt' + '\\' + f.args['pn'] + '.jpg'
        # The context manager closes the file even if the write fails.
        with open(path, 'wb') as fp:
            fp.write(pic.content)
    
    # Announce the start, then echo and download each collected URL in order.
    # The loop variable rebinds the module-level `url` defined at the top of
    # the script.
    print("开始下载图片:\r\n")
    for url in urls:
        print(url)
        download(url)
    

      

  • 相关阅读:
    String和enum的互相转换
    LeetCode: Sort Colors
    LeetCode: Subsets II
    LeetCode: Spiral Matrix II
    LeetCode: Subsets
    LeetCode: Sum Root to Leaf Numbers
    LeetCode: Sqrt(x)
    LeetCode: Sudoku Solver
    LeetCode: Spiral Matrix
    LeetCode: Substring with Concatenation of All Words
  • 原文地址:https://www.cnblogs.com/iupoint/p/15624274.html
Copyright © 2011-2022 走看看