zoukankan      html  css  js  c++  java
  • 抓取url中图片并保存到本地demo

    import requests
    from lxml import etree
    from furl import furl
    
    # Fetch the target page and parse it into an lxml element tree.
    url = 'https://dsd.com'
    # A timeout keeps the script from hanging forever on an unresponsive host
    # (requests applies no timeout unless one is given).
    html = requests.get(url, timeout=5).text
    
    # Regex-based alternative for pulling image URLs out of the raw HTML:
    #re.findall('"objURL":"(.*?)",',html, re.S)
    element = etree.HTML(html)
    # XPath quick reference (sample expressions kept verbatim):
    #   //div/img/@src                                  -> attribute values
    #   li[contains(@title, '省')]                       -> attribute contains a substring
    #   [@href and @lmv='电视剧']                         -> both conditions must hold
    #   [@href|@lmv]                                    -> either attribute is present
    #   item[@公司名称='" + strArray[0] + "' and @是否发过='0']  -> predicate built by string concatenation
    #   xpath('//div[contains(@class,"a") and contains(@class,"b")]')  -> element carrying both classes
    #   //div[contains(concat(' ', @class, ' '), 'demo')]             -> class match on a space-padded class list
    # NOTE(review): './text()' on <img> nodes will normally yield empty lists,
    # since <img> has no text content -- confirm whether './@src' was intended.
    imgs = [img.xpath('./text()')
            for img in element.xpath('//div[@class="reader-container"]/div//img')]
    
    
    # Inline sample markup: two slide pages, the first image exposing its URL
    # through `src`, the second (lazy-loaded) through `data-src`.
    html = '''<div class="mod flow-ppt-mod">
    <div class="page-1 ppt-page-item  batch-50-1" id="pageNo-1">
    <div class="ppt-image-wrap ppt-16-9">
    <img src="https://sdsd.com?pn=1" alt="">
    </div>
    </div>
    <div class="page-2 ppt-page-item  batch-50-1" id="pageNo-2">
    <div class="ppt-image-wrap ppt-16-9">
    <img data-src="https://sdsd.com?pn=2">
    </div>
    </div>'''

    # Re-parse: from here on `element` refers to the sample markup above.
    element = etree.HTML(html)

    # For every <img> below a <div>, collect its `src` value(s) first and its
    # `data-src` value(s) second, so the per-image ordering stays src -> data-src.
    urls = [
        link
        for node in element.xpath('//div//img')
        for attr_path in ('./@src', './@data-src')
        for link in node.xpath(attr_path)
    ]
    
    
    def download(url, save_dir=r'C:\Users\Semi-Luy\Desktop\ppt'):
        """Download the image at ``url`` and save it as ``<pn>.jpg`` in ``save_dir``.

        The file name comes from the ``pn`` query parameter of ``url``.

        Args:
            url: Image URL; expected to carry a ``pn`` query parameter.
            save_dir: Directory the image is written to. Defaults to the
                folder that was previously hard-coded; it is not created here.

        Returns:
            None. If the request fails, a message is printed and no file is
            written.

        Raises:
            KeyError: Presumably raised by ``furl`` when ``url`` has no ``pn``
                query parameter -- verify against the furl version in use.
            OSError: If the target file cannot be opened or written.
        """
        import os  # local import so the block does not depend on file-level imports

        try:
            pic = requests.get(url, timeout=5)
            # Treat HTTP error statuses as failures instead of saving an
            # error page under a .jpg name.
            pic.raise_for_status()
        except requests.exceptions.RequestException:
            # Covers connection errors, timeouts and bad HTTP statuses.
            print('图片无法下载')
            # Stop here: there is no response body to write.
            return

        # Build the output path from the page number in the query string.
        f = furl(url)
        path = os.path.join(save_dir, f.args['pn'] + '.jpg')
        # The context manager closes the file even if the write fails.
        with open(path, 'wb') as fp:
            fp.write(pic.content)
    
    # Announce the start of the batch, then process the collected URLs in order.
    print("开始下载图片:\r\n")
    # NOTE: the loop variable rebinds the module-level name `url`.
    for url in urls:
        print(url)  # echo the URL before attempting the download
        download(url)
    

      

  • 相关阅读:
    SQL清除数据库日志方法
    TFS服务器及服务帐号迁域的处理
    about WBS
    ASP.NET URL Rewrite. URL重写
    查看SQL Server中某数据库下每个表占用的空间大小
    [西安招聘] 微软西安分公司 招聘.NET软件工程师,MOSS开发工程师
    企业微信的数据打通
    常见Post请求与实现
    Python解释器与__pycache__文件夹的生成
    微信开放平台OpenID与UnionID的区别
  • 原文地址:https://www.cnblogs.com/iupoint/p/15624274.html
Copyright © 2011-2022 走看看