zoukankan      html  css  js  c++  java
  • 下载百度贴吧图片

    使用正则表达式

    # -*- coding: utf-8 -*-
    import urllib
    import re
    
    
    def get_content(url):
        '''
        Fetch the raw content of a web page.

        Opens ``url`` with ``urllib.urlopen``, reads the whole response body
        and returns it.  The response is closed even if reading fails.
        '''
        html = urllib.urlopen(url)
        try:
            return html.read()
        finally:
            # Always release the connection, including when read() raises.
            html.close()
    
    
    def get_images(info):
        '''
        Download every matching .jpg image referenced in the page source.

        ``info`` is the HTML text of the page.  Each ``src`` URL ending in
        ``.jpg`` that directly follows ``class="BDE_Image"`` is saved as
        ``images/<n>.jpg``, numbered from 0 in order of appearance.
        The ``images`` directory is not created by this function.
        '''
        # Example of the tag being matched:
        #   <img class="BDE_Image" src="http://imgsrc.baidu.com/forum/w%3D580/
        #   sign=269396684d4a20a4311e3ccfa0539847/0aa95edf8db1cb132cd1f269df54564e92584b15.jpg"
        #   pic_ext="jpeg" width="510" height="765">
        #
        # The dot before "jpg" is escaped so it only matches a literal ".",
        # and [^"] keeps the capture inside the quotes of the src attribute.
        pattern = re.compile(r'class="BDE_Image" src="([^"]+\.jpg)"')

        for index, image in enumerate(pattern.findall(info)):
            urllib.urlretrieve(image, "images/%s.jpg" % index)
    
    
    # Thread page to scrape.
    url = "http://tieba.baidu.com/p/2772656630"
    # Fetch the page source, then download the images it references.
    info = get_content(url)
    get_images(info)
    

    使用BeautifulSoup

    # -*- coding: utf-8 -*-
    import urllib
    from bs4 import BeautifulSoup
    
    
    def get_content(url):
        '''
        Fetch the raw content of a web page.

        Opens ``url`` with ``urllib.urlopen``, reads the whole response body
        and returns it.  The response is closed even if reading fails.
        '''
        html = urllib.urlopen(url)
        try:
            return html.read()
        finally:
            # Always release the connection, including when read() raises.
            html.close()
    
    
    def get_images(info):
        '''
        Download every image tagged with the ``BDE_Image`` class.

        ``info`` is the HTML text of the page.  Each matching element's
        ``src`` is printed and then saved as ``images/bs4-<nnn>.jpg``, where
        ``<nnn>`` is a counter starting at 1, zero-padded to three digits.
        The ``images`` directory is not created by this function.
        '''
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed alongside bs4.
        soup = BeautifulSoup(info, "html.parser")

        for i, image in enumerate(soup.find_all(class_="BDE_Image"), 1):
            image_add = image.get('src')
            print('%d -- %s' % (i, image_add))
            # Zero-pad the counter to three digits: 001, 002, ..., 100.
            urllib.urlretrieve(image_add, "images/bs4-%03d.jpg" % i)
    
    
    # Thread page to scrape.
    url = "http://tieba.baidu.com/p/2772656630"
    # Fetch the page source, then download the images it references.
    info = get_content(url)
    get_images(info)
    
  • 相关阅读:
    定时任务 常用cron表达式
    链接UTF-8编码带BOM尾,访问出现404
    centos安装nginx
    centos安装jdk
    javaweb数据分页
    mvn clean install对idea中项目结构的影响
    spring使用servlet过滤器filter,进行登录校验
    docker安装mysql并修改远程登陆权限
    使用token令牌控制接口幂等性
    718.最长重复子数组
  • 原文地址:https://www.cnblogs.com/keer2345/p/6011966.html
Copyright © 2011-2022 走看看