zoukankan      html  css  js  c++  java
  • 下载百度贴吧图片

    使用正则表达式

    # -*- coding: utf-8 -*-
    import urllib
    import re
    
    
    def get_content(url):
        '''
        Fetch a web page and return its content.

        :param url: address of the page to download.
        :return: the data returned by ``read()`` on the opened response.
        '''
        html = urllib.urlopen(url)
        try:
            # Read inside try/finally so the response is closed even when
            # the read itself raises.
            return html.read()
        finally:
            html.close()
    
    
    def get_images(info):
        '''
        Download every matching image referenced in the page source.

        An image is matched when ``class="BDE_Image"`` is immediately followed
        by a ``src`` attribute whose value ends in ``.jpg``. Matches are saved
        as ``images/<n>.jpg``, numbered from 0 in the order they are found.

        :param info: page source to scan.
        '''
        # Example of the markup being matched:
        #   <img class="BDE_Image" src="http://imgsrc.baidu.com/forum/w%3D580/
        #   sign=269396684d4a20a4311e3ccfa0539847/0aa95edf8db1cb132cd1f269df54564e92584b15.jpg"
        #   pic_ext="jpeg" width="510" height="765">
        #
        # The dot before "jpg" is escaped so only a literal ".jpg" suffix is
        # accepted; unescaped it would match any character (e.g. "xjpg").
        pattern = re.compile(r'class="BDE_Image" src="(.+?\.jpg)"')

        for index, image in enumerate(pattern.findall(info)):
            urllib.urlretrieve(image, "images/%s.jpg" % index)
    
    
    # Baidu Tieba page to scan for images.
    url = "http://tieba.baidu.com/p/2772656630"
    # Fetch the page source, then download the images found in it.
    info = get_content(url)
    get_images(info)
    

    使用BeautifulSoup

    # -*- coding: utf-8 -*-
    import urllib
    from bs4 import BeautifulSoup
    
    
    def get_content(url):
        '''
        Fetch a web page and return its content.

        :param url: address of the page to download.
        :return: the data returned by ``read()`` on the opened response.
        '''
        html = urllib.urlopen(url)
        try:
            # Read inside try/finally so the response is closed even when
            # the read itself raises.
            return html.read()
        finally:
            html.close()
    
    
    def get_images(info):
        '''
        Download every element with class ``BDE_Image`` found in the page.

        Each image address is printed with its running number and the image
        is saved as ``images/bs4-NNN.jpg``, where ``NNN`` is that number
        zero-padded to three digits, starting at 001.

        :param info: page source to parse.
        '''
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(info, "html.parser")

        index = 0
        for image in soup.find_all(class_="BDE_Image"):
            image_add = image.get('src')
            if not image_add:
                # Nothing to download for an element without a src attribute.
                continue
            index += 1
            print("%s -- %s" % (index, image_add))
            # %03d gives the same 001 / 012 / 100 names the manual padding did,
            # without turning the counter into a string and back.
            urllib.urlretrieve(image_add, "images/bs4-%03d.jpg" % index)
    
    
    # Baidu Tieba page to scan for images.
    url = "http://tieba.baidu.com/p/2772656630"
    # Fetch the page source, then download the images found in it.
    info = get_content(url)
    get_images(info)
    
  • 相关阅读:
    C#编程:依赖倒置原则DIP
    java项目与javaweb项目导入jar包的区别
    《C#从入门到精通(第3版)》目录
    Sublime Text 格式化代码
    ThinkPHP 入门
    CentOS 7.2配置LAMP环境——yum版
    打包名命令:tar
    不规则数组的构建
    Linux文件权限概念
    tomcat启动成功但是没有监听8080端口
  • 原文地址:https://www.cnblogs.com/keer2345/p/6011966.html
Copyright © 2011-2022 走看看