zoukankan      html  css  js  c++  java
  • Python 爬取妹子图(技术是无罪的)

    ...

    import requests
    from bs4 import BeautifulSoup
    import os
    import sys
    
    class mzitu():
        """Download every image of one gallery page into its own folder.

        Each gallery gets a folder named after the page title, created under
        ``base_dir``. The process working directory is switched to that
        folder and the images are written there.
        """

        def __init__(self, base_dir="E:mzitu", timeout=10):
            """Configure where galleries are stored and how long requests wait.

            Args:
                base_dir: Directory under which one folder per gallery is
                    created.
                timeout: Seconds passed to ``requests.get`` as ``timeout``.
            """
            # Resolve once, up front: mkdir() changes the working directory,
            # which would otherwise shift a relative base path on every call.
            self.base_dir = os.path.abspath(base_dir)
            self.timeout = timeout

        def html(self, href):
            """Fetch the gallery at *href* and save the image of every page.

            Args:
                href: URL of the gallery's first page; page ``n`` is fetched
                    from ``href + '/' + str(n)``.
            """
            html = self.request(href)
            # Parse once and reuse the tree for both the title and the pager.
            soup = BeautifulSoup(html.text, 'lxml')
            title = soup.find('h2', class_='main-title').get_text()
            print(u'开始保存:', title)
            self.mkdir(str(title))
            # The second-to-last <span> of the pager holds the page count.
            spans = soup.find('div', class_='pagenavi').find_all('span')
            max_span = spans[-2].get_text()
            for page in range(1, int(max_span) + 1):
                page_url = href + '/' + str(page)
                self.img(page_url)

        def img(self, page_url):
            """Find the main image on *page_url* and save it."""
            img_html = self.request(page_url)
            soup = BeautifulSoup(img_html.text, 'lxml')
            img_url = soup.find('div', class_='main-image').find('img')['src']
            self.save(img_url)

        def save(self, img_url):
            """Download *img_url* into the current working directory.

            The file name is the two characters that precede the last four
            characters of the URL, plus ``.jpg``.
            """
            name = img_url[-6:-4]
            img = self.request(img_url)
            # 'wb' rather than 'ab': a rerun must replace the file, not
            # append a second copy of the bytes to it.
            with open(name + '.jpg', 'wb') as f:
                f.write(img.content)

        def request(self, url):
            """GET *url* with a browser User-Agent and return the response."""
            headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}
            # Without a timeout requests.get can block forever.
            content = requests.get(url, headers=headers, timeout=self.timeout)
            return content

        def mkdir(self, path):
            """Create the gallery folder if needed and switch into it.

            Args:
                path: Folder name; surrounding whitespace is stripped.

            Returns:
                True if the folder was created, False if it already existed.
            """
            path = path.strip()
            # One path for the check, the creation and the chdir.
            target = os.path.join(self.base_dir, path)
            created = not os.path.exists(target)
            if created:
                print(u'创建', path, u'文件夹')
                os.makedirs(target)
            else:
                print(u'名字叫做', path, u'的文件夹已经存在了')
            # Switch in both cases so save() always writes into the gallery
            # folder, including when it is left over from an earlier run.
            os.chdir(target)
            return created
    # Script entry: build the downloader, then fetch one gallery by its URL.
    Mzitu = mzitu()
    Mzitu.html(href='http://www.mzitu.com/92251')

    ...

  • 相关阅读:
    JQuery对象操作支持链式法则源码分析
    JQuery + JSON作为前后台数据交换格式实践
    JQuery html API支持解析执行Javascript脚本功能实现-代码分析
    跨域访问实践
    XP下安装MAC OS虚拟系统
    Android APP开发笔记
    CSS浮动与清浮动
    LUA 模块化编程例子
    JavaScript解决命名冲突的一种方法
    XML中文本节点存储任意字符的方法
  • 原文地址:https://www.cnblogs.com/mysterious-killer/p/10155950.html
Copyright © 2011-2022 走看看