zoukankan      html  css  js  c++  java
  • 漫画爬取

    #!/usr/bin/env python
    # _*_ coding: utf-8 _*_
    # __author__ ='kong'
     
    from selenium import webdriver
    import os
    import time
     
    class GetCartoon(object):
        """Save every page of one comic chapter as a PNG screenshot.

        Instantiating the class performs the whole job: it opens ``startUrl``
        in a PhantomJS browser, creates a directory named after the page
        title, and stores one screenshot per page in it (``1.png``,
        ``2.png``, ...).
        """

        def __init__(self):
            self.startUrl = "http://www.1kkk.com/ch1-406302/"
            self.browser = self.getBrowser()
            self.saveCartoon()

        def getBrowser(self):
            """Start PhantomJS and load ``self.startUrl``.

            Loading is best-effort: a failure is reported on stdout and the
            browser object is returned anyway, as before.

            Returns:
                The PhantomJS webdriver instance.
            """
            browser = webdriver.PhantomJS()
            try:
                browser.get(self.startUrl)
                # Applies to later element lookups: wait up to 5 seconds.
                browser.implicitly_wait(5)
                print("url get ok")
            except Exception:
                # Not a bare ``except``: Ctrl-C / SystemExit must propagate.
                print("url get failed")
            return browser

        def createDir(self, fileName):
            """Create directory ``fileName`` (with parents) unless it exists."""
            if os.path.exists(fileName):
                print("dir is exists")
            else:
                os.makedirs(fileName)
                print("dir create ok")

        def saveCartoon(self, delay=3):
            """Screenshot each page of the chapter into its own directory.

            The directory name is the part of the page title before the first
            underscore. The process working directory is changed to that
            directory, and the browser is shut down when the method finishes,
            whether it succeeded or raised.

            Args:
                delay: Seconds to wait after clicking "next" so the following
                    page can render before it is captured.
            """
            try:
                fileName = self.browser.title.split("_")[0]
                self.createDir(fileName)
                os.chdir(fileName)
                # The total page count is read from the page itself.
                sumPage = int(self.browser.find_element_by_xpath(
                    "//font[@class='zf40']/span[2]").text)
                for page in range(1, sumPage + 1):
                    # Save the rendered HTML page as an image.
                    self.browser.get_screenshot_as_file(str(page) + '.png')
                    if page == sumPage:
                        # Last page captured: do not click "next" again.
                        break
                    # Click the "next page" button, then give it time to load.
                    self.browser.find_element_by_id("next").click()
                    time.sleep(delay)
            finally:
                # quit() ends the whole driver session, not just the window.
                self.browser.quit()
     
    # Script entry point: constructing GetCartoon runs the whole job from
    # its __init__ (it calls getBrowser and saveCartoon).
    if __name__ == '__main__':
        GC = GetCartoon()
  • 相关阅读:
    统计数据库表中记录数
    在水晶报表中写一个条件判断语句
    数据库范式
    动态控件的新思路
    连续打印问题的解决
    水晶报表中测试纸张的margins
    向报表中传递参数
    JS实现页面跳转
    在dos下访问ntfs
    时间和字符混合处理
  • 原文地址:https://www.cnblogs.com/HomeG/p/10527146.html
Copyright © 2011-2022 走看看