zoukankan      html  css  js  c++  java
  • python3 面向对象编程--类的封装和继承

    #python3
    import re
    from urllib import request
    import os

    class PosterSpider(object):
        """Crawl the Douban Top 250 listing and save every poster image found.

        Each listing page is scanned for ``<img ... alt="NAME" ... src="URL">``
        tags; every image is downloaded to ``<path>/NAME.jpg``. Pagination is
        followed through the page's ``<link rel="next" href="...">`` element.
        """

        def __init__(self, path='./'):
            """Remember the directory that downloaded posters are written to."""
            self.path = path

        def getStart(self, url):
            """Entry point: crawl ``url`` and every following "next" page.

            Iterates instead of recursing so the call stack does not grow with
            the number of pages. Stops when a page cannot be fetched or when
            the page no longer advertises a next link.
            """
            while url:
                raw = self.reqPage(url)
                if raw is None:
                    # Non-200 answer: nothing to parse, stop the crawl here
                    # instead of crashing on ``None.decode``.
                    break

                page = raw.decode('utf-8')

                for mvname, mvurl in self.getMvInfo(page):
                    self.save2File(mvname, mvurl)

                url = self.getNextpage(page)

        def reqPage(self, url):
            """Fetch ``url`` and return the response body as ``bytes``.

            Returns ``None`` when the response status is not 200. The
            response is always closed, even if reading fails.
            """
            with request.urlopen(url) as response:
                if response.code == 200:
                    return response.read()
            return None

        def getMvInfo(self, page):
            """Return a list of ``(name, image_url)`` tuples found in ``page``.

            The pattern is confined to a single ``<img ...>`` tag (``[^>]``)
            and is non-greedy, so several images on one line each yield
            their own pair instead of being merged into one wrong match.
            """
            return re.findall(
                r'<img[^>]*?\balt="([^"]*)"[^>]*?\bsrc="([^"]*)"', page)

        def getNextpage(self, page):
            """Return the absolute URL of the next listing page, or ``None``.

            The ``href`` value is HTML-unescaped (``&amp;`` -> ``&``) and
            resolved against the listing URL, so query-only, path-relative
            and absolute links are all handled.
            """
            # Local imports: the file header only imports ``urllib.request``.
            from html import unescape
            from urllib.parse import urljoin

            url = 'https://movie.douban.com/top250'
            nexturlraw = re.findall(r'<link rel="next" href="(.*?)"', page)
            if not nexturlraw:
                return None
            return urljoin(url, unescape(nexturlraw[0]))

        def save2File(self, fname, url):
            """Download the image at ``url`` and store it as ``fname + '.jpg'``.

            The target directory is created on demand. Path separators in
            ``fname`` are replaced with ``_`` so a title can never point
            outside ``self.path``. Nothing is written when the download
            did not return a body.
            """
            image = self.reqPage(url)
            if image is None:
                return

            safe_name = fname.replace('/', '_').replace(os.sep, '_') + '.jpg'
            if self.path:
                os.makedirs(self.path, exist_ok=True)
            fpath = os.path.join(self.path, safe_name)
            with open(fpath, 'wb') as f:
                f.write(image)

    if __name__ == '__main__':
        # Script entry: crawl the Douban Top 250 listing and store the
        # posters in the local "img" directory.
        start_url = 'https://movie.douban.com/top250'
        PosterSpider('img').getStart(start_url)

  • 相关阅读:
    DateTime.Now的精度这么高! (转)
    RC4经典加密算法VB版本代码
    反拍卖采购技术的应用
    用C#写 四舍五入函数(函数版)
    CRM(客户关系管理)
    给按钮增加属性
    用C#写 四舍五入函数(原理版)
    合并datagrid中内容相同的单元格(VB.Net)
    DataGrid删除确认及Item颜色交替
    得到目录大小
  • 原文地址:https://www.cnblogs.com/hixiaowei/p/9695253.html
Copyright © 2011-2022 走看看