zoukankan      html  css  js  c++  java
  • python3 面向对象编程--类的封装和继承

    #python3
    import html
    import os
    import re
    from urllib import request
    from urllib.parse import urljoin

    class PosterSpider(object):
        """Download the images referenced by a paginated listing page.

        Starting from a listing URL, every ``<img>`` tag that carries both an
        ``alt`` and a ``src`` attribute is downloaded and stored as
        ``<alt>.jpg`` inside ``path``.  Pagination is followed through the
        page's ``<link rel="next" href="...">`` element.
        """

        # Listing URL that relative "next page" links are resolved against.
        BASE_URL = 'https://movie.douban.com/top250'

        def __init__(self, path='./'):
            """Remember the directory that downloaded images are written to."""
            self.path = path

        # the entry of this process
        def getStart(self, url):
            """Crawl ``url`` and every following page, saving each image found.

            Pages are walked iteratively instead of recursively, and a page
            that was already visited ends the crawl so that a self-referencing
            "next" link cannot cause an endless loop.
            """
            visited = set()
            while url and url not in visited:
                visited.add(url)

                raw = self.reqPage(url)
                if raw is None:
                    # Non-200 answer: nothing to parse, stop crawling.
                    break
                page = raw.decode('utf-8')

                for mvname, mvurl in self.getMvInfo(page):
                    self.save2File(mvname, mvurl)

                url = self.getNextpage(page)

        # request the page
        def reqPage(self, url):
            """Return the response body of ``url`` as bytes.

            Returns ``None`` when the status code is not 200.  The response is
            used as a context manager so the connection is always closed.
            """
            with request.urlopen(url) as response:
                if response.code == 200:
                    return response.read()
            return None

        # get the fname and url for each mv
        def getMvInfo(self, page):
            """Return a list of ``(alt, src)`` tuples, one per ``<img>`` tag.

            Only tags whose ``alt`` attribute appears before ``src`` are
            matched.  The pattern is confined to a single tag (``[^>]``) and
            is non-greedy, so several tags on the same line each produce their
            own entry instead of being merged into one wrong match.
            """
            return re.findall(r'<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', page)

        # get next page url
        def getNextpage(self, page):
            """Return the absolute URL of the next page, or ``None`` if absent.

            The ``href`` is HTML-unescaped (``&amp;`` -> ``&``) and resolved
            against ``BASE_URL``, so relative and absolute links both work.
            """
            hrefs = re.findall(r'<link rel="next" href="(.*?)"', page)
            if not hrefs or not hrefs[0]:
                # No link, or an empty href that would point back at this page.
                return None
            return urljoin(self.BASE_URL, html.unescape(hrefs[0]))

        # save img to file
        def save2File(self, fname, url):
            """Download ``url`` and write it to ``<self.path>/<fname>.jpg``.

            The target directory is created on demand.  Path separators in
            ``fname`` are replaced because the name is scraped from remote
            HTML and must not be able to leave the target directory.  Nothing
            is written when the download yields no data.
            """
            image = self.reqPage(url)
            if image is None:
                return

            if self.path:
                os.makedirs(self.path, exist_ok=True)

            safe_name = re.sub(r'[\\/]', '_', fname) + '.jpg'
            with open(os.path.join(self.path, safe_name), 'wb') as f:
                f.write(image)

    if __name__ == '__main__':
        # Script entry point: crawl the listing starting at its first page
        # and store every downloaded image under the "img" directory.
        start_url = 'https://movie.douban.com/top250'
        crawler = PosterSpider('img')
        crawler.getStart(start_url)

  • 相关阅读:
    软件工程概论第十二周学习进度
    冲刺第十天
    评价搜狗输入法
    冲刺第九天
    冲刺第八天
    冲刺第七天
    冲刺第六天
    冲刺第五天
    软件工程概论第十一周学习进度
    冲刺第四天
  • 原文地址:https://www.cnblogs.com/hixiaowei/p/9695253.html
Copyright © 2011-2022 走看看