zoukankan      html  css  js  c++  java
  • Day32

    1、爬虫

    import re
    from urllib.request import urlopen
    
    def getPage(url):
        response = urlopen(url)
        return response.read().decode('utf-8')
    
    def parsePage(s):
        com = re.compile(
            '<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>d+).*?<span class="title">(?P<title>.*?)</span>'
            '.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>', re.S)
    
        ret = com.finditer(s)
        for i in ret:
            yield {
                "id": i.group("id"),
                "title": i.group("title"),
                "rating_num": i.group("rating_num"),
                "comment_num": i.group("comment_num"),
            }
    
    def main(num):
        url = 'https://movie.douban.com/top250?start=%s&filter=' % num
        response_html = getPage(url)
        ret = parsePage(response_html)
        print(ret)
        f = open("move_info7", "a", encoding="utf8")
        for obj in ret:
            print(obj)
            data = str(obj)
            f.write(data + "
    ")
        f.close()
    
    if __name__ == '__main__':
        count = 0
        for i in range(10):
            main(count)
            count += 25
    View Code
  • 相关阅读:
    django之admin管理工具
    django之中间件
    cookie和session
    day052-53 django框架
    day050 前端Jquery库的使用
    sprint
    Scrum 项目1.0
    【团队项目】3.0
    [读书笔记]
    【团队项目】2.0
  • 原文地址:https://www.cnblogs.com/a352735549/p/8981721.html
Copyright © 2011-2022 走看看