zoukankan      html  css  js  c++  java
  • Day32

    1、爬虫

    import re
    from urllib.request import urlopen
    
    def getPage(url):
        response = urlopen(url)
        return response.read().decode('utf-8')
    
    def parsePage(s):
        com = re.compile(
            '<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>d+).*?<span class="title">(?P<title>.*?)</span>'
            '.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>', re.S)
    
        ret = com.finditer(s)
        for i in ret:
            yield {
                "id": i.group("id"),
                "title": i.group("title"),
                "rating_num": i.group("rating_num"),
                "comment_num": i.group("comment_num"),
            }
    
    def main(num):
        url = 'https://movie.douban.com/top250?start=%s&filter=' % num
        response_html = getPage(url)
        ret = parsePage(response_html)
        print(ret)
        f = open("move_info7", "a", encoding="utf8")
        for obj in ret:
            print(obj)
            data = str(obj)
            f.write(data + "
    ")
        f.close()
    
    if __name__ == '__main__':
        count = 0
        for i in range(10):
            main(count)
            count += 25
    View Code
  • 相关阅读:
    48. Rotate Image
    47. Permutations II
    46. Permutations
    45. Jump Game II
    44. Wildcard Matching
    43. Multiply Strings
    42. Trapping Rain Water
    41. First Missing Positive
    40. Combination Sum II
    39. Combination Sum
  • 原文地址:https://www.cnblogs.com/a352735549/p/8981721.html
Copyright © 2011-2022 走看看