zoukankan      html  css  js  c++  java
  • 03 获取豆瓣电影top250

    ''''''
    '''
    https://movie.douban.com/top250?start=0&filter=
    https://movie.douban.com/top250?start=25&filter=
    https://movie.douban.com/top250?start=50&filter=
    
    1、发送请求
    2、解析数据
    3、保存数据
    '''
    import requests
    import re
    #爬虫三部曲
    #1、发送请求
    def get_page(base_url, headers=None, timeout=10):
        """Send a GET request to ``base_url`` and return the response object.

        Args:
            base_url: URL of the page to fetch.
            headers: Optional dict of HTTP headers. When omitted, a
                browser-like ``User-Agent`` is sent.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The ``requests.Response`` returned by ``requests.get``. The status
            code is not checked here; callers read ``response.text``.

        Raises:
            requests.exceptions.RequestException: on connection errors or when
                the timeout expires.
        """
        if headers is None:
            # NOTE(review): Douban is reported to reject the default
            # "python-requests/x.y" User-Agent (HTTP 418) - confirm against the
            # live site. A browser-like value is sent by default for that reason.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/120.0 Safari/537.36',
            }
        # requests applies no timeout unless one is given, so a stalled
        # connection would otherwise block the script indefinitely.
        return requests.get(base_url, headers=headers, timeout=timeout)
    
    #2、解析文本
    def parse_index(text):
        """Extract the film entries from one page of listing HTML.

        The page is cut at every ``<div class="item">`` marker and each piece is
        parsed on its own. A single page-wide regex with a mandatory
        ``<span class="inq">`` would, for an entry that has no quote, keep
        matching into the following entry and return a tuple mixing two films.

        Args:
            text: HTML source of a listing page.

        Returns:
            A list of 7-tuples of strings, in page order:
            (rank, url, title, text following "导演:" up to ``</p>``, rating,
            number of raters, quote). The quote is ``''`` when the entry has no
            ``<span class="inq">``. Entries missing any of the first six fields
            are skipped.
        """
        # Required fields, in the order they appear inside one item.
        fields_re = re.compile(
            '<em class="">(.*?)</em>.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>'
            '.*?导演:(.*?)</p>.*?<span class="rating_num".*?>(.*?)</span>.*?<span>(.*?)人评价</span>',
            re.S)
        # Optional one-line quote that follows the rating block.
        quote_re = re.compile('<span class="inq">(.*?)</span>', re.S)

        res = []
        # Element 0 of the split is whatever precedes the first item marker.
        for chunk in text.split('<div class="item">')[1:]:
            fields = fields_re.search(chunk)
            if fields is None:
                continue
            # Look for the quote only after the required fields, as the
            # original pattern did.
            quote = quote_re.search(chunk, fields.end())
            res.append(fields.groups() + (quote.group(1) if quote else '',))
        return res
    
    #3、保存数据
    def save_data(data):
        """Append the string ``data`` to ``douban.txt`` in the working directory.

        The file is opened in append mode with UTF-8 encoding, so it is created
        if missing and earlier content is kept.
        """
        with open(file='douban.txt', mode='a', encoding='utf-8') as out_file:
            out_file.write(data)
    
    
    
    #main + 回车键
    if __name__ == '__main__':
        # Ten pages are requested; the `start` query parameter is the offset
        # of the page and advances by 25 each time (0, 25, ..., 225).
        for num in range(0, 250, 25):
            base_url = 'https://movie.douban.com/top250?start={}&filter='.format(num)
            print(base_url)

            # Step 1: send the request.
            page = get_page(base_url)

            # Step 2: parse the page text into one tuple per film.
            parsed_movies = parse_index(page.text)

            # Step 3: format each film and save it.
            for entry in parsed_movies:
                # Unpack: rank, film URL, title, director/cast/genre text,
                # rating, number of raters, short description.
                v_top,v_url,v_name,v_daoyan,v_point,v_num,v_desc = entry

                movie_content = f'''
                电影排名:{v_top}
                电影url:{v_url}
                电影名称:{v_name}
                电影主演:{v_daoyan}
                电影评分:{v_point}
                评价人数:{v_num}
                电影简介:{v_desc}
                
    
                '''
                print(movie_content)

                # Append the formatted record to the output file.
                save_data(movie_content)
  • 相关阅读:
    Balanced Binary Tree
    Convert Sorted List to Binary Search Tree
    Convert Sorted Array to Binary Search Tree
    Binary Tree Zigzag Level Order Traversal
    Validate Binary Search Tree
    Binary Tree Level Order Traversal II
    Binary Tree Level Order Traversal
    Maximum Depth of Binary Tree
    如何把U盘的两个盘或者多个盘合成一个
    bugku 想蹭网先解开密码
  • 原文地址:https://www.cnblogs.com/urassya/p/11093872.html
Copyright © 2011-2022 走看看