zoukankan      html  css  js  c++  java
  • python爬取豆瓣电影top250数据存入excel

    # -*- coding:utf-8 -*-
    """获取时光影评电影"""
    import requests
    from bs4 import BeautifulSoup
    from datetime import datetime,timedelta
    import pymysql
    #xlwt-1.3.0
    #from xlwt.xlwt import *
    from xlwt import *
    #获取指定开始排行的电影url
    def get_url(root_url,start):
        return root_url+"?start="+str(start)+"&"
    
    def get_review(page_url):
    
        """获取电影相关的信息"""
        movies_list = []
        reponse = requests.get(page_url)
        soup =BeautifulSoup(reponse.text,'lxml')
        soup = soup.find("ol","grid_view")
        dict ={}
        for tag_li in soup.find_all("li"):
            dict = {}
            dict['rank'] = tag_li.find("em").string
            dict['title'] = tag_li.find_all("span","title")[0].string
            dict['score'] = tag_li.find("span","rating_num").string
            if tag_li.find("span","inq"):
                dict['desc'] =tag_li.find("span","inq").string
            else:
                dict['desc'] = '无评词'
    
            movies_list.append(dict)
        return movies_list
    
    def save_excel(movies_list):
        keys =""
        w = Workbook()
        ws = w.add_sheet("movies")
        for i in movies_list:
            keys = list(i.keys())
    
    
        for i in range(len(keys)):
            ws.write(0,i,keys[i])
    
        for movies in range(len(movies_list)):
    
            for key,value in movies_list[movies].items():
    
                keys = list(movies_list[movies].keys())
                #找到key的index
                ws.write(movies+1,keys.index(key),value)
    
            w.save("movies.xls")
    
    
    if __name__ == '__main__':
    
        root_url = "https://movie.douban.com/top250"
        start =0
        movies_list =get_review(get_url(root_url,start))
        save_excel(movies_list)

     

  • 相关阅读:
    django常用命令集合 待完善
    InSAR 数据
    InSAR 处理流程和原理
    InSAR 处理软件
    InSAR 参考书目,文献推荐
    InSAR
    小程序测试方案
    【非原创】测试环境的目的
    【非原创】测试的职责
    api自动生成思路
  • 原文地址:https://www.cnblogs.com/venvive/p/11349527.html
Copyright © 2011-2022 走看看