zoukankan      html  css  js  c++  java
  • python爬取豆瓣电影top250数据存入excel

    # -*- coding:utf-8 -*-
    """Scrape movie entries from the Douban Top 250 ranking and save them to an Excel file."""
    import requests
    from bs4 import BeautifulSoup
    from datetime import datetime,timedelta
    import pymysql
    #xlwt-1.3.0
    #from xlwt.xlwt import *
    from xlwt import *
    #获取指定开始排行的电影url
    def get_url(root_url, start):
        """Return the listing URL for the page that begins at offset ``start``.

        The result is ``root_url`` followed by ``?start=<start>&``.
        """
        pieces = (root_url, "?start=", str(start), "&")
        return "".join(pieces)
    
    def get_review(page_url):
        """Download one listing page and extract the basic facts of each movie.

        Args:
            page_url: Full URL of the ranking page to fetch.

        Returns:
            A list with one dict per ``<li>`` entry, in page order. Each dict
            has the keys ``rank``, ``title``, ``score`` and ``desc`` (in that
            order). The list is empty when the page contains no
            ``<ol class="grid_view">`` element.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
        """
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(page_url, timeout=10)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()

        listing = BeautifulSoup(response.text, 'lxml').find("ol", "grid_view")
        if listing is None:
            # No ranking list on this page: nothing to extract.
            return []

        movies_list = []
        for tag_li in listing.find_all("li"):
            # Look the optional one-line quote up once and reuse the result.
            quote = tag_li.find("span", "inq")
            movies_list.append({
                'rank': tag_li.find("em").string,
                # The first "title" span is used; any further ones are ignored.
                'title': tag_li.find_all("span", "title")[0].string,
                'score': tag_li.find("span", "rating_num").string,
                # Entries without a quote get the placeholder text.
                'desc': quote.string if quote is not None else '无评词',
            })
        return movies_list
    
    def save_excel(movies_list, filename="movies.xls"):
        """Write the movie records to a sheet named "movies" in an .xls workbook.

        Row 0 holds the column headers (the dict keys, in first-seen order);
        each following row holds the values of one movie.

        Args:
            movies_list: List of dicts, one per movie.
            filename: Path of the workbook to write. Defaults to "movies.xls".

        Returns:
            None. No file is written when ``movies_list`` is empty.
        """
        if not movies_list:
            return

        # Assign one column per key across ALL records, so the header and every
        # row agree on the column layout even if a record has different keys.
        columns = {}
        for movie in movies_list:
            for key in movie:
                columns.setdefault(key, len(columns))

        w = Workbook()
        ws = w.add_sheet("movies")

        for key, col in columns.items():
            ws.write(0, col, key)

        # Data rows start at 1 because row 0 is the header.
        for row, movie in enumerate(movies_list, start=1):
            for key, value in movie.items():
                ws.write(row, columns[key], value)

        # Save once after all rows are in place, not once per row.
        w.save(filename)
    
    
    if __name__ == '__main__':

        root_url = "https://movie.douban.com/top250"
        # Walk every page of the ranking instead of only the first one.
        # NOTE(review): assumes 25 movies per page and 250 entries in total,
        # addressed through the ``start`` offset -- confirm against the site.
        page_size = 25
        total = 250
        movies_list = []
        for start in range(0, total, page_size):
            movies_list.extend(get_review(get_url(root_url, start)))
        save_excel(movies_list)

     

  • 相关阅读:
    DataGridView 实现,折叠的Tree效果
    DEV 总结
    EWS:邮箱的一个开放的接口服务
    socket,模拟服务器、客户端通信
    在ASP.NET Core中构建路由的5种方法
    扩展方法、泛型、委托,的小案例
    操作Work、Excel、PDF
    d3实现家族树
    大数据时代的图表可视化利器——highcharts,D3和百度的echarts
    函数防抖与节流
  • 原文地址:https://www.cnblogs.com/venvive/p/11349527.html
Copyright © 2011-2022 走看看