zoukankan      html  css  js  c++  java
  • (一)python爬虫实例:猫眼电影TOP100榜并将电影信息写入到Excel

    # -*- coding:utf-8 -*-
    import requests
    from bs4 import BeautifulSoup
    import xlrd,xlwt
    
    # The TOP100 board is paginated 10 films per page: offsets 0, 10, ..., 90.
    urls = [
        "https://maoyan.com/board/4?offset=%d" % offset
        for offset in range(0, 100, 10)
    ]
    
    # Request headers sent with every page fetch (a desktop Chrome user-agent).
    header = {
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/79.0.3945.88 Safari/537.36"
        ),
    }
    
    #电影 主演 时间 国家 评分
    def FilmInformation(url):
        """Fetch one board page and extract a row of details for each film.

        Args:
            url: Address of the board page to download.

        Returns:
            A list with one ``[name, staring, releasetime, country, score]``
            list of strings per ``.board-item-main`` element on the page.
            ``country`` is ``"(暂无)"`` when the release text carries no
            parenthesised country.

        Raises:
            requests.RequestException: If the download fails or times out.
            IndexError: If a film entry lacks one of the expected elements.
        """
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(url, headers=header, timeout=10)
        soup = BeautifulSoup(r.text, "html.parser")

        content = []
        for film in soup.select(".board-item-main"):
            # select() returns a list, hence the [0] on every lookup.
            name = film.select("[title]")[0].text
            staring = _text_after_colon(film.select(".star")[0].text)

            # The release text is "<label>:<date>" optionally followed by
            # "(<country>)". Splitting on "(" instead of slicing at a fixed
            # offset also copes with dates shorter than YYYY-MM-DD.
            release = _text_after_colon(film.select(".releasetime")[0].text)
            releasetime, _, country = release.partition("(")
            country = country.replace("(", "").replace(")", "") or "(暂无)"

            # The score is rendered as two elements: integer part and fraction.
            score = film.select(".integer")[0].text + film.select(".fraction")[0].text
            content.append([name, staring, releasetime, country, score])
        return content

    def _text_after_colon(text):
        """Return the part of ``text`` that follows its first colon.

        The full-width colon (":") is tried first, then the ASCII one. Any
        further colon ends the value. If neither colon is present the stripped
        text is returned unchanged, so a missing label does not raise.
        """
        text = text.strip()
        for colon in (":", ":"):
            _, found, value = text.partition(colon)
            if found:
                return value.split(colon)[0].strip()
        return text
    
    def WriteExcel(data):
        """Write the scraped film rows to the workbook file ``maoyan.xls``.

        Args:
            data: A sequence of pages, each page being a sequence of rows and
                each row a sequence of cell values. Pages are flattened in
                order into consecutive sheet rows below the heading row.
        """
        workbook = xlwt.Workbook(encoding = "utf-8")
        sheet = workbook.add_sheet("猫眼前100")

        # Row 0 holds the column headings.
        headings = ["电影","主演","时间","国家","评分"]
        for col_index, heading in enumerate(headings):
            sheet.write(0, col_index, heading)

        # Film rows start at row 1; every record takes exactly one row.
        row_index = 0
        for page in data:
            for record in page:
                row_index += 1
                for col_index, cell in enumerate(record):
                    sheet.write(row_index, col_index, cell)

        workbook.save("maoyan.xls")
    
    def main():
        """Scrape every page listed in ``urls`` and export the rows to Excel."""
        # One list of film rows per page; WriteExcel flattens them.
        pages = [FilmInformation(page_url) for page_url in urls]
        WriteExcel(pages)
    
    # Run the scraper only when executed as a script, not when imported.
    if __name__ == "__main__":
        main()
  • 相关阅读:
    三步搭建精准召回体系,挽回流失用户
    HMS Core Insights第二期直播预告——华为定位技术让你重拾方向感
    如何区分router.push跳转快应用的来源渠道
    华为预测服务的构建原理是什么?该如何训练模型?
    HarmonyOS开发者日干货资料,奉上!
    技术硬核、体验新颖……HarmonyOS开发者日最值得关注的点都在这里
    Js中Proxy对象
    迭代器模式
    ed命令
    百度实习生前端面试面经
  • 原文地址:https://www.cnblogs.com/python-kp/p/12518350.html
Copyright © 2011-2022 走看看