zoukankan html css js c++ java

（二）python爬虫实例：猫眼电影TOP100榜并将电影信息写入到Excel（Excel列宽自适应）

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import xlrd,xlwt

# One listing URL per board page: offsets 0, 10, ..., 90 (10 films each).
urls = list(map("https://maoyan.com/board/4?offset={}".format, range(0, 100, 10)))

# Request headers sent with every page fetch (browser-like user agent).
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.88 Safari/537.36"
    ),
}

# Column index -> longest cell text seen so far; filled in by WriteExcel.
length = dict()



def len_byte(value):
    """Return an approximate display width for the text ``value``.

    Every character counts as 1, plus half of the extra bytes it needs in
    UTF-8. A 3-byte character (e.g. a CJK ideograph) therefore counts as 2,
    while plain ASCII counts as 1. The result is truncated to an int.
    """
    char_count = len(value)
    extra_bytes = len(value.encode('utf-8')) - char_count
    return int(extra_bytes / 2 + char_count)

# Columns produced per film: title, starring, release date, country, score
def FilmInformation(url, timeout=10):
    """Scrape one board page and return the film records found on it.

    Args:
        url: Address of the board listing page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one ``[name, staring, releasetime, country, score]``
        list of strings for each ``.board-item-main`` element on the page.
        ``country`` is ``"(暂无)"`` when the release line carries no
        parenthesised region.

    Raises:
        requests.RequestException: If the request fails or times out.
        IndexError: If a film entry lacks one of the expected elements or
            the full-width colon that separates label and value.
    """
    content = []
    # Without a timeout an unresponsive server would hang the script forever.
    r = requests.get(url, headers=header, timeout=timeout)
    soup = BeautifulSoup(r.text, "html.parser")
    for film in soup.select(".board-item-main"):
        # select() returns a list, hence the [0] on every lookup.
        # film.select(".name a")[0].text would yield the same title.
        name = film.select("[title]")[0].text
        # Labels are separated from values by the full-width (Chinese) colon.
        staring = film.select(".star")[0].text.strip().split("：")[1]
        release = film.select(".releasetime")[0].text.split("：")[1]
        # Split at the first "(" instead of assuming a 10-character date, so
        # shorter dates such as "1994(美国)" still yield the right country.
        releasetime, _, region = release.partition("(")
        country = region.replace("(", "").replace(")", "")
        if not country:
            country = "(暂无)"
        # The score is rendered as two elements: integer part and fraction.
        score = film.select(".integer")[0].text + film.select(".fraction")[0].text
        content.append([name, staring, releasetime, country, score])
    return content

def WriteExcel(data):
    """Write the scraped films to ``maoyan.xls`` and size the columns.

    Args:
        data: A list of pages, each page being a list of film records
            (lists of strings in column order).

    Side effects:
        Updates the module-level ``length`` dict with the longest cell text
        seen per column and saves the workbook to the working directory.
    """
    global length
    headers = ["电影","主演","时间","国家","评分"]
    book = xlwt.Workbook(encoding = "utf-8")
    sheet = book.add_sheet("猫眼前100")

    # Header row.
    for col_idx, caption in enumerate(headers):
        sheet.write(0, col_idx, caption)

    # Data rows start right below the header.
    row_idx = 1
    for page in data:
        for record in page:
            for col_idx, cell in enumerate(record):
                sheet.write(row_idx, col_idx, cell)
                # Remember the longest text written to this column.
                length[col_idx] = max(length.get(col_idx, 0), len(cell))
            row_idx += 1

    # Column width is set to 256 * 2 units per character of the longest cell.
    for col_idx, widest in length.items():
        sheet.col(col_idx).width = int(256 * widest * 2)

    book.save("maoyan.xls")



def main():
    """Scrape every page listed in ``urls`` and export the results to Excel."""
    pages = [FilmInformation(page_url) for page_url in urls]
    WriteExcel(pages)

# Script entry point: run the scrape/export, then print the per-column
# maximum text lengths that WriteExcel collected in the ``length`` dict.
if __name__ =="__main__":
    main()
    print (length)

查看全文

相关阅读:
干货—MySQL常见的面试题＋索引原理分析！
如何设计一个百万级的消息推送系统
 【金三银四跳槽季】Java工程师如何在1个月内做好面试准备？
Nginx实现请求的负载均衡 + keepalived实现Nginx的高可用
 java函数式编程之Supplier
SpringMVC + MyBatis + Mysql + Redis(作为二级缓存) 配置
 Redis创建集群报错
 阿里云服务器Tomcat无法从外部访问
 SSM框架学习之高并发秒杀业务--笔记5-- 并发优化
 在windows上部署使用Redis

原文地址：https://www.cnblogs.com/python-kp/p/12519311.html