Python学习笔记_爬虫数据存储为xlsx格式的方法

from urllib.parse import urljoin

import openpyxl
import requests
from bs4 import BeautifulSoup

# Scrape product detail pages from the listing pages below and dump the
# markup of every "swiper-wrapper" block into product.xlsx, one row per block.
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'product1'

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}

# Upper bound (seconds) applied to every HTTP request so that a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 120
LIST_URL_TEMPLATE = "https://promiseedental.en.made-in-china.com/product-list-{}.html"

try:
    # One session reuses the connection and sends the same headers on both
    # the listing requests and the detail requests.
    with requests.Session() as session:
        session.headers.update(headers)

        for x in range(20):
            list_url = LIST_URL_TEMPLATE.format(x)  # listing page address
            try:
                res = session.get(list_url, timeout=REQUEST_TIMEOUT)
                res.raise_for_status()
            except requests.RequestException as e:
                # A single unreachable listing page must not abort the run.
                print('---->', e)
                continue

            soup = BeautifulSoup(res.text, "html.parser")
            # Each "prod-image" div wraps the link to one product's detail page.
            item_all = soup.find_all('div', class_="prod-image")

            for item1 in item_all:
                try:
                    link = item1.find("a")
                    if link is None or not link.get("href"):
                        continue  # no detail link in this tile

                    # Resolve relative and protocol-relative hrefs against
                    # the listing page so requests always gets a full URL.
                    url_p = urljoin(list_url, link["href"])  # detail page link
                    res1 = session.get(url_p, timeout=REQUEST_TIMEOUT)
                    res1.raise_for_status()
                    soup1 = BeautifulSoup(res1.text, "html.parser")

                    for item3 in soup1.find_all('div', class_="swiper-wrapper"):
                        # Split the block's markup on "<"; each fragment
                        # becomes one cell of the appended row.
                        sheet.append(str(item3).split("<"))
                        print("ok")

                except Exception as e:
                    # Best-effort boundary: report the failure for this
                    # product and carry on with the next one.
                    print('---->', e)
finally:
    # Save whatever was collected, even if the run is interrupted.
    wb.save('product.xlsx')

查看全文

相关阅读:
.NET 4 上的REST 框架
 WCF Web API 说再见，继承者ASP.NET Web API
基于盛大的云数据库系统 MongoIC 构建图片系统
 微软以Apache许可协议开源ASP.NET MVC
Redis 起步
 HttpClient介绍
 Quartz.NET 2.0正式发布
 CodeFirst Migrations随Entity Framework 4.3一同发布
 Redis 在Centos Linux 上的启动脚本
 Quartz.NET的管理工具

原文地址：https://www.cnblogs.com/waterr/p/14022413.html