#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Fetch every page listed in ./step1.xls and print its prettified HTML.

Each row of the worksheet 'Sheet 1' supplies a name (column 0) and a URL
path (column 1).  The path is appended to the site prefix, the page is
downloaded, decoded (GBK first, UTF-8 as fallback), parsed with
BeautifulSoup and printed, followed by the row's name.
"""
import urllib.request

from bs4 import BeautifulSoup
import xlwt
import xlrd

workbook = xlrd.open_workbook('./step1.xls')
row = 0
booksheet = workbook.sheet_by_name('Sheet 1')
for n in range(booksheet.nrows):
    name = booksheet.cell_value(n, 0)
    url = 'http://www.xxx.xxx/' + booksheet.cell_value(n, 1)
    row = row + 1

    # Read the body exactly once: an HTTP response is a stream, so a second
    # read() after a failed decode would return b'' and silently produce an
    # empty page.  The context manager also closes the connection.
    with urllib.request.urlopen(url) as res:
        raw = res.read()

    # Try GBK first and fall back to UTF-8, decoding the same bytes.
    try:
        html = raw.decode('gbk')
    except UnicodeDecodeError:
        html = raw.decode('utf-8')

    # Build the BeautifulSoup object and print it with standard indentation.
    soup = BeautifulSoup(html, "html.parser")
    print(soup.prettify())
    print(name)
需要安装 xlwt 和 xlrd:
pip install xlwt
pip install xlrd