Talk is cheap... let me show you the code.
"""Scrape Zhaopin job-search result pages and export the listings to an .xls file.

Every result page is downloaded with ``requests`` and parsed with
BeautifulSoup (lxml parser); each listing found becomes one row of
``result.xls``.  The code runs on both Python 2 and Python 3: every ``print``
call takes a single parenthesised argument, which is valid in both.
"""
import os
import sys
import time

import lxml  # imported so a missing 'lxml' parser backend fails at start-up
import requests
from bs4 import BeautifulSoup as sb
from xlwt import *

# Search URL without the page number.  The ``jl`` value is the percent-encoded
# UTF-8 form of the city name Dongguan.
SEARCH_URL = 'http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E4%B8%9C%E8%8E%9E&p='
# Pages requested are p=0 .. PAGE_COUNT-1.
# NOTE(review): whether the site treats p=0 and p=1 as the same page is not
# visible from this script - confirm to avoid duplicated rows.
PAGE_COUNT = 90
OUTPUT_FILE = 'result.xls'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever
HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"}

# Titles of the header row, in column order (strings kept exactly as before,
# including the trailing space in 'address ').
COLUMN_TITLES = ('number', 'position', 'feedback', 'company', 'salary',
                 'address ', "updatetime", "details")
# CSS classes of the <td> cells that feed columns 1-6, in column order.
FIELD_CLASSES = ("zwmc", "fk_lv", "gsmc", "zwyx", "gzdd", "gxsj")
# CSS class of the <li> element that feeds the "details" column.
DETAIL_CLASS = "newlist_deatil_last"


def _cell_text(cells, index):
    """Return the stripped text of ``cells[index]``, or ``''`` if out of range."""
    if index < len(cells):
        return cells[index].text.strip()
    return ''


def _fetch_page(page):
    """Download and parse one result page.

    Returns the parsed document, or ``None`` when the request fails (network
    error, timeout or HTTP error status) so the caller can skip the page
    instead of aborting the whole run.
    """
    url = SEARCH_URL + str(page)
    print(url)
    try:
        res = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as err:
        print('skipping page %d: %s' % (page, err))
        return None
    return sb(res.text, 'lxml')


def _page_rows(html):
    """Yield one list of cell texts per listing on a parsed result page.

    Each yielded list holds the six FIELD_CLASSES texts followed by the
    details text (``''`` when no matching details element exists).
    """
    columns = [html.find_all('td', class_=name) for name in FIELD_CLASSES]
    details = html.find_all('li', class_=DETAIL_CLASS)
    # Only indices present in every column can be read safely; indexing all
    # lists up to len(zwmc) raised IndexError when one of them was shorter.
    usable = min(len(cells) for cells in columns)
    # Index 0 is skipped exactly as in the original loop.
    # NOTE(review): presumably the first match is a header row, and the
    # details list is assumed to line up index-for-index with the td lists -
    # confirm both against the live page markup.
    for i in range(1, usable):
        fields = [cells[i].text.strip() for cells in columns]
        fields.append(_cell_text(details, i))
        yield fields


def main():
    """Scrape all result pages and write the listings to OUTPUT_FILE."""
    if sys.version_info[0] == 2:
        # Python 2 only: switch the implicit str/unicode codec to UTF-8, as
        # the original script did, so non-ASCII text can be printed.
        reload(sys)  # noqa: F821 - builtin on Python 2
        sys.setdefaultencoding('utf-8')
    print(sys.getdefaultencoding())

    book = Workbook(encoding="utf-8")
    table = book.add_sheet("test1")
    for col, title in enumerate(COLUMN_TITLES):
        table.write(0, col, title)

    # A running counter keeps rows contiguous.  Deriving the row from
    # page * len(results) left blank rows and made pages with different
    # result counts target the same cells, which xlwt rejects.
    next_row = 1
    try:
        for page in range(PAGE_COUNT):
            html = _fetch_page(page)
            if html is None:
                continue
            for fields in _page_rows(html):
                # Progress output: the six table fields, without the details.
                print("---".join(fields[:-1]))
                table.write(next_row, 0, next_row)
                for col, value in enumerate(fields, 1):
                    table.write(next_row, col, value)
                next_row += 1
    finally:
        # Save whatever was collected, even if the run is interrupted.
        book.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()