# -*- coding: utf-8 -*-
"""Scrape PHP job listings from 51job and insert them into the ``jobs`` table.

For each of the first ten search-result pages the script POSTs the search
form, parses the result rows, fetches every job's detail page for its
requirement text and inserts one database row per job.
"""
from urllib import request
from bs4 import BeautifulSoup
from urllib import parse
import pymysql
from sqlalchemy import *
from sqlalchemy.orm import *

# Request headers sent with every call to the site.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
)
_ORIGIN = "http://search.51job.com"

# Pages are decoded as GBK and the form data is encoded the same way.
_PAGE_ENCODING = "gbk"

# Search-result URL; ``{page}`` is the 1-based result page number.
# NOTE: the query string previously read ``cotype=99°reefrom=99`` -- the
# ``&degreefrom`` parameter had been mangled into a degree sign (``&deg``
# decoded as an HTML entity), which dropped that parameter from the request.
_SEARCH_URL = (
    "http://search.51job.com/list/000000,000000,0000,00,9,99,C,2,{page}.html"
    "?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99"
    "&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0"
    "&confirmdate=9&fromType=102&dibiaoid=0&address=&line=&specialarea=00"
    "&from=&welfare="
)

# Form fields POSTed with every search-result request.
_SEARCH_FORM = [
    ("fromJs", "1"),
    ("jobarea", "040000"),
    ("keyword", "php"),
    ("keywordtype", "2"),
    ("lang", "c"),
    ("stype", "2"),
    ("postchannel", "0000"),
    ("fromType", "1"),
    ("confirmdate", "9"),
]

# Form fields POSTed with every job-detail request.
_DETAIL_FORM = [
    ("s", "01"),
    ("t", "0"),
]


def _fetch_html(url, form_fields):
    """POST *form_fields* to *url* and return the decoded response body.

    The URL-encoded form data is printed before the request is sent, as the
    script has always done.
    """
    req = request.Request(url)
    req.add_header("User-Agent", _USER_AGENT)
    req.add_header("Origin", _ORIGIN)
    postData = parse.urlencode(form_fields)
    print(postData)
    # The context manager closes the HTTP response once it has been read.
    with request.urlopen(req, data=postData.encode(_PAGE_ENCODING)) as response:
        return response.read().decode(_PAGE_ENCODING)


def getYao(url):
    """Return the text of the ``div.job_msg`` element of the job page at *url*.

    Raises:
        AttributeError: if the page contains no ``div`` with class ``job_msg``.
    """
    detail = BeautifulSoup(_fetch_html(url, _DETAIL_FORM), "html.parser")
    return detail.find("div", class_="job_msg").get_text()


engine = create_engine("mysql://root:root@localhost:3306/laravel?charset=utf8", echo=True)
metadata = MetaData(engine)
users_table = Table("jobs", metadata, autoload=True)

# The insert construct does not depend on the page, so build it once.
sql_moban = users_table.insert()
# Never populated by this script; still printed at the end so the output is
# unchanged.
info_set = set()

for page in range(1, 11):
    content = _fetch_html(_SEARCH_URL.format(page=page), _SEARCH_FORM)
    print(content)
    sp = BeautifulSoup(content, "html.parser")
    rows = sp.find("div", class_="dw_table").find_all("div", class_="el")
    # The first ``el`` row is skipped, as in the original counter-based loop
    # (presumably the table header -- confirm against the page markup).
    for row in rows[1:]:
        link = row.find("a")
        # Fetch the detail page exactly once per job; the earlier code called
        # getYao() twice and threw the first result away.
        sql_moban.execute(
            zhiwei=link.get_text().strip(),
            company=row.find("span", class_="t2").string,
            address=row.find("span", class_="t3").string,
            slary=row.find("span", class_="t4").string,
            riqi=row.find("span", class_="t5").string,
            yaoqiu=getYao(link.get("href")),
        )

print("下载完成")
print(info_set)
# Writes the scraped job data to the database. (Observation: PHP salaries really are not high.)