抓取最新招聘和系统招聘:
# coding=utf8
"""Scrape posting titles from two job.ucas.ac.cn news listings.

Every page of each listing is downloaded, the ``title`` attribute of each
link inside the ``listborder`` element is collected, and the titles are
appended (space separated, UTF-8 encoded) to ``com.txt`` and ``sys.txt``.
"""
import sys
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

if sys.version_info[0] == 2:
    # Legacy Python 2 side effect of the original script, kept for parity.
    # Nothing below relies on it any more: all encoding is explicit.
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding("utf-8")

# Listing URLs; the 1-based page number is appended to each of them.
COM_LIST_URL = ('http://job.ucas.ac.cn/home/news/'
                'be59ab33-e946-48ed-a0eb-0d3e97b62870?page=')
SYS_LIST_URL = ('http://job.ucas.ac.cn/home/news/'
                '56966245-bd19-43ca-bafe-404b4a4c57a5?page=')


def gethtml(url, pageid):
    """Download page ``pageid`` of the listing at ``url`` and parse it.

    The request URL is ``url`` followed by the integer ``pageid``. The
    response body is decoded as GBK and parsed with ``html.parser``.

    Returns the parsed ``BeautifulSoup`` document.
    """
    # Imported lazily so that getitem() and this module stay importable
    # on machines where bs4 is not installed.
    from bs4 import BeautifulSoup

    # closing() releases the connection on both Python 2 and Python 3,
    # even when read() or decode() raises.
    with closing(urlopen('%s%d' % (url, pageid))) as page:
        html = page.read().decode('gbk')
    return BeautifulSoup(html, 'html.parser')


def getitem(html):
    """Return the UTF-8 encoded ``title`` of every link in the listing box.

    ``html`` is a parsed document as returned by :func:`gethtml`. Links are
    looked up inside the first element whose class is ``listborder``.

    Returns a list of byte strings. The list is empty when the page has no
    ``listborder`` element, and anchors without a ``title`` attribute are
    skipped, instead of raising ``AttributeError`` / ``KeyError``.
    """
    container = html.find(attrs={'class': 'listborder'})
    if container is None:
        return []
    titles = (anchor.get('title') for anchor in container.find_all('a'))
    return [title.encode('utf-8') for title in titles if title is not None]


def _save_titles(url, last_page, path):
    """Append the titles from pages 1..``last_page`` of ``url`` to ``path``."""
    # Binary append mode: getitem() already returns UTF-8 encoded bytes.
    with open(path, 'ab') as out:
        for pageid in range(1, last_page + 1):
            for title in getitem(gethtml(url, pageid)):
                out.write(title + b' ')


def main():
    """Scrape both listings into ``com.txt`` and ``sys.txt``."""
    _save_titles(COM_LIST_URL, 86, 'com.txt')
    _save_titles(SYS_LIST_URL, 8, 'sys.txt')


if __name__ == "__main__":
    main()