#coding:utf-8
import re,urllib
def gethtml(url):
    """Fetch *url* and return the raw response body.

    The response object is always closed, even if reading fails, so
    repeated calls do not leak open connections.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # The object returned by urlopen is not closed automatically.
        page.close()
def getlink(html):
    """Return the contents of every matching table cell found in *html*.

    A cell matches when its opening tag is exactly
    ``<td align="left" style="padding-left:20px;">``. The text up to the
    next ``</td>`` is captured (non-greedy). Because ``.`` does not match
    newlines here, cells whose content spans several lines are skipped.

    Returns a list of the captured strings, in document order; the list is
    empty when nothing matches.
    """
    return re.findall(
        r'<td align="left" style="padding-left:20px;">(.*?)</td>', html)
def save(links):
    """Append each entry of *links* to ``360.txt``, one entry per line.

    The file is opened in append mode, so earlier contents are kept and the
    file is created if it does not exist yet (even when *links* is empty).
    The ``with`` block guarantees the handle is flushed and closed when the
    function returns or raises.
    """
    with open('360.txt', 'a') as f:
        for i in links:
            f.write(i + "\n")
# Crawl listing pages 11 through 199: download each page, extract the
# matching table cells, echo them, and append them to the output file.
for page in range(11, 200):
    url = "https://butian.360.cn/company/lists/page/" + str(page)
    html = gethtml(url)
    # Progress marker. A single parenthesized expression prints the same
    # text under the Python 2 print statement and the print() function.
    print(str(page) + "ye")
    links = getlink(html)
    print(links)
    save(links)