The day before yesterday I grabbed the site's proxy IP data with regular expressions; today, to get some practice with BeautifulSoup, I reimplemented the same thing using BeautifulSoup.
#!/usr/bin/python

import requests
from bs4 import BeautifulSoup

headers = {
    'Host': "www.ip-adress.com",
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Accept-Language': "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3",
    'Accept-Encoding': "gzip, deflate",
    'Referer': "http://www.ip-adress.com/Proxy_Checker/",
    'Connection': 'keep-alive',
}

url = "http://www.ip-adress.com/proxy_list/"
req = requests.get(url, headers=headers)
soup = BeautifulSoup(req.text, 'html.parser')   # parse the returned HTML string
rsp = soup.find_all('tr', {'class': 'odd'})     # table rows with class "odd"
rsp1 = soup.find_all('tr', {'class': 'even'})   # table rows with class "even"

for element in rsp:
    print(element.td.text)    # the first <td> cell of each odd row

for element1 in rsp1:
    print(element1.td.text)   # the first <td> cell of each even row
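As a side note, the two separate find_all calls for the odd and even rows could likely be collapsed into a single lookup, since find_all's class_ argument accepts a list of class values. Below is a minimal sketch of that idea; it assumes the table keeps the same odd/even class names, and that sending only a User-Agent header is enough for the request to succeed.

#!/usr/bin/python

import requests
from bs4 import BeautifulSoup

url = "http://www.ip-adress.com/proxy_list/"
# assumption: a bare User-Agent header is sufficient here
req = requests.get(url, headers={'User-Agent': "Mozilla/5.0"})
soup = BeautifulSoup(req.text, 'html.parser')

# class_ with a list matches rows whose class is any of the listed values,
# so odd and even rows come back together, in document order, in one pass
for row in soup.find_all('tr', class_=['odd', 'even']):
    print(row.td.text)

One advantage of the combined lookup is that the rows print in the order they appear in the table, instead of all odd rows followed by all even rows.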