帮朋友抓了一些代理IP,并根据测试的连通性,放在了不同的文件中。特将源码分享。
注意:
1,环境Python3.5
2,安装 BeautifulSoup4 和 requests(pip install beautifulsoup4 requests)
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#-*- coding:gb18030 -*-
"""Scrape proxy IPs from a paginated listing and keep the ones that answer ping.

Every scraped ``ip:port`` pair is appended to ``ip.txt``; the pairs whose IP
responds to ``ping`` are appended to ``SuccessIp.txt``.
"""
import ipaddress
import os
import subprocess
import sys
import time

from bs4 import BeautifulSoup
import requests

# Listing pages to crawl: label -> base URL (the page number is appended).
all_url_add = {
    'url2': 'http://ip84.com/gn/',
}

IP_FILE = 'ip.txt'
SUCCESS_FILE = 'SuccessIp.txt'
# Seconds to wait for a listing page before giving up on it.
REQUEST_TIMEOUT = 10
# Number of <td> cells a data row must have: ip, port, city, type, protocol,
# speed, time.
ROW_FIELD_COUNT = 7
# NOTE(review): "-n <count> -w <timeout>" are the Windows ping flags used by
# the original command; other platforms use different flags -- confirm target.
PING_COMMAND = ['ping', '-n', '5', '-w', '5']


def func(url):
    """Scrape one listing page and append its ``ip:port`` pairs to ``ip.txt``.

    Args:
        url: Full URL of the listing page to fetch.

    Returns:
        ``'success'`` when the page was fetched and its rows were stored,
        ``'failed'`` when the request or the file write raised an error.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page must not abort the whole crawl.
        print("-------------------程序异常-----------------------")
        print(exc)
        return 'failed'

    soup = BeautifulSoup(response.text, "html.parser")
    records = []
    for table in soup.find_all("table", class_="list"):
        for row in table.find_all("tr"):
            cells = [cell.get_text().strip() for cell in row.find_all("td")]
            if len(cells) < ROW_FIELD_COUNT:
                # Rows without the full set of data cells (e.g. header rows)
                # carry no proxy entry; skip them instead of raising.
                continue
            ip_addr, port, city, leixing, xieyi, shudu, time1 = (
                cells[:ROW_FIELD_COUNT])
            city = city.replace("\n", "")
            records.append(ip_addr + ":" + port + '\n')
            print('地址:{}端口:{}地区:{}类型:{}协议{}速度{}时间:{}'.format(
                ip_addr, port, city, leixing, xieyi, shudu, time1))

    if records:
        try:
            with open(IP_FILE, 'a') as out:
                out.writelines(records)
        except OSError as exc:
            print("-------------------程序异常-----------------------")
            print(exc)
            return 'failed'

    print('本页抓取结束,正在跳转下一页')
    return 'success'


def _is_reachable(ip):
    """Return True when *ip* is a valid IP address that answers ping."""
    try:
        # Scraped text is untrusted: only hand well-formed addresses to ping.
        ipaddress.ip_address(ip)
    except ValueError:
        return False
    try:
        # Argument list (no shell) so the address can never be interpreted
        # as shell syntax.
        result = subprocess.run(PING_COMMAND + [ip])
    except OSError:
        # ping executable missing or not runnable: treat as unreachable.
        return False
    return result.returncode == 0


def pin():
    """Ping every IP stored in ``ip.txt`` and record the reachable entries.

    Each reachable ``ip:port`` line is appended to ``SuccessIp.txt``.  A
    missing ``ip.txt`` is treated as an empty list.
    """
    try:
        with open(IP_FILE, 'r') as source:
            entries = [line.strip() for line in source if line.strip()]
    except FileNotFoundError:
        entries = []

    for entry in entries:
        ip = entry.split(':')[0]
        if _is_reachable(ip):
            print('测试成功,正在写入新文件')
            # Write the entry that was just tested (not the next line read
            # from the file), one entry per line.
            with open(SUCCESS_FILE, 'a') as sink:
                sink.write(entry + '\n')
        else:
            print('测试失败')
    print('程序结束,可用IP已放在SuccessIp中')


def main():
    """Crawl pages 1-49 of every configured listing, then ping the results."""
    for name, base_url in all_url_add.items():
        print(name)
        for page in range(1, 50):
            url = base_url + str(page)
            print(url)
            status = func(url)
            if status == 'success':
                print(page, '页结束')
    print('****程序抓取运行结束,正在检查所得IP连通性,请勿关闭窗口*****')
    pin()


if __name__ == '__main__':
    main()
有点乱,有时间将数据存储在数据库,再将这个功能集成在博客当中。
Rex博客保留所有权利