zoukankan      html  css  js  c++  java
  • IP+IDC-chinaz抓取

    #-*-coding:gbk-*-
    #code by anyun.org
    import urllib
    import re
    import time
    
    
    def getHtml(url):
        """Fetch ``url`` and return its body flattened onto one line.

        Line breaks and some runs of padding are stripped from the
        response. Presumably this lets the single-line regular expressions
        applied to the result match content that was spread over several
        source lines (``.`` does not match a newline by default).

        Args:
            url: Address handed to ``urllib.urlopen``.

        Returns:
            The response body (a byte string) after the replacements below.

        Raises:
            IOError: propagated from ``urllib.urlopen`` when the connection
                cannot be made.
        """
        page = urllib.urlopen(url)
        try:
            html = page.read()
        finally:
            # Release the connection even if reading the body fails.
            page.close()
        # The newline must be written as an escape sequence; a literal line
        # break between the quotes is a syntax error.
        html = html.replace('\n', '')
        # NOTE(review): the whitespace literals below are kept exactly as
        # found. The last two are identical, so they may originally have
        # been distinct escapes (e.g. tab / carriage return) -- confirm.
        html = html.replace('       ', ' ')
        html = html.replace('   ', '')
        html = html.replace('   ', '')
        return html
    
    
    def getcontext(html):
        """Return the text of every ``Whwtdhalf w15-0`` span found in ``html``.

        Args:
            html: Page source to scan.

        Returns:
            A list with the captured inner text of each matching span, in
            document order; empty when nothing matches.
        """
        span_pattern = re.compile(r'<span class="Whwtdhalf w15-0">(.*?)</span>')
        return span_pattern.findall(html)
    
    def getadd(html):
        """Return the text of every ``Whwtdhalf w50-0`` span found in ``html``.

        Args:
            html: Page source to scan.

        Returns:
            A list with the captured inner text of each matching span, in
            document order; empty when nothing matches.
        """
        pattern = r'<span class="Whwtdhalf w50-0">(.*?)</span>'
        # One capture group, so group(1) is exactly what findall would yield.
        return [found.group(1) for found in re.finditer(pattern, html)]
    
    def geterr(html):
        """Return the text of every ``col-red lh30 fz14 jspu`` div in ``html``.

        Args:
            html: Page source to scan.

        Returns:
            A list with the captured inner text of each matching div, in
            document order; empty when the page contains no such div.
        """
        error_div = r'<div class="col-red lh30 fz14 jspu">(.*?)</div>'
        matches = re.findall(error_div, html)
        return matches
    
    if __name__ == '__main__':
        # Batch driver: look up every host listed in list.txt (one per line)
        # on ip.chinaz.com, echo each result to stdout, and append successes
        # to ok.txt and failures to err.txt.
        with open('list.txt', 'r') as targets:
            hosts = [entry.strip() for entry in targets]

        for host in hosts:
            if not host:
                # Blank lines in the list would only produce a bogus query.
                continue

            url = 'http://ip.chinaz.com/?ip=http://' + host
            try:
                # The request is the step that can actually fail, so it is
                # what the error handler has to guard.
                page = getHtml(url)
            except IOError:
                print 'error'
                time.sleep(0.5)
                continue

            # Scan the page once per pattern instead of once per field.
            context = getcontext(page)
            address = getadd(page)

            # Success needs no error banner and enough fields for the
            # indexing below; a short page is reported as a failure rather
            # than raising IndexError.
            if not geterr(page) and len(context) >= 6 and len(address) >= 2:
                # Fields are interleaved as (0, 3), (1, 4), (2, 5).
                line = ' '.join([context[0], context[3],
                                 context[1], context[4],
                                 context[2], context[5],
                                 address[0], address[1]])
                print line
                with open('ok.txt', 'a') as ok_file:
                    print >>ok_file, line
            else:
                print host, '解析失败'
                with open('err.txt', 'a') as err_file:
                    print >>err_file, host, '解析失败'

            # Pause between lookups.
            time.sleep(0.5)
        print 'over'
    

      

  • 相关阅读:
    高德地图
    微信小程序蓝牙
    微信小程序请求封装
    create-react-app配置less
    浏览器渲染原理及流程
    输入网址到呈现网页发生的过程
    cookie的理解
    浏览器本地存储
    cookie,localStorage,sessionStorage区别
    关于this指向
  • 原文地址:https://www.cnblogs.com/crac/p/5778741.html
Copyright © 2011-2022 走看看