zoukankan      html  css  js  c++  java
  • COM-IE-(2)

    # -*- coding:UTF-8 -*- 
    import sys
    from time import sleep
    import win32com.client
    from win32com.client import DispatchEx
    
    # Python 2 only: switch the interpreter-wide default string encoding to
    # UTF-8 so implicit str()/unicode conversions of non-ASCII text (e.g. the
    # str(node.getAttribute(...)) calls in COM_IE) do not fail.
    # The three standard streams are remembered before reload(sys) and put back
    # afterwards -- presumably because reloading sys rebinds them, which would
    # drop any redirection installed by the host environment.
    stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
    # reload(sys) makes sys.setdefaultencoding reachable again at this point.
    reload(sys)
    sys.setdefaultencoding("utf-8")
    sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr
    
    class COM_IE(object):
        """Small helper around an Internet Explorer COM automation object.

        On construction the instance attaches to an already open window whose
        ``LocationURL`` equals *url*, or starts a new browser and navigates to
        *url*.  The remaining methods wait for the page to load and look up,
        read, modify or click DOM nodes selected by tag name and attribute.

        Attributes:
            url: the URL passed to the constructor.
            ie: the underlying browser COM object.
            Visible: visibility flag (default ``1``).  Assigning to it also
                updates the ``Visible`` property of the browser window.
            document: ``body.innerHTML`` captured by the last ``WaitIE`` call.
            text: ``body.innerText`` captured by the last ``WaitIE`` call.
            charset: document charset captured by the last ``WaitIE`` call.
        """

        # CLSID used to enumerate the currently open shell/browser windows.
        _SHELL_WINDOWS_CLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'

        # ReadyState value at which the page is treated as fully loaded.
        _READYSTATE_COMPLETE = 4

        # Attributes collected for every node when NodesByAttr is called
        # without an explicit attribute name.
        _DEFAULT_NODE_ATTRS = (
            "id", "nodeName", "nodeType", "nodeValue", "className",
            "innerHTML", "innerText", "href", "name", "title", "type", "value",
        )

        def __init__(self, url=None):
            self.url = url
            # Set before openIE() so the Visible property can be used safely
            # while the browser object does not exist yet.
            self.ie = None
            self._visible = 1
            self.ie = self.openIE(url)
            self.document = ""
            self.text = ""
            self.charset = None

        @property
        def Visible(self):
            """Visibility flag applied to the browser window.

            This used to be both an instance attribute and a method of the
            same name; the attribute shadowed the method, which therefore
            could never be called.  Reading still yields the plain value.
            """
            return getattr(self, "_visible", 1)

        @Visible.setter
        def Visible(self, value):
            self._visible = value
            browser = getattr(self, "ie", None)
            if browser is not None:
                browser.Visible = value

        def ExistIE(self, url):
            """Return the open window whose ``LocationURL`` equals *url*, else None."""
            windows = DispatchEx(self._SHELL_WINDOWS_CLSID)
            # Explicit length test on purpose: do not replace it with a
            # truthiness test, which may not reflect emptiness for COM wrappers.
            if len(windows) == 0:
                return None
            for window in windows:
                # NOTE(review): the None guard is defensive; confirm whether the
                # collection can actually yield empty slots.
                if window is not None and window.LocationURL == url:
                    return window
            return None

        def NewIE(self, url):
            """Start a new browser, apply ``self.Visible`` and navigate to *url*."""
            browser = DispatchEx("InternetExplorer.Application")
            browser.Visible = self.Visible
            browser.Navigate(url)
            return browser

        def openIE(self, url):
            """Return an existing window showing *url*, or a freshly started one."""
            browser = self.ExistIE(url)
            if browser is None:
                browser = self.NewIE(url)
            return browser

        def WaitIE(self, timeout=None, poll_interval=1):
            """Block until the page is loaded, then cache charset, HTML and text.

            Args:
                timeout: maximum number of seconds to wait, measured as the sum
                    of the sleep intervals.  ``None`` (default) waits forever,
                    which is the original behaviour.
                poll_interval: seconds to sleep between ``ReadyState`` checks.

            Raises:
                RuntimeError: *timeout* was given and elapsed before the page
                    reached the loaded state.
            """
            waited = 0
            while self.ie.ReadyState != self._READYSTATE_COMPLETE:
                if timeout is not None and waited >= timeout:
                    raise RuntimeError(
                        "page not loaded after %s second(s)" % (timeout,))
                sleep(poll_interval)
                waited += poll_interval
            doc = self.ie.Document
            self.charset = doc.charset
            self.document = doc.body.innerHTML
            self.text = doc.body.innerText

        def GetBody(self):
            """Wait for the page to load and return the document's body node."""
            self.WaitIE()
            return self.ie.Document.body

        def GetNodes(self, parentNode, tag):
            """Return a list of the *tag* elements found under *parentNode*.

            Example::

                coldiv = ie.GetNodes(body, "div")
            """
            # Plain iteration copies the collection into a Python list.
            return [child for child in parentNode.getElementsByTagName(tag)]

        def NodeByAttr(self, Nodes, nodeattr, nodeval):
            """Return the first node whose *nodeattr*, as a string, equals *nodeval*.

            Returns None when no node matches.

            Example::

                div_id_editor = ie.NodeByAttr(coldiv, "id", "editor_ifr")
            """
            for node in Nodes:
                if str(node.getAttribute(nodeattr)) == nodeval:
                    return node
            return None

        def _find_node(self, body, node_type, node_attr, node_attr_val):
            """Return the first *node_type* element under *body* matching the attribute.

            Raises:
                AttributeError: no such node exists.  The exception type matches
                    what the callers raised before this helper existed (they
                    dereferenced None), only the message is more helpful.
            """
            candidates = self.GetNodes(body, node_type)
            node = self.NodeByAttr(candidates, node_attr, node_attr_val)
            if node is None:
                raise AttributeError(
                    "no <%s> node with %s=%r" % (node_type, node_attr, node_attr_val))
            return node

        def SetNodeHtml(self, body, node_type, node_attr, node_attr_val, node_inner_html):
            """Set ``innerHTML`` of the matching node; AttributeError if none matches."""
            node = self._find_node(body, node_type, node_attr, node_attr_val)
            node.innerHTML = node_inner_html

        def SetNodeVal(self, body, node_type, node_attr, node_attr_val, node_value):
            """Set ``value`` of the matching node; AttributeError if none matches."""
            node = self._find_node(body, node_type, node_attr, node_attr_val)
            node.value = node_value

        def NodeClick(self, body, node_type, node_attr, node_attr_val):
            """Call ``click()`` on the matching node; AttributeError if none matches."""
            self._find_node(body, node_type, node_attr, node_attr_val).click()

        def GetNodeHtml(self, body, node_type, node_attr, node_attr_val):
            """Return ``innerHTML`` of the matching node; AttributeError if none matches."""
            return self._find_node(body, node_type, node_attr, node_attr_val).innerHTML

        def GetNodeVal(self, body, node_type, node_attr, node_attr_val):
            """Return ``value`` of the matching node; AttributeError if none matches."""
            return self._find_node(body, node_type, node_attr, node_attr_val).value

        # multiple nodes
        def NodesByAttr(self, Nodes, nodeattr=None, nodeval=None):
            """Collect attribute values from every node in *Nodes*.

            Args:
                Nodes: iterable of DOM nodes.
                nodeattr: attribute to read.  When empty/None, a fixed set of
                    common attributes is read for every node.
                nodeval: when given (and non-empty), only nodes whose
                    *nodeattr*, as a string, equals it are reported.

            Returns:
                A list with one dict per reported node, mapping attribute name
                to the value returned by ``getAttribute``.

            Example::

                hrefs = ie.NodesByAttr(anchors, "href")
            """
            collected = []
            for node in Nodes:
                if not nodeattr:
                    collected.append(
                        {attr: node.getAttribute(attr)
                         for attr in self._DEFAULT_NODE_ATTRS})
                    continue
                value = node.getAttribute(nodeattr)
                if not nodeval or str(value) == nodeval:
                    collected.append({nodeattr: value})
            return collected

        # multiple nodes
        def GetNodesVal(self, body, node_type, node_attr=None, node_val=None):
            """Run ``NodesByAttr`` over all *node_type* elements under *body*."""
            tags = self.GetNodes(body, node_type)
            return self.NodesByAttr(tags, node_attr, node_val)

        def Quit(self):
            """Close the browser by calling ``Quit()`` on the COM object."""
            self.ie.Quit()
    
    
    if __name__=="__main__":
        # Demo: open the page, list every <a> element's text and href, then
        # wait for the user before closing the browser.
        url = "http://blog.csdn.net/agoago_2009/"
        IE = COM_IE(url)
        try:
            BODY = IE.GetBody()

            # a_list = IE.GetNodesVal(BODY,"a","href")
            a_list = IE.GetNodesVal(BODY,"a")
            for a in a_list:
                # Single formatted argument: same output as the two-item print
                # statement, and it parses the same way as a print() call.
                print("%s %s" % (a.get("innerText"), a.get("href")))

            # Further usage example (disabled):
            #   IE.SetNodeVal(BODY,"input","id","inputSearch","COM")
            #   IE.NodeClick(BODY,"input","id","btnSubmit")
            #
            #   IE.WaitIE()
            #   print IE.document.strip()[:100]
            #   print IE.charset
            #   print IE.text.strip()[:100]

            raw_input('quit')
        finally:
            # Always close the browser, even when the demo above fails, so no
            # orphaned IE process is left behind.
            IE.Quit()
    	
    	
    	

  • 相关阅读:
    任正非用人:砍掉高层手脚、中层屁股、基层脑袋、全身赘肉!
    SourceTree的基本使用
    Spring学习(三)
    Spring学习(二)
    Spring学习(一)
    SpringBoot学习(五)
    SpringBoot学习(四)
    SpringBoot学习(二)
    SpringBoot学习(三)
    SpringBoot学习(一)
  • 原文地址:https://www.cnblogs.com/zsychanpin/p/6936240.html
Copyright © 2011-2022 走看看