zoukankan      html  css  js  c++  java
  • PyQt: get dynamic content rendered by JavaScript

    # -*- coding: utf-8 -*-
    
    import re
    import csv
    import time
    try: 
        from PySide.QtGui import QApplication
        from PySide.QtCore import QUrl, QEventLoop, QTimer
        from PySide.QtWebKit import QWebView
    except ImportError:
        from PyQt4.QtGui import QApplication
        from PyQt4.QtCore import QUrl, QEventLoop, QTimer
        from PyQt4.QtWebKit import QWebView
    import lxml.html
    
      
    class BrowserRender(QWebView):
        """Scriptable WebKit view for scraping pages whose content is built by JavaScript.

        Each instance creates its own ``QApplication`` (kept on ``self.app``)
        before initialising the view itself.
        """

        def __init__(self, display=True):
            """Create the Qt application and the web view.

            display -- when true, show the browser window.
            """
            self.app = QApplication([])
            QWebView.__init__(self)
            if display:
                self.show()  # show the browser

        def open(self, url, timeout=60):
            """Load ``url`` and block until loading finishes or ``timeout`` seconds pass.

            Returns the current page HTML if ``loadFinished`` fired before the
            timer expired; otherwise prints a message and returns None.
            """
            loop = QEventLoop()
            timer = QTimer()
            timer.setSingleShot(True)
            # Whichever fires first (page loaded or timer expired) ends the loop.
            timer.timeout.connect(loop.quit)
            self.loadFinished.connect(loop.quit)
            self.load(QUrl(url))
            timer.start(timeout * 1000)  # QTimer takes milliseconds
            loop.exec_()  # delay here until download finished or timed out
            if timer.isActive():
                # Timer still running, so the loop was ended by loadFinished.
                timer.stop()
                return self.html()
            # Timer already fired: timed out.
            # Single pre-formatted string so the output is the same on Python 2 and 3.
            print('Request timed out: %s' % url)
            return None

        def html(self):
            """Shortcut to return the current HTML of the main frame"""
            return self.page().mainFrame().toHtml()

        def find(self, pattern):
            """Find all elements in the main frame that match the CSS selector ``pattern``"""
            return self.page().mainFrame().findAllElements(pattern)

        def attr(self, pattern, name, value):
            """Set attribute ``name`` to ``value`` on all matching elements"""
            for e in self.find(pattern):
                e.setAttribute(name, value)

        def text(self, pattern, value):
            """Set the plain text of all matching elements to ``value``"""
            for e in self.find(pattern):
                e.setPlainText(value)

        def click(self, pattern):
            """Click matching elements by calling their JavaScript ``click()``"""
            for e in self.find(pattern):
                e.evaluateJavaScript("this.click()")

        def wait_load(self, pattern, timeout=60):
            """Wait for this pattern to be found in webpage and return matches.

            Polls the page, processing pending Qt events between checks, until
            ``pattern`` matches or ``timeout`` seconds have elapsed. Prints a
            message and returns None on timeout.
            """
            deadline = time.time() + timeout
            while time.time() < deadline:
                self.app.processEvents()
                matches = self.find(pattern)
                if matches:
                    return matches
                # Brief pause between polls so the loop does not spin a CPU core.
                time.sleep(0.01)
            print('Wait load timed out')
            return None
    
    
    def main():
        """Drive the example search form and write the result link texts to countries.csv.

        Fills the search box with '.', rewrites the text of the selected
        page-size option to '1000' (presumably to get all results on one
        page -- depends on the site's form handling), clicks search, then
        waits for the result links and writes one link text per CSV row.
        If the results never appear, nothing is written.
        """
        br = BrowserRender()
        br.open('http://example.webscraping.com/search')
        br.attr('#search_term', 'value', '.')
        br.text('#page_size option:checked', '1000')
        br.click('#search')

        elements = br.wait_load('#results a')
        if not elements:
            # wait_load returns None (after printing a message) on timeout;
            # bail out before truncating any existing countries.csv.
            return
        # Context manager guarantees the file is flushed and closed.
        with open('countries.csv', 'w') as f:
            writer = csv.writer(f)
            for e in elements:
                writer.writerow([e.toPlainText().strip()])
    
    
    # Run the scrape only when this file is executed as a script, not when imported.
    if __name__ == '__main__':
        main()
    
  • 相关阅读:
    Python的__init__.py用法
    Python中文
    使用apache进行域名绑定
    Storm入门之第二章
    Storm入门之第一章
    【RabbitMQ+Python入门经典】兔子和兔子窝 笔记
    RabbitMQ之Topics(多规则路由)
    RabbitMQ之比较好的资料
    RabbitMQ之路由
    RabbitMQ之发布订阅
  • 原文地址:https://www.cnblogs.com/otfsenter/p/6566621.html
Copyright © 2011-2022 走看看