zoukankan      html  css  js  c++  java
  • PyQt: get dynamic content rendered by JavaScript

    # -*- coding: utf-8 -*-
    
    import re
    import csv
    import time
    try: 
        from PySide.QtGui import QApplication
        from PySide.QtCore import QUrl, QEventLoop, QTimer
        from PySide.QtWebKit import QWebView
    except ImportError:
        from PyQt4.QtGui import QApplication
        from PyQt4.QtCore import QUrl, QEventLoop, QTimer
        from PyQt4.QtWebKit import QWebView
    import lxml.html
    
      
    class BrowserRender(QWebView):
        """Small scriptable browser built on top of QWebView.

        The instance creates its own QApplication, loads pages while blocking
        the caller, and offers helpers that look elements up by selector
        pattern in the page's main frame and then read or modify them.
        """

        def __init__(self, display=True):
            """Create the Qt application and the web view.

            display -- when true, show the browser window.
            """
            # The QApplication is created before the widget is initialised.
            self.app = QApplication([])
            QWebView.__init__(self)
            if display:
                self.show()  # show the browser

        def open(self, url, timeout=60):
            """Load `url` and block until it finishes or `timeout` seconds pass.

            Returns the HTML of the main frame when loading finished before
            the timer fired; otherwise prints a message and returns None.
            """
            loop = QEventLoop()
            timer = QTimer()
            timer.setSingleShot(True)
            # Whichever fires first (timer or loadFinished) ends the local loop.
            timer.timeout.connect(loop.quit)
            self.loadFinished.connect(loop.quit)
            self.load(QUrl(url))
            timer.start(timeout * 1000)  # QTimer works in milliseconds
            loop.exec_()  # delay here until download finished
            if timer.isActive():
                # The timer is still pending, so the loop was ended by
                # loadFinished: downloaded successfully.
                timer.stop()
                return self.html()
            # Single-argument print() gives identical output on Python 2 and 3.
            print('Request timed out: %s' % url)
            return None

        def html(self):
            """Shortcut to return the current HTML of the main frame."""
            return self.page().mainFrame().toHtml()

        def find(self, pattern):
            """Return all elements in the main frame that match `pattern`."""
            return self.page().mainFrame().findAllElements(pattern)

        def attr(self, pattern, name, value):
            """Set attribute `name` to `value` on every matching element."""
            for e in self.find(pattern):
                e.setAttribute(name, value)

        def text(self, pattern, value):
            """Set the plain text of every matching element to `value`."""
            for e in self.find(pattern):
                e.setPlainText(value)

        def click(self, pattern):
            """Click every matching element by running `this.click()` on it."""
            for e in self.find(pattern):
                e.evaluateJavaScript("this.click()")

        def wait_load(self, pattern, timeout=60):
            """Poll until `pattern` matches something in the page.

            Returns the matches as soon as any are found. If `timeout`
            seconds elapse first, prints a message and returns None.
            """
            deadline = time.time() + timeout
            while time.time() < deadline:
                # Let Qt process pending events so the page can keep updating.
                self.app.processEvents()
                matches = self.find(pattern)
                if matches:
                    return matches
                # Brief pause so the polling loop does not spin a CPU core.
                time.sleep(0.01)
            print('Wait load timed out')
            return None
    
    
    def main():
        """Run a search on the example site and save the results to countries.csv.

        Opens the search page, sets the search field's value to '.', sets the
        text of the checked page-size option to '1000' (presumably to request
        many results per page -- confirm against the site), clicks the search
        button and writes the stripped text of each '#results a' element as
        one CSV row.
        """
        br = BrowserRender()
        br.open('http://example.webscraping.com/search')
        br.attr('#search_term', 'value', '.')
        br.text('#page_size option:checked', '1000')
        br.click('#search')

        # wait_load returns None when it times out; treat that as "no results"
        # instead of failing with a TypeError while iterating.
        elements = br.wait_load('#results a') or []
        # The with-block guarantees the file is flushed and closed.
        with open('countries.csv', 'w') as fp:
            writer = csv.writer(fp)
            for e in elements:
                writer.writerow([e.toPlainText().strip()])
    
    
    # Run the scraper only when this file is executed as a script.
    if __name__ == "__main__":
        main()
    
  • 相关阅读:
    一个简单的loading,纯属自娱自乐
    sql server CTE递归使用测试
    sql-删除无效sql链接
    sql-按周输出每月的周日期范围
    sql-计算每个月星期几有几天
    sql-GOTO跳转
    回滚与撤销
    数据库事务
    mysql 海量数据的存储和访问解决方案
    数据库范式
  • 原文地址:https://www.cnblogs.com/otfsenter/p/6566621.html
Copyright © 2011-2022 走看看