zoukankan      html  css  js  c++  java
  • 使用webdriver + phantomjs + pdfkit 生成PDF文件

    实例

    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    
    '''
    Created on Dec 6, 2013
    
    @author: Jay <smile665@gmail.com>
    @description: use PhantomJS (driven by Selenium WebDriver) to render a web page and convert the rendered HTML to a PDF with pdfkit
    '''
    import datetime
    import urllib
    # from pyquery import PyQuery as pq
    import pdfkit
    
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0
    from selenium.webdriver.support import expected_conditions as EC  # available since
    import time
    
    #import db
    #from db import exec_sql, fetchone_sql, fetchall_sql
    
    import sys
    # Python 2-only idiom: sys is reloaded and then sys.setdefaultencoding() is
    # called to make UTF-8 the process-wide default string encoding.
    # NOTE(review): the reload() builtin and sys.setdefaultencoding() do not
    # exist on Python 3 -- confirm the target interpreter before porting.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    
    
    def spider_question(url, pdf_name="test2.pdf"):
        """
        Render ``url`` in PhantomJS and write the rendered page to a PDF file.

        The page is loaded, then the function waits (up to 60 seconds) for the
        document title to contain "complete" before the DOM is captured.  The
        captured markup is handed to pdfkit.

        :param url: address of the page to render.
        :param pdf_name: path of the PDF file to write; defaults to the
            previously hard-coded "test2.pdf".
        :return: the captured HTML document as a string, or the string
            "response_500" when waiting for the title fails.
        """
        # The phantomjs executable is expected next to the working directory;
        # adjust executable_path if it lives elsewhere.
        browser = webdriver.PhantomJS(executable_path='./phantomjs')
        # browser.set_window_size(1024, 786)
        try:
            print('start request url %s' % datetime.datetime.now())
            browser.get(url)  # Load page
            print('end request url %s' % datetime.datetime.now())
            try:
                # Block until the title contains "complete"; gives up after
                # 60 seconds and raises.
                WebDriverWait(browser, 60).until(EC.title_contains("complete"))
            except Exception as e:
                # Best-effort: any failure while waiting is reported to the
                # caller as a sentinel string rather than propagated.
                print("http 500")
                print(e)
                return "response_500"
            print("end math load: %s" % datetime.datetime.now())
            html = browser.find_element_by_tag_name("html").get_attribute("innerHTML")
        finally:
            # Always shut the browser down, including when get() or the DOM
            # lookup raises, so no PhantomJS process is left behind.
            browser.quit()

        # innerHTML drops the <html> wrapper, so rebuild a full document.
        html = '<!DOCTYPE html><html>' + html + "</html>"

        print('begin pdfkit: %s' % datetime.datetime.now())
        pdfkit.from_string(html, pdf_name)
        print("end pdfkit %s" % datetime.datetime.now())

        return html
    
    
    def main():
        """Convert the hard-coded preview page via spider_question and print the elapsed time."""
        # Alternative endpoint kept for reference:
        # "http://192.168.0.126/tea/test/wrongset/download/question/?day_list=20151103&user_id=849127&js=1"
        target = "http://192.168.0.126/open/math2_preview/?js=1&engine=webkit2&css=0"
        started = datetime.datetime.now()
        spider_question(target)
        elapsed = datetime.datetime.now() - started
        print(elapsed)
        # Quick pdfkit smoke test, left disabled:
        # pdfkit.from_string("hello", 'test.pdf')
    
    if __name__ == "__main__":
        # Only run the conversion when executed as a script, then report that
        # the run finished.
        main()
        print("completed")
  • 相关阅读:
    获取ocx运行路径的另一种方法
    使用D3D渲染YUV视频数据
    C++(MFC)中WebBrowser去除3D边框的方法(实现IDocHostUIHandler接口)
    ActiveX控件的安全初始化和脚本操作 和 数字签名SIGN
    解决Eclipse中的卡死现象
    Http请求头和响应头
    HTTP请求头与响应头
    centos7 Mariadb5.5升级到Mariadb10.2
    window下利用navicat访问Linux下的mariadb数据库
    在Linux上安装及配置MariaDB
  • 原文地址:https://www.cnblogs.com/weiok/p/5110069.html
Copyright © 2011-2022 走看看