zoukankan      html  css  js  c++  java
  • python使用chrome抓取页面中ajax请求返回的数据

    #-*-coding:utf-8-*-
    
    from time import sleep
    from selenium import webdriver
    import json
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    
    # Pick the request headers of the data (AJAX) request out of the browser log.
    def getHttpInfo(browser, url_keyword='ajaxUrl'):
        """Return the request headers of the first matching logged response.

        Walks the entries of ``browser.get_log('performance')``, decodes each
        entry's JSON ``message`` and looks at ``message.params.response``.
        The first response whose ``url`` contains ``url_keyword`` and that
        carries a ``requestHeaders`` field wins.

        Args:
            browser: Object exposing ``get_log('performance')`` that returns an
                iterable of dict-like entries with a JSON string under
                ``'message'``.
            url_keyword: Substring that the response URL must contain.
                Defaults to ``'ajaxUrl'``.

        Returns:
            The ``requestHeaders`` value of the first matching response, or
            ``None`` when no entry matches.
        """
        for entry in browser.get_log('performance'):
            try:
                response = json.loads(entry['message'])['message']['params']['response']
                if url_keyword in response['url']:
                    return response['requestHeaders']
            except (KeyError, TypeError, ValueError):
                # Entries that are not valid JSON or lack the expected fields
                # are skipped; anything else (e.g. KeyboardInterrupt) is
                # allowed to propagate instead of being silently swallowed.
                continue
        return None
    
    # Request the page and save the captured request headers to a file.
    def setHeaders(url='http://www.baidu.com', wait_seconds=20, output_path='header.txt'):
        """Load a page in headless Chrome and dump the captured headers as JSON.

        Starts Chrome with performance logging enabled, opens ``url``, waits
        ``wait_seconds``, extracts the headers with ``getHttpInfo`` and writes
        them JSON-encoded to ``output_path``. When no matching request was
        logged, ``getHttpInfo`` returns ``None`` and the file contains
        ``null``.

        Args:
            url: Page to open. Defaults to ``'http://www.baidu.com'``.
            wait_seconds: Seconds to sleep after loading the page; presumably
                this gives the page time to issue its AJAX requests.
            output_path: File that receives the JSON-encoded headers.
        """
        # Copy first: DesiredCapabilities.CHROME is a shared class-level dict
        # and must not be modified for every other user of it.
        capabilities = DesiredCapabilities.CHROME.copy()
        # NOTE(review): newer chromedriver versions may expect the key
        # 'goog:loggingPrefs' instead - confirm against the driver in use.
        capabilities['loggingPrefs'] = {'performance': 'ALL'}

        options = webdriver.ChromeOptions()
        # Same effect as the deprecated options.set_headless().
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')

        driver = webdriver.Chrome(desired_capabilities=capabilities, options=options)
        try:
            driver.get(url)
            sleep(wait_seconds)
            headers = getHttpInfo(driver)
        finally:
            # Always shut the browser down, even if loading or parsing failed.
            driver.quit()

        # Write the headers; the context manager closes the file on error too.
        with open(output_path, 'w') as handle:
            handle.write(json.dumps(headers))
    
    # Script entry point: run the header capture only when this file is
    # executed directly, not when it is imported as a module.
    if __name__ == '__main__':
        setHeaders()
    
  • 相关阅读:
    gauss消元
    POJ1229 域名匹配
    HDU3487 play with chain
    POJ1185 炮兵阵地
    POJ2411
    sgu233 little kings
    树形DP初步-真树1662
    树形DP初步-二叉树1661
    c++——string类用法
    UVa1354 ——天平难题
  • 原文地址:https://www.cnblogs.com/ningmo/p/10695876.html
Copyright © 2011-2022 走看看