zoukankan      html  css  js  c++  java
  • Python使用Selenium和requests.session登录抓取

    # Author:song
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from requests import session
    import requests
    
    def get_cookies(username='username', password='passwd'):
        """Log in to Zhihu in a Chrome browser and return the login cookies.

        Opens the sign-up page, switches to the login form, types the
        credentials, submits the form, waits, and then reads the cookies
        held by the browser.

        Args:
            username: Text typed into the account input.
            password: Text typed into the password input.

        Returns:
            A ``Cookie`` header string of the form ``name=value; name=value``
            holding the cookies aliyungf_tc, _xsrf, q_c1, capsion_ticket,
            _zap, z_c0 and d_c0, in that order.

        Raises:
            KeyError: If one of the expected cookies is not present in the
                browser after the wait (for example because login failed).
        """
        import time

        from selenium.webdriver.common.by import By

        # The login widgets all live under the same container; build the
        # selectors from the shared prefix instead of repeating it.
        container = '#root > div > main > div > div > div > div.SignContainer-inner'
        form = container + ' > div.Login-content > form'
        cookie_names = ('aliyungf_tc', '_xsrf', 'q_c1', 'capsion_ticket',
                        '_zap', 'z_c0', 'd_c0')

        driver = webdriver.Chrome()
        try:
            driver.get('https://www.zhihu.com/signup?next=%2F')

            # The page opens on the sign-up form; this switch reveals the
            # login form.
            driver.find_element(
                By.CSS_SELECTOR,
                container + ' > div.SignContainer-switch > span').click()
            time.sleep(2)  # give the login form time to render

            account_input = driver.find_element(
                By.CSS_SELECTOR,
                form + ' > div.SignFlow-account'
                       ' > div.SignFlowInput.SignFlow-accountInputContainer'
                       ' > div.SignFlow-accountInput.Input-wrapper > input')
            account_input.send_keys(username)

            password_input = driver.find_element(
                By.CSS_SELECTOR,
                form + ' > div.SignFlow-password > div > div.Input-wrapper > input')
            password_input.send_keys(password)

            driver.find_element(By.CSS_SELECTOR, form + ' > button').click()
            time.sleep(10)  # wait for the login request to complete

            c = {item['name']: item['value'] for item in driver.get_cookies()}
        finally:
            # Always shut the browser down, even if an element lookup failed,
            # so no Chrome/chromedriver process is left behind.
            driver.quit()

        return '; '.join('{}={}'.format(name, c[name]) for name in cookie_names)
    
    
    def crawer_page(cookies_value):
        """Request a Zhihu search API page with login cookies and print the status.

        Args:
            cookies_value: A ``Cookie`` header string of the form
                ``name=value; name2=value2`` (the format get_cookies() returns).

        Returns:
            None. The HTTP status code of the response is printed.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36',
        }

        # Split the header string into individual cookies. Passing the whole
        # string under a single 'Cookie' key would create one cookie literally
        # named "Cookie" instead of the real name/value pairs.
        cookies_dict = {}
        for fragment in cookies_value.split(';'):
            name, sep, value = fragment.strip().partition('=')
            if sep and name:
                cookies_dict[name] = value

        # A requests session does not keep a hand-built dict of cookies as-is;
        # the dict has to be converted to a cookiejar first.
        cookies = requests.utils.cookiejar_from_dict(cookies_dict)

        # Use requests.Session() under a different local name: assigning to a
        # local called `session` would shadow the imported factory and raise
        # UnboundLocalError before the call is made.
        with requests.Session() as http_session:
            http_session.cookies = cookies
            response = http_session.get('https://www.zhihu.com/api/v4/search_v3?t=general&q=python&correction=1&offset=5&limit=10&search_hash_id=42cde34da2dc8ae359c5402be499b867', headers=headers)
        print(response.status_code)
  • 相关阅读:
    mysql增量同步到greenplum
    C笔记06--编译与作用域
    C笔记05-选择顺序结构,关系与相等,优先级和结合性
    C笔记02-C数据类型与数据类型转换
    C笔记01-C简介与补码
    jQuery属性操作之.val()函数
    jQuery属性操作之.attr()
    jQuery笔记: 基本概念与jQuery核心
    笔记: js构造函数与原型
    布尔运算符
  • 原文地址:https://www.cnblogs.com/master-song/p/8884633.html
Copyright © 2011-2022 走看看