zoukankan      html  css  js  c++  java
  • selenium登录csdn,urllib抓取数据

       selenium登录csdn,urllib抓取数据:

    # Log in to CSDN in a Chrome window driven by Selenium, then copy the
    # browser's cookies into a requests session and fetch http://my.csdn.net/
    # with them. The slider captcha must be solved by hand during the pause
    # that precedes the click on the login button.
    import selenium
    import requests
    import selenium.webdriver
    import selenium.webdriver.common.keys
    import time
    # find_element(By.<strategy>, value) replaces the find_element_by_* helpers,
    # which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    driver = selenium.webdriver.Chrome()
    driver.get("https://passport.csdn.net/login?code=public")
    # Open the account/password login form through its link text.
    login = driver.find_element(By.LINK_TEXT, "账号密码登录")
    login.click()
    time.sleep(5)
    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)
    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)
    # The XPath contains double quotes, so the literal is single-quoted; the
    # original nested double quotes and did not parse.
    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # pause so the slider captcha can be solved by hand
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # wait for the login cookies to be set

    print("开始会话")
    # A requests session keeps its cookie jar across requests.
    req = requests.session()
    cookies = driver.get_cookies()  # every cookie the browser currently holds
    for cookie in cookies:
        req.cookies.set(cookie['name'], cookie["value"])
    req.headers.clear()  # drop the session's default headers before the request
    newpage = req.get("http://my.csdn.net/")
    print("会话完成")
    print(newpage.text)  # body of the fetched page
    time.sleep(10)
    driver.close()

     urllib保存cookie:

    #coding:utf-8
    # Fetch http://my.csdn.net/ with urllib, sending a Cookie header that was
    # captured earlier from a logged-in browser, and save the raw response
    # bytes to csdn.txt.
    import selenium
    import selenium.webdriver
    import time
    import lxml
    import lxml.etree
    import requests
    import urllib.request

    # The triple-quoted string below is a disabled snippet (an expression
    # statement with no effect) that logged in with Selenium and printed the
    # cookies.
    # NOTE(review): it embeds what look like real account credentials; consider
    # removing them from the file.
    '''
    
    
    driver = selenium.webdriver.Chrome()
    driver.get("https://passport.csdn.net/account/login?")
    time.sleep(3)
    
    
    user=driver.find_element_by_id("username")
    password=driver.find_element_by_id("password")
    submit=driver.find_element_by_class_name("logging")
    user.clear()
    password.clear()
    time.sleep(1)
    user.send_keys("yincheng01@163.com")
    password.send_keys("yinchengak47.net")
    time.sleep(1)
    submit.click()
    time.sleep(10) #等待页面加载,
    cookies=driver.get_cookies()#抓取全部的cookie
    print cookies
    print "------------------------"
    driver.close()
    
    '''

    print("开始会话")
    # Browser-like request headers. The cookie value contains double quotes
    # (UE="..."), so its literal is single-quoted; the original double-quoted
    # literal did not parse.
    headers = {
        "Host": "my.csdn.net",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Referer": "http://www.csdn.net/",
        # "Accept-Encoding" is left out, so no gzip/deflate encoding is requested.
        "Accept-Language": "zh-CN,zh;q=0.8",
        "cookie": 'uuid_tt_dd=-1734079490838081701_20171010; bdshare_firstime=1507966544895; UserName=yinghuming; UserInfo=LZTCl6p9mr%2BUgX1cEEgqwIand1mBReKkuogvIYHivh6MdgAq8c4Y4%2Fmx1uhFT%2FmWFuTu%2BCna36D%2BZ7ssW7xuzAjlIwc7Vgjd7Y7zTDJqy%2FakzOPFEGR52GRrp8sf0i9NK7p2hdvM39vRq5Y7NLJObQ%3D%3D; UserNick=%E8%8B%B1%E9%9B%84%E6%97%A0%E6%95%8C2017; AU=821; UD=%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80; UN=yincheng0571; UE="yincheng01@163.com"; BT=1508039179648; access-token=8260e0b7-a35c-419d-b4af-1f02d51c677d; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1507965242,1507969974,1508038063,1508039035; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508039041; dc_tos=oxuidd; dc_session_id=1508039034960_0.6956040327941211',
    }
    request = urllib.request.Request("http://my.csdn.net/", headers=headers)
    response = urllib.request.urlopen(request)
    newpagetext = response.read()
    # Write the raw bytes; the context manager closes the file even if the
    # write fails.
    with open("csdn.txt", "wb") as outfile:
        outfile.write(newpagetext)
    print(newpagetext)
    print("会话完成")

    time.sleep(10)

    time.sleep(10)
    # No browser is opened by this script, so there is no driver to close.

    selenium+urllib 模拟登录 抓取数据:

    # Log in to CSDN in a Chrome window driven by Selenium, turn the browser's
    # cookies into a Cookie header, fetch http://my.csdn.net/ with urllib and
    # save the raw response bytes to csdn.txt. The slider captcha must be
    # solved by hand during the pause that precedes the click on the login
    # button.
    import selenium
    import selenium.webdriver
    import selenium.webdriver.common.keys
    import urllib.request
    import time
    # find_element(By.<strategy>, value) replaces the find_element_by_* helpers,
    # which newer Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    driver = selenium.webdriver.Chrome()
    driver.get("https://passport.csdn.net/login?code=public")
    # Open the account/password login form through its link text.
    login = driver.find_element(By.LINK_TEXT, "账号密码登录")
    login.click()
    time.sleep(5)
    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)
    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)
    # The XPath contains double quotes, so the literal is single-quoted; the
    # original nested double quotes and did not parse.
    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # pause so the slider captcha can be solved by hand
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # wait for the login cookies to be set
    cookies = driver.get_cookies()  # every cookie the browser currently holds
    print(cookies)
    # Build "name=value;" pairs and join them once. Each pair keeps its
    # trailing ';', so the header text matches what the += loop produced.
    cookie_pairs = []
    for cookie in cookies:
        print(cookie['name'], cookie["value"])
        cookie_pairs.append(str(cookie["name"]) + "=" + str(cookie["value"]) + ";")
    cookiestr = "".join(cookie_pairs)
    print("------------------------")

    print("开始会话")
    # Browser-like request headers plus the cookies taken from Selenium.
    headers = {
        "Host": "my.csdn.net",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Referer": "http://www.csdn.net/",
        # "Accept-Encoding" is left out, so no gzip/deflate encoding is requested.
        "Accept-Language": "zh-CN,zh;q=0.8",
        "cookie": cookiestr,
    }
    request = urllib.request.Request("http://my.csdn.net/", headers=headers)
    response = urllib.request.urlopen(request)
    newpagetext = response.read()
    # Write the raw bytes; the context manager closes the file even if the
    # write fails.
    with open("csdn.txt", "wb") as outfile:
        outfile.write(newpagetext)
    print(newpagetext)
    print("会话完成")

    time.sleep(10)
    driver.close()
  • 相关阅读:
    全端开发必备!10个最好的 Node.js MVC 框架
    action和servlet的关系
    js模块化
    前端类库
    Windows下配置nginx+php(wnmp)
    DllMain的作用
    在linux上实现DllMain + 共享库创建方法
    QT实现Windows下DLL程序编写
    平台相关的宏
    远程线程的注入 PE的修正
  • 原文地址:https://www.cnblogs.com/my-global/p/12484149.html
Copyright © 2011-2022 走看看