zoukankan      html  css  js  c++  java
  • 爬虫学习06用selenium爬取空间

    用selenium爬取空间
    # Log in to QQ space with Selenium-driven Chrome, scroll the feed a few
    # times so more entries are loaded, then print the text found inside
    # every <div class="f-info"> element of the resulting page.
    import time

    from lxml import etree
    from selenium import webdriver

    # Path to the local chromedriver binary. The directory separators were
    # missing from the published snippet and have been restored; adjust the
    # path for your own machine.
    pro = webdriver.Chrome(executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe')
    try:
        pro.get(url='https://i.qq.com/?s_url=http%3A%2F%2Fuser.qzone.qq.com%2F1355144989%2Finfocenter')
        # The login controls are looked up inside the 'login_frame' iframe,
        # so the driver has to enter that frame before searching for them.
        pro.switch_to.frame('login_frame')
        # Presumably switches the widget to the account/password form.
        my_button = pro.find_element_by_id('switcher_plogin')
        my_button.click()
        # Enter the account and password.
        # NOTE(review): credentials are hard-coded; do not publish real ones.
        username = pro.find_element_by_id('u')
        username.send_keys('1355144989')
        password = pro.find_element_by_id('p')
        password.send_keys('liqian521.1314')
        login = pro.find_element_by_id('login_button')
        login.click()
        time.sleep(2)

        # Leave the iframe context so the scroll script and page_source below
        # apply to the top-level document instead of the login frame.
        # NOTE(review): assumes a successful login replaces the top-level
        # page - confirm against the live site.
        pro.switch_to.default_content()

        # Scroll to the bottom five times, pausing after each scroll so the
        # page has time to append further content.
        js = 'window.scrollTo(0, document.body.scrollHeight)'
        for _ in range(5):
            pro.execute_script(js)
            time.sleep(2)

        # Grab the source of the page as currently rendered and parse it.
        page_text = pro.page_source
        tree = etree.HTML(page_text)

        # All text nodes underneath every div whose class is exactly "f-info".
        text = tree.xpath('//div[@class="f-info"]//text()')

        print(text)
    finally:
        # Always shut the browser down, even when a step above raised.
        pro.quit()
    
    
    无界面浏览器PhantomJS
    # Drive the headless PhantomJS browser: open Baidu, type a query, click
    # the search button, save a screenshot before and after the click, and
    # print the source of the resulting page.
    import time

    from selenium import webdriver

    # NOTE(review): the directory separators were missing from the published
    # snippet; they are reconstructed here (including the 'bin' directory
    # and the split between the two Chinese folder names) - confirm against
    # the real location of phantomjs.exe on your machine.
    pro = webdriver.PhantomJS(executable_path=r'C:\Users\古月蜀黍\Desktop\文件汇总\爬虫\phantomjs\bin\phantomjs.exe')
    try:
        pro.get(url='https://www.baidu.com')
        # Locate the target element with the find_* family of functions.
        my_input = pro.find_element_by_id('kw')
        # Type the query into the located element.
        time.sleep(2)
        my_input.send_keys('胡涛')
        # NOTE(review): Selenium is believed to write PNG data regardless of
        # the file extension; the '.jpg' names are kept so output paths do
        # not change - confirm.
        pro.save_screenshot('./1.jpg')
        my_button = pro.find_element_by_id('su')
        # Click the located button.
        time.sleep(2)
        my_button.click()
        # Give the page time to update, then capture screenshot and source.
        time.sleep(2)
        pro.save_screenshot('./2.jpg')
        page_text = pro.page_source
        print(page_text)
    finally:
        # Always shut the browser down, even when a step above raised.
        pro.quit()
    
    
    
    谷歌无界面浏览器的配置
    # Run Chrome without a visible window: open Baidu, type a query, click
    # the search button, save a screenshot before and after the click, and
    # print the source of the resulting page.
    import time

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    # Headless browser configuration.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    # Path to the local chromedriver binary. The directory separators were
    # missing from the published snippet and have been restored; adjust the
    # path for your own machine.
    pro = webdriver.Chrome(executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe', chrome_options=chrome_options)
    try:
        pro.get('https://www.baidu.com')
        # Locate the target element with the find_* family of functions.
        my_input = pro.find_element_by_id('kw')
        # Type the query into the located element.
        time.sleep(2)
        my_input.send_keys('胡涛')
        pro.save_screenshot('./111.png')
        my_button = pro.find_element_by_id('su')
        # Click the located button.
        time.sleep(2)
        my_button.click()
        # Give the page time to update, then capture screenshot and source.
        time.sleep(2)
        pro.save_screenshot('./222.png')
        page_text = pro.page_source
        print(page_text)
    finally:
        # Always shut the browser down, even when a step above raised.
        pro.quit()
    

      

  • 相关阅读:
    [atAGC052D]Equal LIS
    [atAGC052C]Nondivisible Prefix Sums
    [atAGC052B]Tree Edges XOR
    [gym103055H]Grammy and HearthStone
    Vector底层结构和源码剖析
    ArrayList的底层源码分析及注意事项
    Collection接口
    集合介绍
    用户管理底层实现
    什么是Mybatis
  • 原文地址:https://www.cnblogs.com/hu13/p/9275294.html
Copyright © 2011-2022 走看看