zoukankan html css js c++ java

爬虫学习06：用 Selenium 爬取 QQ 空间

用 Selenium 爬取 QQ 空间
from selenium import webdriver
from lxml import etree
import time
# Log in to Qzone through the QQ login page, scroll the feed a few times,
# then print every text node found under the "f-info" <div> elements.
#
# NOTE(review): the backslashes in this Windows path had been lost, leaving a
# path that cannot exist; separators are restored here. Point it at the local
# chromedriver binary.
pro = webdriver.Chrome(executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe')
try:
    pro.get(url='https://i.qq.com/?s_url=http%3A%2F%2Fuser.qzone.qq.com%2F1355144989%2Finfocenter')
    # The login controls are inside an iframe, so switch into it first.
    pro.switch_to.frame('login_frame')
    # presumably toggles the form to account/password login -- verify on the page
    my_button = pro.find_element_by_id('switcher_plogin')
    my_button.click()
    # Type the account and password.
    # NOTE(review): credentials are hard-coded in source; move them to
    # environment variables or a prompt before sharing this script.
    username = pro.find_element_by_id('u')
    username.send_keys('1355144989')
    password = pro.find_element_by_id('p')
    password.send_keys('liqian521.1314')
    login = pro.find_element_by_id('login_button')
    login.click()
    time.sleep(2)

    # Scroll to the bottom of the document five times, pausing after each
    # scroll (presumably so that more feed entries can load).
    js = 'window.scrollTo(0, document.body.scrollHeight)'
    for _ in range(5):
        pro.execute_script(js)
        time.sleep(2)

    # Grab the source of the page as currently rendered and parse it.
    page_text = pro.page_source
    tree = etree.HTML(page_text)

    text = tree.xpath('//div[@class="f-info"]//text()')

    print(text)
finally:
    # Always shut the browser down, even when a lookup or click above fails,
    # so no browser/driver process is left running.
    pro.quit()


无界面浏览器PhantomJS
from selenium import webdriver
import time
# Drive a Baidu search in the headless PhantomJS browser, saving a screenshot
# before and after the search and printing the resulting page source.
#
# NOTE(review): the backslashes in this Windows path had been lost
# ("phantomjsinphantomjs.exe" looks like a stripped "\bin\"). The folder
# boundaries below are a best-effort reconstruction -- confirm against the
# local PhantomJS install.
pro = webdriver.PhantomJS(executable_path=r'C:\Users\古月蜀黍\Desktop\文件汇总\爬虫\phantomjs\bin\phantomjs.exe')
try:
    pro.get(url='https://www.baidu.com')
    # Locate the element with id "kw" (the field the query is typed into).
    my_input = pro.find_element_by_id('kw')
    # Type the query into the located element.
    time.sleep(2)
    my_input.send_keys('胡涛')
    pro.save_screenshot('./1.jpg')
    my_button = pro.find_element_by_id('su')
    # Click the located button.
    time.sleep(2)
    my_button.click()
    # Wait briefly, then capture the page as currently displayed.
    time.sleep(2)
    pro.save_screenshot('./2.jpg')
    page_text = pro.page_source
    print(page_text)
finally:
    # Always shut the browser down, even when a step above raises, so the
    # PhantomJS process is not left running.
    pro.quit()



谷歌无界面浏览器的配置
# Drive a Baidu search in headless Chrome, saving a screenshot before and
# after the search and printing the resulting page source.
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Headless-browser configuration: run Chrome without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# NOTE(review): the backslashes in this Windows path had been lost, leaving a
# path that cannot exist; separators are restored here. Point it at the local
# chromedriver binary.
pro = webdriver.Chrome(executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe', chrome_options=chrome_options)
try:
    pro.get('https://www.baidu.com')
    # Locate the element with id "kw" (the field the query is typed into).
    my_input = pro.find_element_by_id('kw')
    # Type the query into the located element.
    time.sleep(2)
    my_input.send_keys('胡涛')
    pro.save_screenshot('./111.png')
    my_button = pro.find_element_by_id('su')
    # Click the located button.
    time.sleep(2)
    my_button.click()
    # Wait briefly, then capture the page as currently displayed.
    time.sleep(2)
    pro.save_screenshot('./222.png')
    page_text = pro.page_source
    print(page_text)
finally:
    # Always shut the browser down, even when a step above raises, so no
    # headless Chrome/chromedriver process is left running.
    pro.quit()

查看全文

相关阅读:
18_异常机制和File类
 20个简洁的 JS 代码片段
 在 Python 中实现延迟调用
 停止 Goroutine 有几种方法？
图解Python中深浅copy
Python 自制简单实用的日志装饰器
 Go 里的错误得这样写才优雅~
推荐8个炫酷的 Python 装饰器！
两个 Django 插件( django_extensions,django_toolbar)
一文看懂Python系列之装饰器(decorator)

原文地址：https://www.cnblogs.com/hu13/p/9275294.html