zoukankan      html  css  js  c++  java
  • python实训day6

    今天是Python实训的第六天,帅气的tank老师今天教了我们一些常用且实用的小技能。

    课堂笔记:

    1.关于selenium选择器xpath的使用:

    from selenium import webdriver
     
    # Start Chrome through an explicit chromedriver binary.  The path is a raw
    # string so the Windows backslashes are not interpreted as escapes.
    driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

    try:
        # Implicit wait: configured before the get() request.
        driver.implicitly_wait(5)

        driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')

        # Explicit wait: would be written after the get() request.
        # wait.until(...)

        '''
         
    <html>
     <head>
      <base href='http://example.com/' />
      <title>Example website</title>
     </head>
     <body>
      <div id='images'>
       <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
       <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
       <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
       <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
       <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
      </div>
     </body>
    </html>
        '''
        # Look elements up with XPath syntax.
        # "/" starts the search at the root node.
        html = driver.find_element_by_xpath('/html')
        # driver.find_element_by_xpath('/head') raises an error (original note).
        print(html.tag_name)

        # "//" searches from the root for a matching node at any depth.
        div = driver.find_element_by_xpath('//div')
        print(div.tag_name)

        # "@" filters on an attribute: the div whose id is "images".
        div = driver.find_element_by_xpath('//div[@id="images"]')
        print(div.tag_name)
        print(div.text)

        # First <a> node.
        a = driver.find_element_by_xpath('//a')
        print(a.tag_name)

        # Every <a> node (find_elements returns a list).
        a_s = driver.find_elements_by_xpath('//a')
        print(a_s)

        # href of the first <a> node; get_attribute reads one attribute of a node.
        href = driver.find_element_by_xpath('//a').get_attribute('href')
        print(href)

    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
    

      2.selenium剩余更多操作:

    '''
    点击、清除操作
    '''
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    import time
     
    # Start Chrome through an explicit chromedriver binary.  The path is a raw
    # string so the Windows backslashes are not interpreted as escapes.
    driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

    try:
        driver.implicitly_wait(10)
        # 1. Send a request to JD.
        driver.get('https://www.jd.com/')

        # Find the search box, type the first query and press Enter.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('围城')
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)

        # Look the search box up again, clear the old text and type the
        # second query.
        input_tag = driver.find_element_by_id('key')
        input_tag.clear()
        input_tag.send_keys('墨菲定律')

        # This time submit by clicking the search button.
        button = driver.find_element_by_class_name('button')
        button.click()
        time.sleep(10)

    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
     
     
    '''
    获取cookies  (了解)
    '''
    from selenium import webdriver
    import time
     
    # Start Chrome through an explicit chromedriver binary.  The path is a raw
    # string so the Windows backslashes are not interpreted as escapes.
    driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

    try:
        driver.implicitly_wait(10)
        driver.get('https://www.zhihu.com/explore')
        # Print the cookies the browser holds for the current page.
        print(driver.get_cookies())

        time.sleep(10)
    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
     
    '''
    选项卡
    '''
    # 选项卡管理:切换选项卡,有js的方式window.open,有windows快捷键:
    # ctrl+t等,最通用的就是js的方式
    import time
    from selenium import webdriver
     
    browser = webdriver.Chrome()
    try:
        browser.get('https://www.baidu.com')

        # execute_script runs JavaScript in the page.
        # An alert popup would be: browser.execute_script('alert("tank")')
        # Open a new browser window/tab.
        browser.execute_script(
            '''
            window.open();
            '''
        )
        time.sleep(1)
        print(browser.window_handles)  # handles of every open tab

        # Switch to the second tab.  switch_to.window is the current API;
        # switch_to_window is the old spelling.
        browser.switch_to.window(browser.window_handles[1])

        # Load Taobao in the second tab.
        browser.get('https://www.taobao.com')
        time.sleep(5)

        # Switch back to the first tab, using the same current API as above.
        browser.switch_to.window(browser.window_handles[0])
        browser.get('https://www.sina.com.cn')

        time.sleep(10)
    finally:
        # Two tabs are open here: close() would only close the focused one,
        # so quit() is needed to end the session and stop chromedriver.
        browser.quit()
     
     
    '''
    ActionChains动作链
    '''
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    import time
     
    driver = webdriver.Chrome()

    try:
        # Configure and navigate inside the try block so that a failed page
        # load still reaches the cleanup in `finally`.
        driver.implicitly_wait(10)
        driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

        # Switch into the frame whose id is "iframeResult"
        # (old spelling: driver.switch_to_frame('iframeResult')).
        driver.switch_to.frame('iframeResult')

        # Source element.
        draggable = driver.find_element_by_id('draggable')

        # Target element.
        droppable = driver.find_element_by_id('droppable')

        # Option 1 ("robot"): move the source onto the target in one instant
        # action.  ActionChains must be given the driver object:
        #     ActionChains(driver).drag_and_drop(draggable, droppable).perform()

        # Option 2: imitate a human by dragging in small steps.
        source = draggable.location['x']
        target = droppable.location['x']
        print(source, target)

        distance = target - source
        print(distance)

        # Every action has to be executed with perform().

        # Press and hold the source element.
        ActionChains(driver).click_and_hold(draggable).perform()

        s = 0
        while s < distance:
            # Move 2 pixels to the right per step.
            ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
            s += 2

        # Release the mouse button.
        ActionChains(driver).release().perform()

        time.sleep(10)

    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
     
     
    '''
    前进、后退
    '''
    from selenium import webdriver
    import time
     
    driver = webdriver.Chrome()

    try:
        driver.implicitly_wait(10)
        # Visit three pages in a row to build up browser history.
        driver.get('https://www.jd.com/')
        driver.get('https://www.baidu.com/')
        driver.get('https://www.cnblogs.com/')

        time.sleep(2)

        # Go back one page.
        driver.back()
        time.sleep(1)
        # Go forward again.
        driver.forward()
        time.sleep(1)
        driver.back()
        time.sleep(10)

    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
    

      3.破解登录方法:

    from selenium import webdriver
    from selenium.webdriver import ChromeOptions
    import time
    r'''
    步骤:
        1、打开文件的查看,显示隐藏文件
        2、找到C:\Users\administortra\AppData\Local\Google\Chrome\User Data
            删除Default文件
        3、重新打开浏览器,并登陆百度账号
            - 此时会创建一个新的Default缓存文件
        4、添加cookies
        5、关闭谷歌浏览器后执行程序
    '''
    # 获取options对象,参数对象
    # Build the options object that carries Chrome's start-up arguments.
    options = ChromeOptions()

    # Directory where Chrome keeps the user profile (including cookies):
    # C:\Users\administortra\AppData\Local\Google\Chrome\User Data
    # A raw string keeps the Windows backslashes literal.
    profile_directory = r'--user-data-dir=C:\Users\administortra\AppData\Local\Google\Chrome\User Data'

    # Register the user-data directory as a start-up argument.
    options.add_argument(profile_directory)

    # Hand the options to the driver through the chrome_options keyword.
    # NOTE(review): newer Selenium releases name this keyword `options` --
    # confirm against the installed version before changing it.
    driver = webdriver.Chrome(chrome_options=options)

    try:
        driver.implicitly_wait(10)
        driver.get('https://www.baidu.com/')

        # Cookie to inject -- BDUSS:*****
        # The "name" and "value" keys must be lowercase.
        driver.add_cookie({"name": "BDUSS", "value": "用户session字符串"})

        # Reload the page.
        driver.refresh()

        time.sleep(10)

    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
    

      4.爬取京东商品信息:

    # 爬取京东商品信息:
    #     请求url:
    #         https://www.jd.com/
    #     提取商品信息:
    #         1.商品详情页
    #         2.商品名称
    #         3.商品价格
    #         4.评价人数
    #         5.商品商家
    # '''
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    import time
     
    driver = webdriver.Chrome()

    try:
        driver.implicitly_wait(10)
        # 1. Send a request to the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name and press Enter to search.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('macbook')
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)

        # Scroll the page with JS so that all products get loaded.
        js_code = '''
            window.scrollTo(0,5000);
        '''
        driver.execute_script(js_code)

        # Wait for the data to load.
        time.sleep(2)

        # 3. Collect every product element.
        # good_div = driver.find_element_by_id('J_goodsList')
        good_list = driver.find_elements_by_class_name('gl-item')

        # Open the output file once and append one record per product.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                # Product link.
                good_url = good.find_element_by_css_selector(
                    '.p-img a').get_attribute('href')

                # Product name; line breaks in the element text become "--".
                good_name = good.find_element_by_css_selector(
                    '.p-name em').text.replace("\n", "--")

                # Product price; line breaks become ":".
                good_price = good.find_element_by_class_name(
                    'p-price').text.replace("\n", ":")

                # Number of reviews; line breaks become spaces.
                good_commit = good.find_element_by_class_name(
                    'p-commit').text.replace("\n", " ")

                # Seller; line breaks become spaces.
                good_from = good.find_element_by_class_name(
                    'J_im_icon').text.replace("\n", " ")

                good_content = f'''
                        商品链接: {good_url}
                        商品名称: {good_name}
                        商品价格: {good_price}
                        评价人数: {good_commit}
                        商品商家: {good_from}
                        \n
                        '''
                print(good_content)
                f.write(good_content)

        # Go to the next result page.
        next_tag = driver.find_element_by_link_text('下一页')

        next_tag.click()

        time.sleep(10)

    finally:
        # quit() ends the whole session (all windows plus the chromedriver
        # process); close() would only close the current window.
        driver.quit()
    

      5.作业

    (1):爬取京东商品信息:

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    import time
     
    def _format_good(good):
        """Return the printable summary text for one product element.

        Reads the link, name, price, review count and seller from the
        sub-elements of ``good`` and replaces line breaks in their text so
        every field stays on a single line.
        """
        good_url = good.find_element_by_css_selector('.p-img a').get_attribute('href')
        good_name = good.find_element_by_css_selector('.p-name em').text.replace("\n", "--")
        good_price = good.find_element_by_class_name('p-price').text.replace("\n", ":")
        good_commit = good.find_element_by_class_name('p-commit').text.replace("\n", " ")
        good_from = good.find_element_by_class_name('J_im_icon').text.replace("\n", " ")

        return f'''
                                商品链接:{good_url}
                                商品名称:{good_name}
                                商品价格:{good_price}
                                评价人数:{good_commit}
                                商品商家:{good_from}
                                \n
                                '''


    def get_good(driver, load_wait=2, view_wait=10, max_pages=None):
        """Print the products of every result page, then shut the driver down.

        Starting from the result page ``driver`` currently shows, each page is
        scrolled, its ``gl-item`` elements are printed, and the ``pn-next``
        button is clicked until there is no further page.  Pages are walked
        with a loop rather than recursion, so the driver is released exactly
        once and long result lists cannot exhaust the call stack.

        Args:
            driver: browser driver already positioned on a search result page.
            load_wait: seconds to sleep after scrolling and after clicking
                "next", giving the page time to load.
            view_wait: seconds to pause on each page after printing it.
            max_pages: optional upper bound on the number of pages to process;
                ``None`` means keep going until there is no next page.

        Returns:
            None.  The driver is always quit before returning or raising.
        """
        try:
            page = 0
            while True:
                page += 1

                # Scroll down so the rest of the product list gets loaded.
                driver.execute_script('window.scrollTo(0,5000);')
                time.sleep(load_wait)

                for good in driver.find_elements_by_class_name('gl-item'):
                    print(_format_good(good))
                time.sleep(view_wait)

                if max_pages is not None and page >= max_pages:
                    break

                # find_elements returns an empty list instead of raising when
                # nothing matches, which is how a missing button is detected.
                next_tags = driver.find_elements_by_class_name('pn-next')
                if not next_tags:
                    break
                next_tag = next_tags[0]
                # NOTE(review): assumes the last page keeps the button but
                # marks it with a "disabled" class -- confirm against the site.
                if 'disabled' in (next_tag.get_attribute('class') or ''):
                    break

                next_tag.click()
                time.sleep(load_wait)
        finally:
            # quit() ends the whole session (all windows plus the chromedriver
            # process); close() would only close the current window.
            driver.quit()
     
     
    if __name__ == '__main__':
        # Ask which product to search for; surrounding whitespace is dropped.
        keyword = input('请输入商品名:').strip()

        # Open the JD home page with a 10-second implicit wait.
        driver = webdriver.Chrome()
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')

        # Type the keyword into the search box and submit with Enter.
        search_box = driver.find_element_by_id('key')
        search_box.send_keys(keyword)
        search_box.send_keys(Keys.ENTER)

        # Give the result page a moment to load, then print the products.
        time.sleep(2)
        get_good(driver)
    

      

  • 相关阅读:
    Spring AOP详解 、 JDK动态代理、CGLib动态代理
    mysql 日期 字符串 时间戳转换
    图文:通过sql server 连接mysql
    c# 数据绑定之 DataFormatString 格式
    sql 截取字符串与 截取字符串最长的字符串
    oracle 清除表空间
    sql 遍历结果print和表格形式
    国家与城市的sql
    sql2005 将一列的多行内容拼接成一行
    oracle和mssql中复制表的比较
  • 原文地址:https://www.cnblogs.com/jacob1998/p/11048355.html
Copyright © 2011-2022 走看看