zoukankan      html  css  js  c++  java
  • 入门测试,扒拉百度搜索结果

    import sys
    import re
    from typing import List
    import json
    
    from selenium import webdriver
    #from gjypjd.utils import exetcute_sql,if_headless
    #import pymysql
    from selenium.webdriver import Firefox
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.firefox.options import Options
    from selenium.webdriver.support import expected_conditions as expected
    from selenium.webdriver.support.wait import WebDriverWait
    
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    
    # Headless Firefox options.
    # NOTE(review): `options` is never handed to a driver in this script (the
    # Firefox launch below is commented out), so it has no effect on the
    # PhantomJS session that is actually started.
    options = Options()
    options.add_argument('-headless')
    
    # PhantomJS capabilities: a copy of the stock defaults plus a user-agent
    # override that presents the session as mobile Chrome on an Android Nexus 6.
    dcap = {
        **DesiredCapabilities.PHANTOMJS,
        "phantomjs.page.settings.userAgent": (
            "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36"
        ),
    }
    
    # Firefox alternative, kept for reference:
    # driver = webdriver.Firefox(firefox_options=options)
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    
    # Hard-coded cookie values.
    # NOTE(review): `cookies` is rebound by the load from cookies.txt further
    # down before anything reads this dict, so these values are never sent.
    cookies = dict(
        acw_tc="276aedc516189085565492268e1530fd7e043ac4523bf573676dca2fca8f3f",
        JSESSIONID="C0B475EE5962B7C90F71EB862627BB73.7",
        neCYtZEjo8GmS="5O6Hn0i.gDRp6f8dJGudI4.UBHLWgRJlh5l0LYeQwx.9KEWPXazNRAx0ALqk7D1CDyRK5iJD4mQirkhOvKvo_eq",
        neCYtZEjo8GmT="53o_V4Cr523Lqqqmg4vUfBGZPKILr44ujVQJu1T3uCI6R2_HxDDUrgumJJqgoZPIADbffc.Zm0xoyktxoxtzqBT3PWI.SuY9aAu7l4hCzrWT5FmQzfRobFk490Pqa7n9DQ7uA86KhZsBZndKQIISaJ3YmIyjTFNa9TH4s8smZyjU2U1zwCiLF3SqijpHDPH3IQSX2DY49D_KkGemtWhT6leZepOcSqfVht8hFiv6uIMWKnsKELMPggGPmevnoLer0RYunt5uXlyTznUKdvOnH8AroiEJe72V2xqHyZ.cDdTtdWvPqib17RKRZb1c7WobFG",
    )
    
    # Open the directory page first, set the implicit wait, and dump the HTML
    # that came back.
    driver.get('http://app1.nmpa.gov.cn/data_nmpa/face3/dir.html?type=yp')
    driver.implicitly_wait(10)
    print(driver.page_source)
    
    # Look up the single cookie whose *name* is 'domain' and print the result.
    # NOTE(review): get_cookies() (all cookies) may have been intended - confirm.
    cookies1 = driver.get_cookie('domain')
    print(cookies1)
    
    # Load previously saved cookies from cookies.txt (a JSON list of cookie
    # dicts) and install each one into the current browser session.
    with open("cookies.txt", "r", encoding="utf-8") as f:
        # `Dict` was never imported from typing, and module-level annotations
        # are evaluated eagerly, so the original `List[Dict]` raised NameError.
        # The builtin `dict` needs no import.
        cookies: List[dict] = json.load(f)
    for cookie in cookies:
        # Strip "expiry" before handing the cookie to the driver (the original
        # author notes that selenium does not accept it here). pop() with a
        # default is a no-op when the key is absent.
        cookie.pop("expiry", None)
        driver.add_cookie(cookie)
    
    # Navigate to the search page now that the cookies from cookies.txt have
    # been added to the session. (An earlier attempt passed the hard-coded dict
    # straight to add_cookie: `driver.add_cookie(cookie_dict=cookies)`.)
    search_url = 'http://app1.nmpa.gov.cn/data_nmpa/face3/search.jsp?6SQk6G2z=GBK-5RWnNqwnGCEoyB6.X6qnkNUjJ44QBnuyOPTxHTrYkEVvJ_zldCQbi6OTK9gkK9QsBjidwgOSqy8a.aQYrg5SizKfHWyPoUF_u4uGfeAMDaoMNmkHbMzfgDMwYcj3fFjXQoiewH_.zQW53CWqKVDHO27YoNfVLGVqanx73YBQK_MGhAGWgCM1PFK7Fz0LvBQe6QURlVuhdpVNmN7wR4MUcec6UwQW4eAq4K5dIQY9Hj76NcKe5yxyb9GJqCDZ70c.D5fLtmNvOyIKSW08REmwXuR_xWJpSqLa9.sZFs3DpZ8913WU1ccwv.a1aNtJDeMQ14S8R.JOOSj2P5zhjENRj43LqrbMZIzs53f4S_mZbLV7&c1SoYK0a=GBK-4fzZ4ejgwRW3SCbDGETEb9bW8e_EQpv8bHkTV0LSyoMbKIL7lpMe7MKFCg_vcVd1P5rVJQNaT8WNG7XYltPd0db7VSRRcUegLXEpKYnPt1t.oVEvxl5ICYo7rsOrufJj6isZrBY25E2UCx2UFW8UfieSqYjda9fAMWsC2oDK4FjTxvgDF8gw1MnNGSVybtCXd'
    driver.get(search_url)
    driver.implicitly_wait(10)
    
    # A disabled element-based flow used to live here: type a keyword into the
    # element with id 'kw', click the element with id 'su', sleep, then collect
    # elements of class 'result-op'. The script now works from the raw page
    # source instead.
    
    # Keep one copy of the HTML for the regex pass below; the page source is
    # read from the driver a second time for the debug dump.
    html = driver.page_source
    print(driver.page_source)
    
    print('result compile=')
    # Capture the inner text of the first <a> inside each <h3 class="t">.
    # The explicit ">" after [^>]* closes the opening <a ...> tag; without it
    # the lazy group started at that ">" and every match came back as ">text".
    regex = re.compile(r'<h3 class="t"><a[^>]*>(.*?)</a>')
    
    # Print each match preceded by its 1-based position.
    patterns = regex.findall(html)
    for tx, i in enumerate(patterns, start=1):
        print(tx)
        print(i)
    
    
    # Progress markers. The element-based result counting and per-item
    # printing that used to follow 'result count=' (via find_element_by_*
    # lookups on the collected result elements) is disabled, so only the
    # labels themselves are printed.
    print('result count=')
    print('s1 end')
    
    # Shut down the browser together with its driver process; otherwise
    # stray browser windows/processes are left behind.
    # NOTE(review): this line is only reached if nothing above raised - wrap
    # the script in try/finally if the browser must always be released.
    driver.quit()
    
    
    print('hello')
  • 相关阅读:
    Creating a generic Web Parts for hosting ASP.NET User Controls
    Speed Up SQL Server Apps 提高SQL Server应用程序的运行效率 (Part 1)
    How to use CreateChildContorls method inherited from System.Web.UI.Control
    How to quickly access Web Part Management Page
    SQL Script tips for MS SQL Server
    How to enable single signon service on the SPS
    A brief summary of UML & Rational Rose – Use Case Diagram, Part II
    Borland Together for Visual Studio.Net V2.0 安装问题
    Speed Up SQL Server Apps 提高SQL Server应用程序的运行效率 (Part 2)
    体验ReSharper V1.0 for VS.Net 2003 Part I
  • 原文地址:https://www.cnblogs.com/CoreXin/p/14688865.html
Copyright © 2011-2022 走看看