zoukankan      html  css  js  c++  java
  • 入门测试,扒拉百度搜索结果

    import sys
    import re
    from typing import List
    import json
    
    from selenium import webdriver
    #from gjypjd.utils import exetcute_sql,if_headless
    #import pymysql
    from selenium.webdriver import Firefox
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.firefox.options import Options
    from selenium.webdriver.support import expected_conditions as expected
    from selenium.webdriver.support.wait import WebDriverWait
    
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    
    # Firefox options carrying the headless flag. They are only consumed by the
    # disabled Firefox launch line below; the PhantomJS driver does not use them.
    options = Options()
    options.add_argument('-headless')
    # Other ways of requesting headless mode that were tried:
    # options.set_headless(True)
    # chrome_options = Options()
    # chrome_options.add_argument('--headless')
    # chrome_options.add_argument('--disable-gpu')

    # Copy PhantomJS's default capabilities and override the user agent so the
    # browser identifies itself as mobile Chrome on an Android Nexus 6.
    dcap = {
        **DesiredCapabilities.PHANTOMJS,
        "phantomjs.page.settings.userAgent": "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36",
    }

    # Firefox alternative, kept for reference:
    # driver = webdriver.Firefox(firefox_options = options)
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    
    # Hard-coded cookie name/value pairs.
    # NOTE(review): `cookies` is rebound from cookies.txt further down before
    # anything reads it (the only call that passed this dict to the driver is
    # commented out), so this literal appears to be unused. These values look
    # like session cookies; confirm they should be kept in source at all.
    cookies ={"acw_tc": "276aedc516189085565492268e1530fd7e043ac4523bf573676dca2fca8f3f",
        "JSESSIONID": "C0B475EE5962B7C90F71EB862627BB73.7",
        "neCYtZEjo8GmS": "5O6Hn0i.gDRp6f8dJGudI4.UBHLWgRJlh5l0LYeQwx.9KEWPXazNRAx0ALqk7D1CDyRK5iJD4mQirkhOvKvo_eq",
        "neCYtZEjo8GmT": "53o_V4Cr523Lqqqmg4vUfBGZPKILr44ujVQJu1T3uCI6R2_HxDDUrgumJJqgoZPIADbffc.Zm0xoyktxoxtzqBT3PWI.SuY9aAu7l4hCzrWT5FmQzfRobFk490Pqa7n9DQ7uA86KhZsBZndKQIISaJ3YmIyjTFNa9TH4s8smZyjU2U1zwCiLF3SqijpHDPH3IQSX2DY49D_KkGemtWhT6leZepOcSqfVht8hFiv6uIMWKnsKELMPggGPmevnoLer0RYunt5uXlyTznUKdvOnH8AroiEJe72V2xqHyZ.cDdTtdWvPqib17RKRZb1c7WobFG"
    }
    
    # Open the directory page first, before any cookies are added to the session.
    driver.get('http://app1.nmpa.gov.cn/data_nmpa/face3/dir.html?type=yp')
    
    # NOTE(review): per the Selenium docs the implicit wait applies to element
    # lookups, not to the page load above; no element lookup follows here.
    # Confirm what this wait is meant to achieve.
    driver.implicitly_wait(10)
    
    # Dump the raw HTML of the landing page for inspection.
    print(driver.page_source)
    
    
    # Looks up a single cookie whose *name* is 'domain' and prints the result.
    # NOTE(review): possibly driver.get_cookies() was intended; confirm.
    cookies1= driver.get_cookie('domain');
    print(cookies1)
    
    # Restore previously saved cookies into the current browser session.
    with open("cookies.txt", "r") as f:
        # Parse the file as JSON; it is expected to hold a list of cookie dicts.
        # The builtin ``dict`` is used in the annotation because ``typing.Dict``
        # is never imported, and module-level variable annotations are evaluated
        # at runtime, so ``List[Dict]`` raised NameError here.
        cookies: List[dict] = json.load(f)
    # Hand every saved cookie to the driver for the site opened above.
    for cookie in cookies:
        # Per the original author's note, Selenium does not accept the "expiry"
        # field here, so drop it when present (no-op when it is absent).
        cookie.pop("expiry", None)
        driver.add_cookie(cookie)
    
    # driver.add_cookie(cookie_dict= cookies)
    # get(): open the given URL (the search page, now that cookies are set).
    # NOTE(review): the two query parameters look like opaque generated
    # tokens; whether they stay valid across sessions cannot be told from here.
    driver.get('http://app1.nmpa.gov.cn/data_nmpa/face3/search.jsp?6SQk6G2z=GBK-5RWnNqwnGCEoyB6.X6qnkNUjJ44QBnuyOPTxHTrYkEVvJ_zldCQbi6OTK9gkK9QsBjidwgOSqy8a.aQYrg5SizKfHWyPoUF_u4uGfeAMDaoMNmkHbMzfgDMwYcj3fFjXQoiewH_.zQW53CWqKVDHO27YoNfVLGVqanx73YBQK_MGhAGWgCM1PFK7Fz0LvBQe6QURlVuhdpVNmN7wR4MUcec6UwQW4eAq4K5dIQY9Hj76NcKe5yxyb9GJqCDZ70c.D5fLtmNvOyIKSW08REmwXuR_xWJpSqLa9.sZFs3DpZ8913WU1ccwv.a1aNtJDeMQ14S8R.JOOSj2P5zhjENRj43LqrbMZIzs53f4S_mZbLV7&c1SoYK0a=GBK-4fzZ4ejgwRW3SCbDGETEb9bW8e_EQpv8bHkTV0LSyoMbKIL7lpMe7MKFCg_vcVd1P5rVJQNaT8WNG7XYltPd0db7VSRRcUegLXEpKYnPt1t.oVEvxl5ICYo7rsOrufJj6isZrBY25E2UCx2UFW8UfieSqYjda9fAMWsC2oDK4FjTxvgDF8gw1MnNGSVybtCXd')
    
    # Re-applies the same 10-second implicit wait setting as before.
    driver.implicitly_wait(10)
    
    
    # driver.implicitly_wait(1000)
    
    # Select the page element
    # element_keyword = driver.find_element_by_id('kw')
    #
    # # Type the search text
    # element_keyword.send_keys('宋曲')
    #
    # # Locate the search button
    # element_search_button = driver.find_element_by_id('su')
    #
    # element_search_button.click()
    # time.sleep(2)
    
    # driver.implicitly_wait(10)
    
    # resultElemnts= driver.find_elements_by_class_name('result-op')
    
    # Keep the page HTML for the regex extraction that follows, and also dump
    # it to stdout. page_source is read twice, so the printed copy comes from
    # a second read rather than from `html`.
    html = driver.page_source
    print(driver.page_source)
    
    print('result compile=')
    # Capture the inner content of the link inside each <h3 class="t"> heading.
    # The ">" that closes the opening <a ...> tag is matched explicitly; the
    # previous pattern left it to the capture group, so every printed title
    # began with a stray ">".
    regex = re.compile(r'<h3 class="t"><a[^>]*>(.*?)</a>')

    # All captured link contents, in document order.
    patterns = regex.findall(html)
    # Print a 1-based running number followed by the captured text.
    for tx, title in enumerate(patterns, start=1):
        print(tx)
        print(title)
        # print(re.match(r'<a.*>(.*)</a>', title).group(1))
        # print(title[1])
        # print(title[1].split('//')[1])
    
    
    # Prints only the label; the statement that printed the actual count is
    # commented out below.
    print('result count=')
    # print(len(resultElemnts))
    #
    # print('result t=')
    # print(resultElemnts[1].find_element_by_class_name('t').text)
    
    # Earlier element-based experiment: walk the result elements, skip the
    # first one, and print the text of each visible title element.
    # discount=1
    # for item in resultElemnts:
    #     if discount == 1:
    #         discount = discount +1
    #         continue
    #
    #     # s2 = (item.find_element_by_xpath('//div/a[1]'))
    #     s9 = item.find_element_by_class_name('t')
    #     if s9 is not None:
    #         if s9.is_displayed():
    #             print(discount)
    #             print(s9.text)
    #
    #         # print(item.find_element_by_class_name('t').text)
    #
    #     # if item.is_displayed():
    #     #     print(discount)
    #     #     print(item.text)
    #     discount=discount+1
    
    
    # s2=resultElemnts[0].find_element_by_xpath(self,'//div/a[1]')
    # print('s1=')
    # if s2.is_displayed():
    #     print(s2.text)
    # print(resultElemnts[0].find_element_by_xpath('//div/a[1]'))
    print('s1 end')
    # ret = driver.find_element_by_id('1')
    # print(ret.text)
    #
    # if ret.text.startswith('宋曲'):  # does the text start with '宋曲'?
    #     print('测试通过')
    # else:
    #     print('不通过')
    
    # Finally, driver.quit() shuts down the browser and the driver process
    # together; otherwise many windows would be left open on the desktop.
    # NOTE(review): this line is not reached if an earlier statement raises;
    # consider wrapping the script in try/finally.
    driver.quit()
    
    
    print('hello')
  • 相关阅读:
    gulp使用技巧-删除node_modules文件夹,解决目录层次太深删除报错的问题
    PHP学习-链接数据库
    教程笔记《JavaScript深入浅出》
    读书笔记《高性能网站建设指南》之雅虎军规
    CSS3边框图片-像素虚边的问题
    WebStorm设置手机测试服务器-局域网内其他设备访问
    gulp的安装和使用
    H5canvas赛车游戏-基于lufylegend引擎
    WebStorm设置左侧菜单栏背景色和样式
    基于jquery的-获取短信验证码-倒计时
  • 原文地址:https://www.cnblogs.com/CoreXin/p/14688865.html
Copyright © 2011-2022 走看看