zoukankan      html  css  js  c++  java
  • 亲测,完全有效,使用webdriver,自写,裁判文书网,批量全部下载

    直接上代码(运行前请把代码中的"手机号"和"密码"两处占位符改成自己的登录手机号和密码)

     1 """程序说明"""
     2 # -*-  coding: utf-8 -*-
     3 # Author: zhou bo
     4 # Datetime : 2020
     5 # software: PyCharm
     7 from selenium import webdriver
     8 from selenium.webdriver.common.by import By
     9 from selenium.webdriver.support import expected_conditions as EC
    10 from selenium.webdriver.support.wait import WebDriverWait
    11 import math
    12 import time
    13 import logging
    14 from selenium.webdriver.firefox.options import Options
    15 import os
    16 from crawler_tools import user_agent as u
    17 from datetime import datetime
    18 from selenium.common.exceptions import *
    19 import pyautogui
    20 import random
    21 from selenium.webdriver import ActionChains
    22 from retrying import retry
    23 
    24 
    25 def login(driver):
    26     """登录"""
    27     # 切换框架
    28     wait = WebDriverWait(driver, 20)
    29     driver.refresh()
    30     frame = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="contentIframe"]')))
    31     driver.switch_to.frame(frame)
    32 
    33     click = wait.until(EC.presence_of_element_located(
    34         (By.XPATH, '//*[@id="phoneNumber"]')))
    35     click.send_keys("手机号")
    36     time.sleep(1)
    37     click1 = wait.until(EC.presence_of_element_located((By.XPATH, '/html/body/app-root/div/app-login/div/div/form/div/div[2]/input')))
    38     # click1.clear()
    39     click1.send_keys("密码")
    40     time.sleep(1)  # 等一秒是最优选择,短了网络错误
    41     button1 = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.custom-button')))
    42     button1.click()
    43     # 必须加上表单退出,否者就是死元素无法定位
    44     driver.switch_to.default_content()
    45 
    46     # 通过输入,进行进入数据
    47     select_value = wait.until(EC.presence_of_element_located(
    48         (By.XPATH, '//*[@id="_view_1540966814000"]/div/div[1]/div[2]/input')))#//*[@id="_view_1540966814000"]/div/div[1]/div[2]/input
    49     select_value.send_keys("合同纠纷")
    50     time.sleep(2)  # 等一秒是最优选择,短了网络错误
    51     driver.get(
    52         "https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=b67ff15b548ff825d1e09dc899ecf778&s21=%E5%90%88%E5%90%8C%E7%BA%A0%E7%BA%B7")
    53     five_to_15(driver)
    54     down_load(driver)
    55     while(1):
    56         next_page(driver)
    57         time.sleep(2)
    58         down_load(driver)
    59 
    60 def five_to_15(driver):
    61     wait = WebDriverWait(driver, 20)
    62     button_ = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[8]/div/select')))#//*[@id="_view_1545184311000"]/div[8]/div/select
    63     button_.click()
    64     # time.sleep(1)
    65     button_ = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[8]/div/select/option[3]')))#//*[@id="_view_1545184311000"]/div[8]/div/select/option[3]
    66     button_.click()
    67     time.sleep(1)
    68 
    69 def down_load(driver):
    70     wait = WebDriverWait(driver, 20)
    71     button_select = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[2]/div[4]/a[1]')))
    72     button_select.click()
    73     time.sleep(2)  # 等一秒是最优选择,短了网络错误
    74     button_download = wait.until(
    75         EC.element_to_be_clickable((By.XPATH, '//*[@id="_view_1545184311000"]/div[2]/div[4]/a[3]')))
    76     button_download.click()
    77 
    78 def next_page(driver):
    79     wait = WebDriverWait(driver, 20)
    80     button_ = wait.until(EC.element_to_be_clickable((By.XPATH, '//div[@class="left_7_3"]/a[last()]')))
    81     time.sleep(2)
    82     button_.click()
    83 
    84 
    85 
    86 if __name__ =="__main__":
    87     # 读取限定词目录
    88     driver = webdriver.Chrome('E:GoogleDriverchromedriver.exe')
    89     driver.get("https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=b67ff15b548ff825d1e09dc899ecf778&s21=%E5%90%88%E5%90%8C%E7%BA%A0%E7%BA%B7")
    90     time.sleep(5)
    91     login(driver)
  • 相关阅读:
    未解
    HDU 4642 Fliping game 解题报告
    HDU 4639 Hehe 解题报告
    深入浅出Node.js (11)
    JS文本框获取焦点
    深入理解 BFC
    JS 中函数名后面加与不加括号的区别
    ES6 箭头函数
    sublime 格式化代码
    <!--more-->搭建的博客设置主页内容高度
  • 原文地址:https://www.cnblogs.com/smartisn/p/13865729.html
Copyright © 2011-2022 走看看