zoukankan html css js c++ java

python selenium爬虫滑块验证

思路：

1、获取带滑块的图片

2、获取不带滑块、完整的图片

3、比较两张图片中不一样的地方，找到滑块的坐标

4、根据滑块与缺口的坐标计算移动距离，在浏览器中拖动滑块

代码：

import random
import time
from PIL import Image
from io import BytesIO

import requests as rq
from bs4 import BeautifulSoup as bs

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions


def crop_image(image_file_name):
    """Screenshot the page and crop out the captcha canvas.

    Uses the module-level ``browser`` WebDriver. The element's on-page
    location and size are used as the crop box on a full screenshot.

    Args:
        image_file_name: Path the cropped captcha image is saved to.

    Returns:
        The cropped captcha image (the object returned by ``Image.crop``).
    """
    # Presumably gives the captcha widget time to finish rendering.
    time.sleep(2)
    # ``find_element_by_xpath`` was removed in Selenium 4.3; the generic
    # ``find_element`` works on Selenium 3 and 4 ("xpath" == By.XPATH).
    img = browser.find_element("xpath", "//*[@class='geetest_canvas_slice geetest_absolute']")
    location = img.location
    print("图片的位置", location)
    size = img.size
    left, top = location["x"], location["y"]
    right, bottom = left + size["width"], top + size["height"]
    print("验证码位置", left, top, right, bottom)
    # Take a screenshot of the page and cut the captcha region out of it.
    screenshot = Image.open(BytesIO(browser.get_screenshot_as_png()))
    captcha = screenshot.crop((int(left), int(top), int(right), int(bottom)))
    captcha.save(image_file_name)
    return captcha


def compare_pixel(image1, image2, i, j, threshold=60):
    """Return whether two images have nearly the same colour at pixel (i, j).

    Args:
        image1: First image; must expose ``load()`` returning an object
            indexable as ``[x, y]`` (as PIL images do).
        image2: Second image, same requirements as ``image1``.
        i: x coordinate of the pixel.
        j: y coordinate of the pixel.
        threshold: Per-channel difference at or above which the pixels are
            considered different. Defaults to 60, the original fixed value.

    Returns:
        True when each of the first three channels differs by less than
        ``threshold``; False otherwise.
    """
    pixel1 = image1.load()[i, j]
    pixel2 = image2.load()[i, j]
    # Only the first three channels are compared; any further channel
    # (e.g. alpha) is ignored.
    return all(abs(pixel1[c] - pixel2[c]) < threshold for c in range(3))


def find_coordinate(left, img1, img2):
    """Find the first column, scanning rightwards, where the images differ.

    Args:
        left: x coordinate at which the scan starts.
        img1: First image; its ``size`` bounds the scan.
        img2: Second image, compared pixel by pixel against ``img1``.

    Returns:
        The x coordinate of the first column (from ``left``) containing a
        pixel that ``compare_pixel`` reports as different. If no such
        column exists, ``left`` is returned unchanged.
    """
    for x in range(left, img1.size[0]):
        # Rows are scanned top-down; the first mismatch settles the column.
        if any(not compare_pixel(img1, img2, x, y) for y in range(img1.size[1])):
            return x
    return left

# Build a ChromeOptions object with settings meant to avoid automation detection.
options = ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])  # passed as a key/value pair
options.add_argument("--no-sandbox")
options.add_argument("--start-maximized")  # the window must be maximized, otherwise coordinates are inaccurate
options.add_argument('--disable-gpu')
browser = webdriver.Chrome(options=options)
time.sleep(0.6)

browser.get('https://www.bilibili.com/')  # open the site
time.sleep(0.5)

# ``find_element_by_xpath`` was removed in Selenium 4.3; the generic
# ``find_element`` works on Selenium 3 and 4 ("xpath" == By.XPATH).
button = browser.find_element("xpath", '//div[@class="mini-login van-popover__reference"]')  # login entry
button.click()

# Login opens a new window: switch the driver to it, otherwise later lookups
# still run against the previous window. Using the last handle (rather than
# index 1) avoids an IndexError when no new window was opened.
browser.switch_to.window(browser.window_handles[-1])

# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file instead.
user = browser.find_element("xpath", '//input[@id="login-username"]')
user.send_keys('19444338')  # user name
time.sleep(0.5)

passwd = browser.find_element("xpath", '//input[@id="login-passwd"]')
passwd.send_keys('3346777')  # password
time.sleep(0.5)

login = browser.find_element("xpath", '//a[@class="btn btn-login"]')  # clicking login brings up the captcha image
login.click()
time.sleep(0.5)


# Slider verification
def _build_track(slid_coor, target_coor):
    """Split the drag into per-move x offsets that get smaller near the gap."""
    track = []
    moved = 0
    # Stage 1 (until 3/5 of target): steps of a quarter of the slider-to-gap
    # distance. Clamped to >= 1 so a zero/negative step cannot loop forever.
    step = max(1, round((target_coor - slid_coor) / 4))
    while moved < round(target_coor * 3 / 5):
        moved += step
        track.append(step)
    # Stage 2 (until 21/25 of target): a seventh of the remaining distance.
    step = max(1, round((target_coor - moved) / 7))
    while moved < round(target_coor * 21 / 25):
        moved += step
        track.append(step)
    # Stage 3 (until target): one pixel at a time.
    while moved < round(target_coor):
        moved += 1
        track.append(1)
    return track


def slid_verify():
    """Attempt the slider captcha once and return the widget's result text.

    Captures the captcha with the gap and again with the full background
    revealed, diffs the two to locate the slider piece and the gap, then
    drags the slider button in several small moves.

    Uses the module-level ``browser`` WebDriver.

    Returns:
        The stripped text content of the captcha result element.
    """
    # Image with the gap.
    img1 = crop_image('缺口图片.png')
    time.sleep(0.5)

    # Full image.
    # Reference: manipulating element attributes through JavaScript
    #   add:    driver.execute_script("arguments[0].%s=arguments[1]" % attributeName, elementObj, value)
    #   modify: driver.execute_script("arguments[0].setAttribute(arguments[1],arguments[2])", elementObj, attributeName, value)
    #   read:   elementObj.get_attribute(attributeName)
    #   remove: driver.execute_script("arguments[0].removeAttribute(arguments[1])", elementObj, attributeName)
    # https://blog.csdn.net/DansonC/article/details/99398096
    # ``find_element_by_xpath`` was removed in Selenium 4.3; the generic
    # ``find_element`` works on Selenium 3 and 4 ("xpath" == By.XPATH).
    img_obj = browser.find_element("xpath", '//*[@class="geetest_canvas_fullbg geetest_fade geetest_absolute"]')
    # img_style = img_obj.get_attribute('style')  # remember the style value
    # Removing the style attribute makes the full background image visible.
    browser.execute_script("arguments[0].removeAttribute(arguments[1])", img_obj, 'style')
    img2 = crop_image('完整图片.png')
    # browser.execute_script("arguments[0].setAttribute(arguments[1],arguments[2])", img_obj, 'style', img_style)  # put the style back to show the gap image again

    slider = browser.find_element("xpath", "//div[@class='geetest_slider_button']")  # the drag button
    ActionChains(browser).move_to_element(slider).perform()  # hover over the button first

    # x position of the slider piece (scan starts at x=2).
    slid_coor = find_coordinate(2, img1, img2)
    # x position of the gap (scan starts at x=57, presumably past the slider piece).
    target_coor = find_coordinate(57, img1, img2)
    print(slid_coor, target_coor)
    target_coor -= 6  # offset adjustment

    # The slider must not be dragged into place in one move, or the attempt
    # is judged to be a machine; move in several decreasing steps instead.
    track = _build_track(slid_coor, target_coor)

    ActionChains(browser).click_and_hold(slider).perform()  # press and hold the mouse button
    for x in track:
        ActionChains(browser).move_by_offset(xoffset=x, yoffset=0).perform()  # x is the distance of one move
    time.sleep(0.5)
    ActionChains(browser).release().perform()  # release the mouse button
    time.sleep(1)

    # The original used ``Selector`` here, which is never imported in this
    # file; read the result text through the WebDriver instead.
    result_xpath = '/html/body/div[2]/div[2]/div[6]/div/div[1]/div[1]/div/div[3]/div/div[2]'
    result_element = browser.find_element("xpath", result_xpath)
    success_flag = (result_element.get_attribute("textContent") or "").strip()
    return success_flag

success_flag = slid_verify()
# On success the widget shows text like 'sec 秒的速度超过 score% 的用户',
# so the presence of '超过' marks a successful attempt.
while '超过' not in success_flag:
    if '怪物吃了拼图，请重试' == success_flag:  # judged to be a machine: "click to retry" is required
        # ``find_element_by_xpath`` was removed in Selenium 4.3; the generic
        # ``find_element`` works on Selenium 3 and 4 ("xpath" == By.XPATH).
        reclick = browser.find_element("xpath", "//div[@class='geetest_panel_error_content']")
        reclick.click()
    re_verify = browser.find_element("xpath", "//div[@class='geetest_slider_button']")
    re_verify.click()
    success_flag = slid_verify()
    time.sleep(0.5)

查看全文

相关阅读:
nginx防盗链配置
 nginx禁止非sever_name指定域名访问
 linux下配置python环境 django创建helloworld项目
 node解析修改ngix配置文件
 ~/.ssh/config文件的使用
 SpringCloud-Feign声明式服务调用
 Hystrix 配置参数全解析
 Eureka 的高级使用
 eureka中显示有服务但是通过ribbon调用显示No instances available for service-hello的问题
 EureKa:服务注册与发现

原文地址：https://www.cnblogs.com/jaysonteng/p/12875193.html