zoukankan      html  css  js  c++  java
  • 验证码识别

    • import time
      from io import BytesIO
      from PIL import Image
      from selenium import webdriver
      from selenium.webdriver import ActionChains
      from selenium.webdriver.common.by import By
      from selenium.webdriver.support.ui import WebDriverWait
      from selenium.webdriver.support import expected_conditions as EC
      
      
      EMAIL = 'zcs@163.com'
      PASSWORD = '123'
      BORDER = 6
      
      class CrackGeetest():
          """
          Automate the GeeTest slider captcha on the GeeTest account login page.

          Drives a Chrome browser through Selenium: opens the login page, types the
          credentials, triggers the captcha, drags the slider along a simulated
          track and finally submits the login form.
          """

          def __init__(self):
              self.url = 'https://account.geetest.com/login'
              self.browser = webdriver.Chrome()
              # Shared explicit wait: every lookup polls for up to 10 seconds.
              self.wait = WebDriverWait(self.browser, 10)
              self.email = EMAIL
              self.password = PASSWORD

          def __del__(self):
              """
              Release the browser when the object is destroyed.
              :return: None
              """
              # __init__ may have failed before `browser` was assigned, in which
              # case there is nothing to release.
              browser = getattr(self, 'browser', None)
              if browser is not None:
                  # quit() ends the whole driver session; close() would only close
                  # the current window.
                  browser.quit()

          def open_url(self):
              """
              Open the login page and type the e-mail address and password.
              :return: None
              """
              self.browser.get(self.url)
              # Both locators are CSS selectors, so they must be resolved with
              # By.CSS_SELECTOR; By.ID would look for an element whose id equals
              # the selector text and never match.
              # NOTE(review): the selectors depend on the live page markup - confirm
              # that they still pick the e-mail and password inputs.
              email = self.wait.until(
                  EC.presence_of_element_located(
                      (By.CSS_SELECTOR, '.ivu-form-item.ivu-form-item-required.ivu-form-item-error .ivu-input')
                  )
              )
              password = self.wait.until(
                  EC.presence_of_element_located(
                      (By.CSS_SELECTOR, '.ivu-input.ivu-input-has-value')
                  )
              )
              email.send_keys(self.email)
              password.send_keys(self.password)

          def get_button(self):
              """
              Wait for the captcha trigger button to become clickable.
              :return: the button element
              """
              button = self.wait.until(
                  EC.element_to_be_clickable(
                      (By.CLASS_NAME, 'geetest_radar_tip')
                  )
              )
              return button

          def get_slider(self):
              """
              Wait for the slider handle to become clickable.
              :return: the slider element
              """
              slider = self.wait.until(
                  EC.element_to_be_clickable(
                      (By.CLASS_NAME, 'geetest_slider_button')
                  )
              )
              return slider

          def get_image(self):
              """
              Get the captcha images (without and with the gap).
              :return: currently the placeholder pair (1, 2)
              """
              # NOTE(review): this is a stub. get_distance() reads `.size` and
              # `.load()` from both values, so crack() cannot work until real
              # images are returned here.
              return 1, 2

          def get_distance(self, image1, image2):
              """
              Find the x coordinate of the gap by comparing the two images.
              :param image1: captcha image without the gap
              :param image2: captcha image with the gap
              :return: x of the first column (scanning left to right from x=60)
                       holding a pixel whose R, G or B value differs by 70 or
                       more; 60 if no such pixel exists
              """
              # The slider piece is 60 px wide, so the scan starts at x = 60.
              left = 60
              # Largest per-channel difference still treated as "same pixel".
              threshold = 70

              # Load the pixel accessors once instead of once per pixel.
              pixels1 = image1.load()
              pixels2 = image2.load()
              width, height = image1.size

              for x in range(left, width):
                  for y in range(height):
                      rgb1 = pixels1[x, y]
                      rgb2 = pixels2[x, y]
                      if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                          return x
              return left

          def get_track(self, distance):
              """
              Build the list of per-step offsets used to drag the slider.
              :param distance: total offset to cover
              :return: list of integer step sizes; empty when distance <= 0
              """
              track = []
              # Distance covered so far.
              current = 0
              # Accelerate up to 4/5 of the distance, decelerate afterwards.
              mid = distance * 4 / 5
              # Duration of one simulated step.
              t = 0.2
              # Current velocity.
              v = 0

              while current < distance:
                  # Acceleration is +2 before the switch point and -3 after it.
                  a = 2 if current < mid else -3
                  v0 = v
                  # v = v0 + a*t
                  v = v0 + a * t
                  # x = v0*t + 1/2 * a * t^2
                  move = v0 * t + 1 / 2 * a * t * t
                  current += move
                  # Steps are rounded because the mouse moves by whole pixels.
                  track.append(round(move))
              return track

          def move_to_gap(self, slider, track):
              """
              Drag the slider along the given track and release it.
              :param slider: slider element
              :param track: list of horizontal offsets, one per step
              :return: None
              """
              # Press and hold the left mouse button on the slider.
              ActionChains(self.browser).click_and_hold(slider).perform()

              # Replay the track one step at a time, each as its own action chain.
              for x in track:
                  ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
              time.sleep(0.5)
              # Release the mouse button once the slider has been moved.
              ActionChains(self.browser).release().perform()

          def login(self):
              """
              Click the submit button.
              :return: None
              """
              submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest-btn')))
              submit.click()
              time.sleep(10)
              # NOTE(review): printed unconditionally; the outcome is not checked.
              print('登录成功')

          def crack(self, max_retries=3):
              """
              Run the whole flow: fill the form, solve the captcha, log in.
              :param max_retries: extra attempts after a failed verification
              :return: None
              :raises TimeoutException: when the success message never appears,
                      even after all retries
              """
              # Imported here so the method does not depend on a module-level import.
              from selenium.common.exceptions import TimeoutException

              attempts = max(max_retries, 0) + 1
              for attempt in range(attempts):
                  # Type the e-mail address and password.
                  self.open_url()
                  # Click the captcha trigger button.
                  button = self.get_button()
                  button.click()
                  # Images without and with the gap.
                  image1, image2 = self.get_image()
                  # Locate the gap.
                  gap = self.get_distance(image1, image2)
                  print('缺口位置', gap)
                  # Subtract the border offset.
                  gap = gap - BORDER
                  # Build the movement track.
                  track = self.get_track(gap)
                  print('滑动轨迹', track)
                  # Drag the slider.
                  slider = self.get_slider()
                  self.move_to_gap(slider, track)

                  try:
                      # until() raises TimeoutException instead of returning a
                      # falsy value, so failure has to be caught, not tested.
                      success = self.wait.until(
                          EC.text_to_be_present_in_element((By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
                  except TimeoutException:
                      if attempt == attempts - 1:
                          raise
                      # Verification failed: start over.
                      continue

                  print(success)
                  self.login()
                  return
      
      
      def main():
          """
          Script entry point: create a CrackGeetest instance and run its flow.
          :return: None
          """
          # crack() performs the same steps this function used to repeat inline
          # (fill the form, solve the captcha, log in), so delegate to it rather
          # than keep a second copy that would start a new browser on each retry.
          crack_geetest = CrackGeetest()
          crack_geetest.crack()
      
      
      # Run the demo only when this file is executed as a script, not on import.
      if __name__ == '__main__':
          main()
      View Code

      简单的图形验证码识别

      import tesserocr
      from PIL import Image
      
      # Open the captcha and convert it to a greyscale image with convert('L').
      image = Image.open('PFET.jpg').convert('L')

      # Cut-off for binarisation: grey levels below it map to 0, the rest to 1.
      threshold = 127

      # Lookup table with one entry per possible grey level (0-255).
      table = [0 if level < threshold else 1 for level in range(256)]

      # Map every pixel through the table, producing a mode '1' image.
      image = image.point(table, '1')

      image = image.convert('1')
      image.show()

      # Run OCR on the binarised image and print the recognised text.
      result = tesserocr.image_to_text(image)
      print(result)
      
      
      # 输出:
      # PFRT
      View Code
    • 极验滑动验证码的识别:https://www.geetest.com/show

      数据初始化
          def get_geetest_button(self):
              """
              Wait for the initial captcha button to become clickable.
              :return: the button element
              """
              locator = (By.CLASS_NAME, 'geetest_radar_tip')
              return self.wait.until(EC.element_to_be_clickable(locator))
      获取验证按钮
          def get_position(self):
              """
              Locate the captcha image on the page.
              :return: tuple (top, bottom, left, right) of the image edges, built
                       from the element's location and size
              """
              img = self.wait.until(
                  EC.presence_of_element_located(
                      (By.CLASS_NAME, 'geetest_canvas_img')
                  )
              )
              # Fixed pause before reading the geometry.
              # NOTE(review): presumably lets the captcha finish rendering - confirm.
              time.sleep(2)
              location = img.location
              size = img.size
              # One assignment per edge; the original single statement had lost
              # its line continuation and did not parse.
              top = location['y']
              bottom = location['y'] + size['height']
              left = location['x']
              right = location['x'] + size['width']
              return (top, bottom, left, right)
      获取验证码图片所在位置
          def get_screenshot(self):
              """
              Take a screenshot of the current browser window.
              :return: the screenshot as an image object
              """
              # The driver returns the screenshot as PNG-encoded binary data.
              png_bytes = self.browser.get_screenshot_as_png()

              # BytesIO wraps the binary data in an in-memory file so that
              # Image.open can read it (StringIO only handles str).
              return Image.open(BytesIO(png_bytes))
      获取网页截图
          def get_slider(self):
              """
              Wait for the slider handle to become clickable.
              :return: the slider element
              """
              locator = (By.CLASS_NAME, 'geetest_slider_button')
              return self.wait.until(EC.element_to_be_clickable(locator))
      获取滑块
          def get_geetest_image(self, name='captcha.png'):
              """
              Crop the captcha out of a page screenshot and save it.
              :param name: file name the cropped image is saved under
              :return: the cropped captcha image
              """
              top, bottom, left, right = self.get_position()
              print('验证码位置', top, bottom, left, right)
              # crop() takes its box as (left, top, right, bottom).
              crop_box = (left, top, right, bottom)
              captcha = self.get_screenshot().crop(crop_box)
              # Save the cropped captcha to disk.
              captcha.save(name)
              return captcha
      获取验证码图片截图
          def open_url(self):
              """
              Open the login page and type the e-mail address and password.
              :return: None
              """
              self.browser.get(self.url)
              # Wait for both input fields before typing into either of them.
              email_field = self.wait.until(
                  EC.presence_of_element_located((By.ID, 'email'))
              )
              password_field = self.wait.until(
                  EC.presence_of_element_located((By.ID, 'password'))
              )
              email_field.send_keys(self.email)
              password_field.send_keys(self.password)
      打开网页输入用户名密码
  • 相关阅读:
    脑洞大开的爬虫解决思路 转载:https://mp.weixin.qq.com/s/Bd-wz_RiRpYv8ufIbQTZDg
    js逆向某东滑块 转载 https://mp.weixin.qq.com/s/eZSTfduYS63-LOvkAofxqA
    不能爬小程序,叫什么会爬虫 【参考资料也要看】 https://mp.weixin.qq.com/s/oDG3k_qjMZaoygZmz9OUDw
    HDU6042 Journey with Knapsack
    HDU7073 Integers Have Friends 2.0
    CF1439C Greedy Shopping
    CF813E Army Creation
    POJ1322 Chocolate
    CF451E Devu and Flowers
    POJ3734 Blocks
  • 原文地址:https://www.cnblogs.com/liyihua/p/11235098.html
Copyright © 2011-2022 走看看