zoukankan      html  css  js  c++  java
  • 潭州课堂25班:Ph201805201 爬虫基础 第十一课 点触验证码 (课堂笔记)

    打开网易盾体验页 http://dun.163.com/trial/picture-click ,依次选择「在线体验」→「图中点选」

    打码平台:超级鹰 http://www.chaojiying.com/

     网易盾  抓取验证码图片

    # -*- coding: utf-8 -*-
    # 斌彬电脑
    # @Time : 2018/9/13 0013 5:27
    
    from selenium import webdriver
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver import ActionChains     # 动作链
    from selenium.webdriver.common.by import By
    import requests,re
    from PIL import Image
    from io import BytesIO      # 不写入磁盘,显示图片文件
    import time
    
    from chao_ji_yin import Chaojiying_Client          # 超级鹰
    
    class WanYy():
        """Automate the NetEase Yidun "click the characters" captcha demo.

        The flow is: open the demo page, screenshot the captcha image, send
        the image to the Chaojiying recognition service (``Chaojiying_Client``
        imported from ``chao_ji_yin``), then click the returned coordinates
        on the captcha image in order.

        An instance is callable; calling it runs the whole flow once.
        """

        # Demo page that hosts the click captcha.
        PAGE_URL = 'http://dun.163.com/trial/picture-click'
        # Vertical scroll (px) applied after loading the page. The same value
        # is subtracted from the element's page Y coordinate when cropping,
        # because the screenshot only shows the scrolled viewport.
        SCROLL_Y = 500
        # Size (px) of the region cut out of the screenshot.
        CAPTCHA_WIDTH = 310
        CAPTCHA_HEIGHT = 210
        # Element that must be clicked to make the captcha appear.
        TRIGGER_XPATH = '/html/body/main/div/div/div[2]/div[2]/div[2]/div/div[2]/div[3]/div/div/div[2]/div[3]/span[2]'
        # The captcha image itself.
        CAPTCHA_XPATH = '/html/body/main/div/div/div[2]/div[2]/div[2]/div/div[2]/div[3]/div/div/div[1]/div/div[1]/img[1]'
        # Chaojiying software id and captcha type code sent with each request.
        SOFT_ID = '897271'
        CODE_TYPE = '9103'
        # Pause (seconds) between two consecutive clicks.
        CLICK_PAUSE = 1

        def __init__(self, user, pas):
            """Start Chrome and remember the Chaojiying credentials.

            :param user: Chaojiying account name.
            :param pas: Chaojiying account password.
            """
            self.user = user
            self.pas = pas
            # Browser parameters.
            options = Options()
            options.add_argument('--window-size=1366,768')
            # ``options=`` is the supported keyword; ``chrome_options=`` is
            # deprecated and rejected by newer Selenium releases.
            self.dri = webdriver.Chrome(options=options)
            self.wait = WebDriverWait(self.dri, 10)

        def get_start(self):
            """Load the demo page and scroll down by ``SCROLL_Y`` pixels."""
            self.dri.get(self.PAGE_URL)
            self.dri.execute_script('window.scrollTo(0, %d)' % self.SCROLL_Y)

        def _captcha_element(self):
            """Wait until the captcha image is visible and return its element."""
            # An explicit wait returns as soon as the image is visible,
            # instead of sleeping for a fixed time.
            return self.wait.until(
                EC.visibility_of_element_located((By.XPATH, self.CAPTCHA_XPATH))
            )

        def get_image(self):
            """Reveal the captcha and return it as PNG-encoded bytes.

            Clicks the trigger element, waits for the captcha image, takes a
            screenshot and crops the captcha region out of it.

            :return: PNG bytes of the cropped captcha region.
            """
            # Click the button that displays the captcha.
            self.wait.until(
                EC.presence_of_element_located((By.XPATH, self.TRIGGER_XPATH))
            ).click()
            im = self._captcha_element()
            # Keep the screenshot in memory instead of writing it to disk.
            screenshot = Image.open(BytesIO(self.dri.get_screenshot_as_png()))
            left = im.location['x']
            # The element location is page-relative; the page was scrolled
            # down by SCROLL_Y, so shift Y back into screenshot coordinates.
            top = im.location['y'] - self.SCROLL_Y
            window_im = screenshot.crop((
                left,
                top,
                left + self.CAPTCHA_WIDTH,
                top + self.CAPTCHA_HEIGHT,
            ))
            im_data = BytesIO()
            # Pass the image format by keyword (the original called the
            # builtin ``format()`` here by accident).
            window_im.save(im_data, format='png')
            return im_data.getvalue()

        @staticmethod
        def _parse_points(pic_str):
            """Split a ``"x1,y1|x2,y2|..."`` string into ``[[x, y], ...]``.

            :param pic_str: coordinate string as returned in ``pic_str``.
            :return: list of ``[x, y]`` lists; the values stay strings.
            :raises ValueError: if ``pic_str`` is empty or ``None``.
            """
            if not pic_str:
                raise ValueError('no captcha coordinates to parse')
            return [point.split(',') for point in pic_str.split('|')]

        def post_validation_participation(self, im_data):
            """Send the captcha image to Chaojiying and return click points.

            :param im_data: captcha image bytes.
            :return: list of ``[x, y]`` string pairs, one per character.
            :raises ValueError: if the response carries no ``pic_str`` value.
            """
            chao = Chaojiying_Client(self.user, self.pas, self.SOFT_ID)
            # PostPic returns the decoded JSON response.
            data = chao.PostPic(im_data, self.CODE_TYPE)
            try:
                return self._parse_points(data.get('pic_str'))
            except ValueError as err:
                # Surface the full response so a failed recognition is easy
                # to diagnose, rather than failing on ``None.split``.
                raise ValueError(
                    'Chaojiying returned no coordinates: %r' % (data,)
                ) from err

        def click_word(self, data_list):
            """Click every point of ``data_list`` on the captcha image, in order.

            :param data_list: sequence of ``[x, y]`` pairs (strings or ints),
                as returned by ``post_validation_participation``.
            """
            im = self._captcha_element()
            last_index = len(data_list) - 1
            for index, point in enumerate(data_list):
                # NOTE(review): the offsets are passed straight through from
                # the recognition result. Whether Selenium measures them from
                # the element's top-left corner or its centre depends on the
                # installed Selenium version - confirm.
                ActionChains(self.dri).move_to_element_with_offset(
                    im, int(point[0]), int(point[1])
                ).click().perform()
                # Pause between clicks, but not after the final one.
                if index < last_index:
                    time.sleep(self.CLICK_PAUSE)

        def __call__(self, *args, **kwargs):
            """Run the full flow: open page, fetch image, recognise, click."""
            self.get_start()
            imdata = self.get_image()
            da_list = self.post_validation_participation(imdata)
            print(da_list)
            self.click_word(da_list)
    
    
    if __name__ == '__main__':
        # Build the automation with the Chaojiying account name and password
        # (placeholders here), then invoke the instance to run the flow once.
        captcha_bot = WanYy('超级鹰账号', '密码')
        captcha_bot()
    

      

    超级鹰  验证码读取

    # -*- coding: utf-8 -*-
    # 斌彬电脑
    # @Time : 2018/9/13 0013 5:04
    
    #!/usr/bin/env python
    # coding:utf-8
    
    import requests
    from hashlib import md5
    
    class Chaojiying_Client(object):
        """Minimal HTTP client for the Chaojiying captcha recognition service.

        NOTE(review): both endpoints use plain ``http://``, so the account
        name and the MD5 digest of the password travel unencrypted. Switch to
        HTTPS if the service supports it - confirm before changing.
        """

        UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
        REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

        def __init__(self, username, password, soft_id, timeout=60):
            """Store the credentials and prepare the shared request fields.

            :param username: account name.
            :param password: plain-text password; only its MD5 hex digest is
                kept and sent.
            :param soft_id: software id sent as ``softid``.
            :param timeout: seconds to wait for each HTTP request, passed to
                ``requests.post``. Without a timeout a stalled server would
                block the caller forever.
            """
            self.username = username
            password = password.encode('utf8')
            self.password = md5(password).hexdigest()
            self.soft_id = soft_id
            self.timeout = timeout
            # Form fields included in every request.
            self.base_params = {
                'user': self.username,
                'pass2': self.password,
                'softid': self.soft_id,
            }
            self.headers = {
                'Connection': 'Keep-Alive',
                'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
            }

        def PostPic(self, im, codetype):
            """Upload a captcha image for recognition.

            :param im: image bytes.
            :param codetype: captcha type code, see
                http://www.chaojiying.com/price.html
            :return: the decoded JSON response.
            """
            params = {
                'codetype': codetype,
            }
            params.update(self.base_params)
            files = {'userfile': ('ccc.jpg', im)}
            r = requests.post(
                self.UPLOAD_URL,
                data=params,
                files=files,
                headers=self.headers,
                timeout=self.timeout,
            )
            return r.json()

        def ReportError(self, im_id):
            """Report a wrongly recognised captcha.

            :param im_id: image id of the captcha being reported.
            :return: the decoded JSON response.
            """
            params = {
                'id': im_id,
            }
            params.update(self.base_params)
            r = requests.post(
                self.REPORT_URL,
                data=params,
                headers=self.headers,
                timeout=self.timeout,
            )
            return r.json()
    
    
    if __name__ == '__main__':
        # Replace the two placeholder strings with a real account name and
        # password. The third argument is the software ID, generated under
        # "User Center >> Software ID" on the Chaojiying site.
        chaojiying = Chaojiying_Client('账号', '密码', '897271')
        # Replace 'a.jpg' with the path of a local image file. The file is
        # opened in a ``with`` block so the handle is always closed.
        with open('a.jpg', 'rb') as image_file:
            im = image_file.read()
        # 1902 is the captcha type code (see the price list on the official
        # site). PostPic must be called on the client and its result printed;
        # the original called it on the ``None`` returned by ``print``.
        print(chaojiying.PostPic(im, 1902))
    

      

  • 相关阅读:
    【宁夏区域赛】G.Pot!
    【C#】上机实验二
    【C#】上机实验三
    Luogu P1437 敲砖块
    Luogu P1463 反素数
    Luogu P1445 樱花
    GHOJ 926 小X的AK计划
    【题解】Beads
    【题解】Antisymmetry
    【题解】A Horrible Poem
  • 原文地址:https://www.cnblogs.com/gdwz922/p/9638391.html
Copyright © 2011-2022 走看看