1.获取需要OCR识别的图片
import logging
import time

from PIL import Image
import pytesseract


def screenshots_picture(driver, locator):
    """Screenshot the current page and crop out the element to be OCR'd.

    The full-page screenshot and the cropped image are both written to the
    directory returned by ``filePictruePath``; file names are derived from
    ``time.time()``.

    :param driver: browser driver; must provide ``save_screenshot``,
        ``find_element`` and ``execute_script``.
    :param locator: locator tuple that is unpacked into
        ``driver.find_element(*locator)``.
    :return: path of the cropped image file, or ``None`` if any step failed
        (the failure is logged with its traceback).

    Note: element coordinates are multiplied by ``window.devicePixelRatio``
    because on high-DPI (retina) screens the screenshot has more pixels than
    the CSS coordinates suggest; without the scaling the wrong region is
    cropped.
    """
    try:
        # Capture the page that contains the captcha image.
        page_path = filePictruePath(f'{time.time()}.png')
        driver.save_screenshot(page_path)

        # Locate the captcha element and read its position and size.
        element = driver.find_element(*locator)
        location = element.location
        size = element.size
        x = int(location['x'])
        y = int(location['y'])
        width = int(size['width'])
        height = int(size['height'])

        # Crop box is (left, top, right, bottom), scaled to device pixels.
        dpr = driver.execute_script('return window.devicePixelRatio')
        box = (x * dpr, y * dpr, (x + width) * dpr, (y + height) * dpr)

        # The context manager closes the screenshot file even if crop/save fails.
        with Image.open(page_path) as page_image:
            cropped = page_image.crop(box)
            ocr_path = filePictruePath(f'{time.time()}ocr.png')
            cropped.save(ocr_path)
        return ocr_path
    except Exception:
        # Best-effort contract is kept (callers get None), but the reason is
        # no longer discarded silently.
        logging.getLogger(__name__).exception(
            'Failed to capture the OCR image for locator %r', locator
        )
        return None
2.OCR识别图片
def ocr_code(screenshots_fileName):
    """Run OCR on an image file and return the cleaned-up text.

    :param screenshots_fileName: path of the image file to recognise.
    :return: the text recognised by ``pytesseract.image_to_string``, stripped
        and then passed through ``filter_str``.
    """
    # Open the image in a context manager so the file handle is released
    # as soon as recognition is done.
    with Image.open(screenshots_fileName) as image:
        identify_text = pytesseract.image_to_string(image).strip()
    print(identify_text)
    # Drop the symbols that would interfere with the recognised code.
    return filter_str(identify_text)
用到的方法:
def filePictruePath(name):
    """Return the path of ``name`` inside the ``screenshot`` directory.

    The directory lives next to the parent directory of this file and is
    created on demand.

    :param name: file name to place inside the screenshot directory.
    :return: full path of the file (the file itself is not created).
    """
    import os

    # abspath guards against a relative __file__, for which the double
    # dirname would be '' and the directory would resolve to '/screenshot'.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    file_dir = os.path.join(base_dir, 'screenshot')
    # exist_ok avoids the check-then-create race and tolerates an existing dir.
    os.makedirs(file_dir, exist_ok=True)
    return os.path.join(file_dir, name)


def filter_str(args):
    """Keep only ASCII digits and ASCII letters from the given value.

    :param args: value to clean; it is converted with ``str()`` first.
    :return: string made of the characters ``0-9``, ``a-z`` and ``A-Z`` from
        the input, in their original order and case.
    """
    text = str(args)
    # Explicit ASCII ranges: comparing ch.upper() against 'A'..'Z' would let
    # characters such as 'ß' (upper 'SS') or the 'ﬁ' ligature (upper 'FI')
    # slip through.
    return ''.join(
        ch for ch in text
        if '0' <= ch <= '9' or 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'
    )
运行上述代码时可能会出现错误(此处未附具体报错信息):
具体解决办法见:https://blog.csdn.net/qq_31362767/article/details/107891185