zoukankan      html  css  js  c++  java
  • python 验证码识别示例(二) 复杂验证码识别

       在这篇博文中手把手教你如何去分割验证码,然后进行识别。

    一:下载验证码

        

      验证码分析:图片上有折线,验证码中既有数字,也有大小写英文字母,分类的时候需要更多的样本;验证码的字母是彩色的,图片上还有雪花等噪点,因此识别该验证码难度较大。

    二:二值化和降噪:

      

     

     三: 切割:

        

     四:分类:

        

    五:   测试识别率

                    

     六:总结:

      综合识别率在70%左右,对于这个识别率我觉得还是挺高的,因为这个验证码的识别难度还是很大

    代码:

    一.  下载图片:

      

    #-*-coding:utf-8-*-
    import requests
    
    def spider():
        """Download 100 captcha images into ``./1__get_image/``.

        The images are saved as ``1.png`` .. ``100.png``. Each response is
        fetched and checked before the target file is opened, so a failed
        request raises instead of leaving an empty or error-page file behind.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                non-success HTTP status.
        """
        url = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
        for i in range(1, 101):
            print("正在下载的张数是:",i)
            # Fetch first: opening the file before the request would create or
            # truncate it even when the download fails.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            with open("./1__get_image/{}.png".format(i), "wb") as f:
                f.write(response.content)
    # Start the download immediately; there is no __main__ guard, so this also
    # runs if the script is imported.
    spider()

    二: 验证码二值化和降噪:

      

    #-*-coding:utf-8-*-
    # coding:utf-8
    import sys, os
    from PIL import Image, ImageDraw
    
    # Binarised pixel map shared by the functions in this script:
    # (x, y) -> 1 for pixels above the threshold, 0 otherwise.
    t2val = {}


    def twoValue(image, G):
        """Binarise ``image`` into the module-level ``t2val`` map.

        Every pixel whose value is strictly greater than ``G`` is stored as 1,
        every other pixel as 0, keyed by its ``(x, y)`` coordinate.

        :param image: object exposing ``size`` (width, height) and
            ``getpixel((x, y))``
        :param G: binarisation threshold
        """
        width, height = image.size
        for row in range(height):
            for col in range(width):
                above = image.getpixel((col, row)) > G
                t2val[(col, row)] = 1 if above else 0
    
    
    # Noise removal on the binarised map: a pixel is compared with its 8
    # neighbours, and when fewer than N of them hold the same value the pixel
    # is treated as noise.
    def clearNoise(image, N, Z):
        """Remove isolated pixels from the module-level ``t2val`` map.

        For every interior pixel the number of the 8 surrounding pixels that
        hold the same value is counted; when that number is below ``N`` the
        pixel is set to 1. The map is updated in place while it is scanned, so
        a pixel changed earlier in a pass is seen with its new value by the
        pixels visited after it. At the start of each pass the top-left and
        bottom-right corners are set to 1; other border pixels are not examined.

        :param image: object whose ``size`` gives (width, height) of the map
        :param N: minimum number of equal neighbours for a pixel to be kept
        :param Z: number of passes to run
        :return: None -- ``t2val`` is modified in place
        """
        width, height = image.size
        # Neighbour offsets, column by column from left to right.
        offsets = (
            (-1, -1), (-1, 0), (-1, 1),
            (0, -1), (0, 1),
            (1, -1), (1, 0), (1, 1),
        )
        for _ in range(Z):
            t2val[(0, 0)] = 1
            t2val[(width - 1, height - 1)] = 1

            for x in range(1, width - 1):
                for y in range(1, height - 1):
                    centre = t2val[(x, y)]
                    same = sum(
                        1 for dx, dy in offsets
                        if t2val[(x + dx, y + dy)] == centre
                    )
                    if same < N:
                        t2val[(x, y)] = 1
    
    
    def saveImage(filename, size):
        """Render the ``t2val`` map into a 1-bit image and write it to disk.

        :param filename: output path handed to ``Image.save``
        :param size: (width, height) of the image; every coordinate inside
            that range is read from ``t2val``
        """
        canvas = Image.new("1", size)
        pen = ImageDraw.Draw(canvas)
        width, height = size[0], size[1]

        for x in range(width):
            for y in range(height):
                pen.point((x, y), t2val[(x, y)])

        canvas.save(filename)
    
    # Binarise and denoise each of the 100 downloaded captchas.
    for i in range(1, 101):
        # Source captcha and destination of the cleaned-up version.
        path = "1__get_image/" + str(i) +  ".png"
        path1 = "2__erzhihua_jiangzao/" + str(i) + ".jpg"

        # Greyscale first so that each pixel is a single comparable value.
        image = Image.open(path).convert('L')
        twoValue(image, 198)
        clearNoise(image, 3, 1)
        saveImage(path1, image.size)

    三:  切割验证码:

      

    #-*-coding:utf-8-*-
    
    
    from PIL import Image
    
    
    
    def smartSliceImg(img, outDir, ii,count=4, p_w=3):
        '''
        Cut ``img`` into ``count`` vertical slices and save each one as a PNG.

        The width is first divided into ``count`` equal parts. Around each
        nominal boundary the columns from ``boundary - p_w`` up to (but not
        including) ``boundary + p_w`` are examined, clamped to the image, and
        the cut is placed at the column with the fewest pixels equal to 0;
        ties go to the first column examined. Slice ``i`` is written to
        ``outDir + str(ii) + "_" + str(i) + ".png"``.

        :param img: image to cut; needs ``size``, ``load()`` and ``crop()``
        :param outDir: output prefix, concatenated as-is with the file name
        :param ii: number of the source image, used in the output file names
        :param count: how many slices the image is cut into
        :param p_w: how many pixels around each nominal cut are examined
        :return: None
        '''
        w, h = img.size
        pixels = img.load()
        slice_width = int(w / count)
        left = 0
        for index in range(count):
            nominal = (index + 1) * slice_width

            # (column, number of zero-valued pixels) for every candidate cut.
            candidates = []
            for raw_x in range(nominal - p_w, nominal + p_w):
                col = max(min(raw_x, w - 1), 0)
                dark = sum(1 for row in range(h) if pixels[col, row] == 0)
                candidates.append((col, dark))

            # Stable sort: among equally empty columns the first one wins.
            candidates.sort(key=lambda entry: entry[1])
            right = candidates[0][0]

            piece = img.crop((left, 0, right, h))
            piece.save(outDir + str(ii) + "_" + str(index) + ".png")
            left = right
    
    # Cut every cleaned-up captcha into 4 character images under 3__qiege/.
    outDir = '3__qiege/'
    for ii in range(1, 101):
        path = "2__erzhihua_jiangzao/" + str(ii) + ".jpg"
        img = Image.open(path)
        smartSliceImg(img, outDir, ii, count=4, p_w=3)

    四: 训练:

        

    #-*-coding:utf-8-*-
    
    import numpy as np
    import os
    import time
    
    from PIL import Image
    from sklearn.externals import joblib
    from sklearn.neighbors import KNeighborsClassifier
    
    
    def load_dataset():
        """Load the labelled training images from ``fenlei/<label>/``.

        Every image is converted to greyscale and flattened to a vector of
        25 * 30 = 750 values; an image with a different number of pixels makes
        ``reshape`` raise ``ValueError``.

        :return: ``(X, y)`` -- ``X`` is the array of flattened images and ``y``
            the array of matching single-character labels
        """
        X = []
        y = []
        # The label set must match the one used by check_everyone(); it
        # includes "C", which an earlier typo ("V" written twice) left out.
        for label in "23456789ABCDEFGHKMNPRSTUVWXYZ":
            target_path = "fenlei/" + label
            print(target_path)
            # Sorted so the sample order does not depend on the file system.
            for title in sorted(os.listdir(target_path)):
                with Image.open(os.path.join(target_path, title)) as img:
                    pix = np.asarray(img.convert('L'))
                X.append(pix.reshape(25 * 30))
                y.append(label)

        X = np.asarray(X)
        y = np.asarray(y)
        return X, y
    
    def check_everyone(model):
        """Evaluate ``model`` on the labelled images under ``part/<label>/``.

        Prints the predictions next to the true labels once, then the number
        of correct predictions and the accuracy. When no images are found,
        ``0 0.0`` is printed and the model is not called.

        :param model: fitted classifier exposing ``predict``
        :return: None
        """
        pre_list = []
        y_list = []
        for label in "23456789ABCDEFGHKMNPRSTUVWXYZ":
            part_path = "part/" + label
            for title in sorted(os.listdir(part_path)):
                with Image.open(os.path.join(part_path, title)) as img:
                    pix = np.asarray(img.convert('L'))
                pre_list.append(pix.reshape(25 * 30))
                y_list.append(label)

        if not pre_list:
            # Nothing to score: avoid predicting on an empty array and
            # dividing by zero below.
            print(0, 0.0)
            return

        pre_list = np.asarray(pre_list)
        y_list = np.asarray(y_list)

        result_list = model.predict(pre_list)
        print(result_list, y_list)

        # Count matches with plain truthiness; the np.bool alias used
        # previously no longer exists in current NumPy releases.
        acc = sum(1 for hit in result_list == y_list if hit)
        print(acc, acc / len(result_list))
    
    
    # Train a KNeighborsClassifier (default settings) on the labelled
    # character images, save it to disk, then print its accuracy on "part/".
    X, y = load_dataset()
    knn = KNeighborsClassifier()
    knn.fit(X, y)
    # NOTE(review): joblib is imported from sklearn.externals in this script;
    # that module appears to have been dropped from newer scikit-learn
    # releases -- confirm the installed version or use the standalone
    # `joblib` package.
    joblib.dump(knn, 'yipai.model')
    check_everyone(knn)

    五:模型测试:

        

    # -*- coding: utf-8 -*-
    
    import numpy as np
    from PIL import Image
    from sklearn.externals import joblib
    import os
    
    # File names in the download directory with ".png" removed.
    # NOTE(review): presumably the files were renamed to the captcha text so
    # these can act as expected answers -- confirm.
    target_path = "1__get_image/"
    source_result = [title.replace('.png','') for title in os.listdir(target_path)]
    
    
    def predict(model):
        """Recognise the 100 sliced captchas stored under ``part1/``.

        For captcha ``q`` (1..100) the four character images
        ``part1/<q>_0.png`` .. ``part1/<q>_3.png`` are classified and the four
        predicted labels are joined into one string.

        :param model: fitted classifier exposing ``predict``
        :return: list of 100 predicted captcha strings, ordered by ``q``
        """
        predict_result = []
        for q in range(1, 101):
            pre_list = []
            for i in range(4):
                part_path = "part1/" + str(q) + "_" + str(i) + ".png"
                with Image.open(part_path) as img:
                    # Convert to greyscale exactly as the training code does,
                    # so the features are on the same scale whatever mode the
                    # slice was saved in.
                    pix = np.asarray(img.convert('L'))
                pre_list.append(pix.reshape(25 * 30))

            result_list = model.predict(np.asarray(pre_list))
            print(result_list,q)

            predict_result.append("".join(str(label) for label in result_list))

        return predict_result
    
    
    # Load the classifier from 'yipai.model' (the file name the training
    # script dumps to) and run it over the sliced captchas.
    model = joblib.load('yipai.model')
    predict_result = predict(model)
    # print(source_result)
    # print(predict_result)
  • 相关阅读:
    【学习笔记】ASP.NET页面之间传值的方式之Application
    【学习笔记】ASP.NET页面之间传值的方式之QueryString
    【学习笔记】C#中的装箱(inboxing)和拆箱(unboxing)
    C# Func和Action用法以及区别和使用Lambda表达式
    构建ABP vNext项目并切换MySql数据库
    .Net FrameWork发布项目时报Microsoft.Net.Compilers is only supported on MSBuild v16.3 and above错误解决方案
    Docker容器与Linux主机环境获取时间不一致
    shell脚本中无法使用cd的问题解决方法
    Docker安装Mysql8.0,并配置忽略大小写
    Docker-Compose排版一些坑
  • 原文地址:https://www.cnblogs.com/xuchunlin/p/9456593.html
Copyright © 2011-2022 走看看