  • CAPTCHA Recognition with PyTorch

    This post covers two tasks: 1. generating CAPTCHA images; 2. recognizing them with PyTorch.

    1. CAPTCHA generation

    Method 1. Draw the images with PIL's ImageDraw (adapted from another blog post):

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Tue Mar 27 15:45:04 2018
    
    @author: lps
    """
    
    from PIL import Image, ImageDraw, ImageFont, ImageFilter
    import random
    import cv2
    import numpy as np
    import matplotlib.pyplot as plt
    path = '/media/lps/python-3.5.2.amd64/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/'     # font directory
    data_path = '/home/lps/yanzm/'
    
    # random uppercase letter (not used by gen_image below, which uses digits only)
    def rndChar():
          return chr(random.randint(65, 90))
    
    def rndInt():
          return str(random.randint(0,9))        # random digit as a string
    
    def rndColor():
          return (random.randint(64, 255), random.randint(64, 255), random.randint(64, 255))   # random light color for the background noise
    
    def rndColor2():
          return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127))   # random darker color for the digits
    
    def gaussian_noise():   # Gaussian-noise color (defined but not used below)
          mu =  125
          sigma = 20
          return tuple((np.random.normal(mu, sigma, 3).astype(int)))
    
    def rotate(x, angle):  # rotate an image about its center by `angle` degrees
        M_rotate = cv2.getRotationMatrix2D((x.shape[0]/2, x.shape[1]/2), angle, 1)
        x = cv2.warpAffine(x, M_rotate, (x.shape[0], x.shape[1]))
        return x
        
    width = 180 * 4
    height = 180
    
    def gen_image(num):
          
          for l in range(num): 
          
               image = Image.new('RGB', (width, height), (255, 255, 255))   # start from one large blank canvas
        
               font = ImageFont.truetype(path+'cmb10.ttf', 36)
     
               draw = ImageDraw.Draw(image)    # drawing context
    
               for x in range(0,width):
                     for y in range(0,height):
                           draw.point((x, y), fill=rndColor())
                
               label = []
               
               for t in range(4):    # 4 digits per CAPTCHA, one per 180-px-wide cell
                     numb = rndInt()
                     draw.text((180 * t + 60+10, 60+10), numb, font=font, fill=rndColor2())
                     label.append(numb)
                     
               with open(data_path+"label.txt","a") as f:
                     for s in label:
                           f.write(s + ' ')
                     f.write('\n')     # append this image's label
                     
                
               img = image.filter(ImageFilter.GaussianBlur(radius=0.5))
               img = np.array(img)
          
               img1 = np.array([])
    
               for i in range(0,4):
                     img0 = img[:, 180*i: 180*i+180]   # crop the 180x180 cell containing one digit
                     angle = random.randint(-45, 45)
                     img0 = rotate(img0, angle)    # randomly rotate the cell
                
                     if img1.any():
                          img1 = np.concatenate((img1, img0[60:120, 60:120, :]), axis=1)
                
                     else:
                          img1 = img0[60:120, 60:120, :]
                
               plt.imsave(data_path+'src/' + str(l)+'.jpg', img1)     # save the 60x240 result (four 60x60 centre crops stitched side by side)
          
          
    if __name__=='__main__':
          gen_image(100)
    
        
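    To sanity-check the generated samples, a quick preview (a minimal sketch, not part of the original post; the path and file name follow the script above):

    import matplotlib.pyplot as plt

    img = plt.imread('/home/lps/yanzm/src/0.jpg')       # first sample written by gen_image
    print(img.shape)                                     # four 60x60 crops stitched: (60, 240, 3)
    with open('/home/lps/yanzm/label.txt') as f:
        print(f.readline())                              # four space-separated digits
    plt.imshow(img)
    plt.show()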

    The results look roughly like this:

     

    Method 2. Use a more specialized library: captcha

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Sun Mar 25 19:06:46 2018
    
    @author: lps
    """
    from captcha.image import ImageCaptcha
    import numpy as np
    #import matplotlib.pyplot as plt
    from PIL import Image
    import random 
    import cv2
    
    number = ['0','1','2','3','4','5','6','7','8','9']
    alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
    ALPHABET = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
    
    data_path = '/home/lps/yanzm/'
    
    def random_captcha_text(char_set=number,captcha_size=4):    # char_set can be restricted to digits only
        captcha_text = []
        for i in range(captcha_size):
            c = random.choice(char_set)
            captcha_text.append(c)
        return captcha_text
    
    def gen_capthcha_text_and_image(m):
        image = ImageCaptcha()
        captcha_text = random_captcha_text()     # pick four random digits
        captcha_text = ' '.join(captcha_text)  # space-separated label string
        
        captcha = image.generate(captcha_text)
        
    #    image.write(captcha_text,captcha_text+'.jpg')
        
        captcha_image = Image.open(captcha)
        captcha_image = np.array(captcha_image)
        
        with open(data_path+"label.txt","a") as f:     # append the label
                f.write(captcha_text)
                f.write('\n')
        cv2.imwrite(data_path + '/src/'+'%.4d.jpg'%m,  captcha_image)   # save the image
        
    #    return captcha_text,captcha_image
    
    if __name__ == '__main__':
        
        for m in range(0,5000):
    #          text,image = gen_capthcha_text_and_image()
              gen_capthcha_text_and_image(m)
        
        
    #    f = plt.figure()
    #    ax = f.add_subplot(212)
    #    ax.text(0.1,0.1,text,ha='center',va='center',transform=ax.transAxes)
    #    plt.imshow(image)
    #    plt.show()
    #    

    The results look roughly like this:
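    Before training, it is worth confirming the image size and label layout (a minimal sketch, not from the original post; ImageCaptcha defaults to 160x60-pixel images, which is the input size the network below assumes):

    import numpy as np
    from PIL import Image

    img = Image.open('/home/lps/yanzm/src/0000.jpg')
    print(img.size)                                    # (width, height) = (160, 60)

    labels = np.loadtxt('/home/lps/yanzm/label.txt')   # shape (5000, 4): one digit per column
    print(labels.shape, labels[0])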

    2. PyTorch implementation

    A CAPTCHA is a single image: for every input image the network outputs four digits, and a prediction only counts as correct when all four digits are right at the same time. Each image therefore carries four independent multi-class (10-way) classification problems: the digits are 0-9, just like MNIST, except that one image now corresponds to four digits instead of one. The idea is simple, and the implementation is as follows:

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Fri Mar 30 15:46:09 2018
    
    @author: lps
    """
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.autograd import Variable 
    import torch.optim as optim
    import torchvision.models as models
    import torchvision
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms, utils
    import matplotlib.pyplot as plt
    from PIL import Image
    #import pandas as pd
    import numpy as np
    import os
    import copy, time
    
    
    file_path = '/home/lps/yanzm'
    BATCH_SIZE = 16
    EPOCH = 10
    
    # Load data
    class dataset(Dataset):
          
          def __init__(self, root_dir, label_file, transform=None):
                
                self.root_dir = root_dir
                self.label = np.loadtxt(label_file)
                self.transform = transform
                
          def __getitem__(self, idx):
                
                img_name = os.path.join(self.root_dir,'%.4d.jpg'%idx)
                image = Image.open(img_name)
                labels = self.label[idx,:]
                
    #            sample = image
                
                if self.transform:
                     image = self.transform(image)
                
                return image, labels
                
          def __len__(self):
                
                return (self.label.shape[0])
                
    
    data = dataset(file_path+'/src', file_path+'/label.txt',transform=transforms.ToTensor())
    
    dataloader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
    
    dataset_size = len(data)
    
    
    # Conv network
    class ConvNet(nn.Module):
          
          def __init__(self):
                super(ConvNet, self).__init__()
                self.conv =nn.Sequential(
                            nn.Conv2d(3, 32, kernel_size=4, stride=1, padding=2), # in:(bs,3,60,160)
                            nn.BatchNorm2d(32),
                            nn.LeakyReLU(0.2, inplace=True),     
                            nn.MaxPool2d(kernel_size=2),        # out:(bs,32,30,80)
                            
                            nn.Conv2d(32, 64, kernel_size=4, stride=1, padding=2),
                            nn.BatchNorm2d(64),
                            nn.LeakyReLU(0.2, inplace=True),
                            nn.MaxPool2d(kernel_size=2),        # out:(bs,64,15,40)
                            
                            nn.Conv2d(64, 64, kernel_size=3 ,stride=1, padding=1),
                            nn.BatchNorm2d(64),
                            nn.LeakyReLU(0.2, inplace=True),     
                            nn.MaxPool2d(kernel_size=2)         # out:(bs,64,7,20)
                        )
          
                self.fc1 = nn.Linear(64*7*20, 500)
                self.fc2 = nn.Linear(500,40)
          
          def forward(self, x):
                x = self.conv(x)
                x = x.view(x.size(0), -1)    # reshape to (batch_size, 64 * 7 * 20)
                output = self.fc1(x)
                output = self.fc2(output)
                
                return output
    
    
    # Train the net
    class nCrossEntropyLoss(torch.nn.Module):
    
          def __init__(self, n=4):
                super(nCrossEntropyLoss, self).__init__()
                self.n = n
                self.total_loss = 0
                self.loss = nn.CrossEntropyLoss()
            
          def forward(self, output, label):
                output_t = output[:,0:10]
                label = Variable(torch.LongTensor(label.data.cpu().numpy())).cuda()
                label_t = label[:,0]
                
                for i in range(1, self.n):
                      output_t = torch.cat((output_t, output[:,10*i:10*i+10]), 0)   # stack the four 10-way slices along the batch dimension, i.e. treat one image as four multi-class problems and apply a single cross-entropy loss
                      label_t = torch.cat((label_t, label[:,i]), 0)         
                      self.total_loss = self.loss(output_t, label_t)
                
                return self.total_loss
    
    
    def equal(np1,np2):
          
          n = 0
          for i in range(np1.shape[0]):
                if (np1[i,:]==np2[i,:]).all():
                      n += 1 
                
          return n
          
    
    net = ConvNet().cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    #loss_func = nn.CrossEntropyLoss()
    loss_func = nCrossEntropyLoss()
    
    best_model_wts = copy.deepcopy(net.state_dict())  
    best_acc = 0.0   
    
    since = time.time()
    for epoch in range(EPOCH):
          
          running_loss=0.0
          running_corrects=0
          
          for step,(inputs,label) in enumerate(dataloader):
                
                pred = torch.LongTensor(BATCH_SIZE,1).zero_()
                inputs = Variable(inputs).cuda()   # (bs, 3, 60, 160)
                label = Variable(label).cuda()   # (bs, 4)
                
                optimizer.zero_grad() 
                
                output = net(inputs)   # (bs, 40)
                loss = loss_func(output, label) 
                
                for i in range(4):
                      pre = F.log_softmax(output[:,10*i:10*i+10], dim=1)  # (bs, 10)
                pred = torch.cat((pred, pre.data.max(1, keepdim=True)[1].cpu()), dim=1)    # append the argmax of each 10-way slice
                
                loss.backward()
                optimizer.step()
                
                running_loss += loss.data[0] * inputs.size()[0]
                running_corrects += equal(pred.numpy()[:,1:], label.data.cpu().numpy().astype(int))
                            
          epoch_loss = running_loss / dataset_size
          epoch_acc  = running_corrects / dataset_size
                      
          if  epoch_acc > best_acc:   
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(net.state_dict())  
                
          if epoch == EPOCH-1:
                torch.save(best_model_wts, file_path+'/best_model_wts.pkl')
                
          print()
    
          time_elapsed = time.time() - since
          print('Training complete in {:.0f}m {:.0f}s'.format(
                       time_elapsed // 60, time_elapsed % 60))
          print('Train Loss:{:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
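
    A note on the loss: nCrossEntropyLoss stacks the four 10-way output slices along the batch dimension and applies a single CrossEntropyLoss, and only the last loop iteration (which already contains all four slices) is returned. An equivalent, slightly simpler formulation (a sketch, not the author's code) sums one cross-entropy term per digit position:

    # Hypothetical alternative: one CrossEntropyLoss per digit position, summed.
    def four_digit_loss(output, label, n=4):
          # output: (bs, 40) logits; label: (bs, 4) LongTensor of digit classes
          ce = nn.CrossEntropyLoss()
          return sum(ce(output[:, 10*i:10*i+10], label[:, i]) for i in range(n))

    Because the stacked version averages over 4*bs terms while this one sums four per-position means, the two differ only by a constant factor of four.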
               
      

    Training on 5000 randomly generated images gives an accuracy of about 97%.
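
    For completeness, a minimal inference sketch (not in the original post; it reuses ConvNet, file_path, and best_model_wts.pkl from above, and assumes the same Variable-era PyTorch API and a generated image such as 0000.jpg):

    net = ConvNet().cuda()
    net.load_state_dict(torch.load(file_path+'/best_model_wts.pkl'))
    net.eval()

    img = Image.open(file_path+'/src/0000.jpg')
    x = Variable(transforms.ToTensor()(img).unsqueeze(0)).cuda()    # (1, 3, 60, 160)
    output = net(x)                                                 # (1, 40)
    digits = [int(output[:, 10*i:10*i+10].data.max(1)[1].cpu().numpy()[0]) for i in range(4)]
    print('predicted digits:', digits)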

  • Original post: https://www.cnblogs.com/king-lps/p/8724361.html