zoukankan      html  css  js  c++  java
  • 图像验证码识别,字母数字汉子均可cnn+lstm+ctc

    图形验证码如下:

    训练两轮时的准确率:上边显示的是未识别的 

     config_demo.yaml

    System:
      GpuMemoryFraction: 0.7
      TrainSetPath: 'train/'
      TestSetPath: 'test/'
      ValSetPath: 'dev/'
      LabelRegex: '([u4E00-u9FA5]{4,8}).jpg'
      MaxTextLenth: 8
      IMG_W: 200
      IMG_H: 100
      ModelName: 'captcha2.h5'
      Alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    
    
    NeuralNet:
      RNNSize: 256
      Dropout: 0.25
    
    
    TrainParam:
      EarlyStoping:
        monitor: 'val_acc'
        patience: 10
        mode: 'auto'
        baseline: 0.02
      Epochs: 10
      BatchSize: 100
      TestBatchSize: 10
    

      train.py

    # coding=utf-8
    """
    将三通道的图片转为灰度图进行训练
    """
    import itertools
    import os
    import re
    import random
    import string
    from collections import Counter
    from os.path import join
    import yaml
    import cv2
    import numpy as np
    import tensorflow as tf
    from keras import backend as K
    from keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
    from keras.layers import Input, Dense, Activation, Dropout, BatchNormalization, Reshape, Lambda
    from keras.layers.convolutional import Conv2D, MaxPooling2D
    from keras.layers.merge import add, concatenate
    from keras.layers.recurrent import GRU
    from keras.models import Model, load_model
    
    f = open('./config/config_demo.yaml', 'r', encoding='utf-8')
    cfg = f.read()
    cfg_dict = yaml.load(cfg)
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    K.set_session(session)
    
    TRAIN_SET_PTAH = cfg_dict['System']['TrainSetPath']
    VALID_SET_PATH = cfg_dict['System']['TrainSetPath']
    TEST_SET_PATH = cfg_dict['System']['TestSetPath']
    IMG_W = cfg_dict['System']['IMG_W']
    IMG_H = cfg_dict['System']['IMG_H']
    MODEL_NAME = cfg_dict['System']['ModelName']
    LABEL_REGEX = cfg_dict['System']['LabelRegex']
    
    RNN_SIZE = cfg_dict['NeuralNet']['RNNSize']
    DROPOUT = cfg_dict['NeuralNet']['Dropout']
    
    MONITOR = cfg_dict['TrainParam']['EarlyStoping']['monitor']
    PATIENCE = cfg_dict['TrainParam']['EarlyStoping']['patience']
    MODE = cfg_dict['TrainParam']['EarlyStoping']['mode']
    BASELINE = cfg_dict['TrainParam']['EarlyStoping']['baseline']
    EPOCHS = cfg_dict['TrainParam']['Epochs']
    BATCH_SIZE = cfg_dict['TrainParam']['BatchSize']
    TEST_BATCH_SIZE = cfg_dict['TrainParam']['TestBatchSize']
    
    letters_dict = {}
    MAX_LEN = 0
    
    def get_maxlen():
        global MAX_LEN
        maxlen = 0
        lines = open("train.csv", "r", encoding="utf-8").readlines()
        for line in lines:
            name,label = line.strip().split(",")
            if len(label)>maxlen:
                maxlen = len(label)
        MAX_LEN = maxlen
        return maxlen
    
    def get_letters():
        global letters_dict
        letters = ""
    
        lines = open("train.csv","r",encoding="utf-8").readlines()
        maxlen = get_maxlen()
        for line in lines:
            name,label = line.strip().split(",")
            letters = letters+label
            if len(label) < maxlen:
                label = label + '_' * (maxlen - len(label))
            letters_dict[name] = label
    
        if os.path.exists("letters.txt"):
            letters = open("letters.txt","r",encoding="utf-8").read()
            return letters
        return "".join(set(letters))
    
    letters = get_letters()
    f_W = open("letters.txt","w",encoding="utf-8")
    f_W.write("".join(letters))
    class_num = len(letters) + 1   # plus 1 for blank
    print('Letters:', ''.join(letters))
    print("letters_num:",class_num)
    
    def labels_to_text(labels):
        return ''.join([letters[int(x)] if int(x) != len(letters) else '' for x in labels])
    
    def text_to_labels(text):
        return [letters.find(x) if letters.find(x) > -1 else len(letters) for x in text]
    
    
    def is_valid_str(s):
        for ch in s:
            if not ch in letters:
                return False
        return True
    
    
    class TextImageGenerator:
    
        def __init__(self,
                     dirpath,
                     tag,
                     img_w, img_h,
                     batch_size,
                     downsample_factor,
                     ):
            global letters_dict
            self.img_h = img_h
            self.img_w = img_w
            self.batch_size = batch_size
            self.downsample_factor = downsample_factor
            self.letters_dict = letters_dict
            self.n = len(self.letters_dict)
            self.indexes = list(range(self.n))
            self.cur_index = 0
            self.imgs = np.zeros((self.n, self.img_h, self.img_w))
            self.texts = []
    
            for i, (img_filepath, text) in enumerate(self.letters_dict.items()):
    
    
                img_filepath = dirpath+img_filepath
                if i == 0:
                    img_filepath = "train/0.jpg"
                img = cv2.imread(img_filepath)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)     # cv2默认是BGR模式
                img = cv2.resize(img, (self.img_w, self.img_h))
                img = img.astype(np.float32)
                img /= 255
                self.imgs[i, :, :] = img
                self.texts.append(text)
            print(len(self.texts),len(self.imgs),self.n)
    
        @staticmethod
        def get_output_size():
            return len(letters) + 1
    
        def next_sample(self):   #每次返回一个数据和对应标签
            self.cur_index += 1
            if self.cur_index >= self.n:
                self.cur_index = 0
                random.shuffle(self.indexes)
            return self.imgs[self.indexes[self.cur_index]], self.texts[self.indexes[self.cur_index]]
    
        def next_batch(self):   #
            while True:
                # width and height are backwards from typical Keras convention
                # because width is the time dimension when it gets fed into the RNN
                if K.image_data_format() == 'channels_first':
                    X_data = np.ones([self.batch_size, 1, self.img_w, self.img_h])
                else:
                    X_data = np.ones([self.batch_size, self.img_w, self.img_h, 1])
                Y_data = np.ones([self.batch_size, MAX_LEN])
                input_length = np.ones((self.batch_size, 1)) * (self.img_w // self.downsample_factor - 2)
                label_length = np.zeros((self.batch_size, 1))
                source_str = []
    
                for i in range(self.batch_size):
                    img, text = self.next_sample()
                    img = img.T
                    if K.image_data_format() == 'channels_first':
                        img = np.expand_dims(img, 0)     #增加一个维度
                    else:
                        img = np.expand_dims(img, -1)
                    X_data[i] = img
                    Y_data[i] = text_to_labels(text)
                    source_str.append(text)
                    text = text.replace("_", "")  # important step
                    label_length[i] = len(text)
    
                inputs = {
                    'the_input': X_data,
                    'the_labels': Y_data,
                    'input_length': input_length,
                    'label_length': label_length,
                    # 'source_str': source_str
                }
                outputs = {'ctc': np.zeros([self.batch_size])}
                yield (inputs, outputs)
    
    # # Loss and train functions, network architecture
    def ctc_lambda_func(args):    #ctc损失是时间序列损失函数
        y_pred, labels, input_length, label_length = args
        # the 2 is critical here since the first couple outputs of the RNN
        # tend to be garbage:
        y_pred = y_pred[:, 2:, :]
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
    
    
    downsample_factor = 4
    
    
    def train(img_w=IMG_W, img_h=IMG_H, dropout=DROPOUT, batch_size=BATCH_SIZE, rnn_size=RNN_SIZE):
        # Input Parameters
        # Network parameters
        conv_filters = 16
        kernel_size = (3, 3)
        pool_size = 2
        time_dense_size = 32
    
        if K.image_data_format() == 'channels_first':
            input_shape = (1, img_w, img_h)
        else:
            input_shape = (img_w, img_h, 1)
    
        global downsample_factor
        downsample_factor = pool_size ** 2
        tiger_train = TextImageGenerator(TRAIN_SET_PTAH, 'train', img_w, img_h, batch_size, downsample_factor)
        tiger_val = TextImageGenerator(VALID_SET_PATH, 'val', img_w, img_h, batch_size, downsample_factor)
    
        act = 'relu'
        input_data = Input(name='the_input', shape=input_shape, dtype='float32')
        inner = Conv2D(conv_filters, kernel_size, padding='same',
                       activation=None, kernel_initializer='he_normal',
                       name='conv1')(input_data)
        inner = BatchNormalization()(inner)  # add BN
        inner = Activation(act)(inner)
    
        inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
        inner = Conv2D(conv_filters, kernel_size, padding='same',
                       activation=None, kernel_initializer='he_normal',
                       name='conv2')(inner)
        inner = BatchNormalization()(inner)  # add BN
        inner = Activation(act)(inner)
    
        inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
    
        conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
        inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
    
        # cuts down input size going into RNN:
        inner = Dense(time_dense_size, activation=None, name='dense1')(inner)
        inner = BatchNormalization()(inner)  # add BN
        inner = Activation(act)(inner)
        if dropout:
            inner = Dropout(dropout)(inner)  # 防止过拟合
    
        # Two layers of bidirecitonal GRUs
        # GRU seems to work as well, if not better than LSTM:
        gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
        gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(
            inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(
            gru1_merged)
    
        inner = concatenate([gru_2, gru_2b])
    
        if dropout:
            inner = Dropout(dropout)(inner)  # 防止过拟合
    
        # transforms RNN output to character activations:
        inner = Dense(tiger_train.get_output_size(), kernel_initializer='he_normal',
                      name='dense2')(inner)
        y_pred = Activation('softmax', name='softmax')(inner)
        base_model = Model(inputs=input_data, outputs=y_pred)
        base_model.summary()
    
        labels = Input(name='the_labels', shape=[MAX_LEN], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
    
        model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
        # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
        model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta')
    
        earlystoping = EarlyStopping(monitor=MONITOR, patience=PATIENCE, verbose=1, mode=MODE, baseline=BASELINE)
        train_model_path = './tmp/train_' + MODEL_NAME
        checkpointer = ModelCheckpoint(filepath=train_model_path,
                                       verbose=1,
                                       save_best_only=True)
    
        if os.path.exists(train_model_path):
            model.load_weights(train_model_path)
            print('load model weights:%s' % train_model_path)
    
        evaluator = Evaluate(model)
        model.fit_generator(generator=tiger_train.next_batch(),
                            steps_per_epoch=tiger_train.n,
                            epochs=EPOCHS,
                            initial_epoch=1,
                            validation_data=tiger_val.next_batch(),
                            validation_steps=tiger_val.n,
                            callbacks=[checkpointer, earlystoping, evaluator])
    
        print('----train end----')
    
    
    # For a real OCR application, this should be beam search with a dictionary
    # and language model.  For this example, best path is sufficient.
    def decode_batch(out):
        ret = []
        for j in range(out.shape[0]):
            out_best = list(np.argmax(out[j, 2:], 1))
            out_best = [k for k, g in itertools.groupby(out_best)]
            outstr = ''
            for c in out_best:
                if c < len(letters):
                    outstr += letters[c]
            ret.append(outstr)
        return ret
    
    
    class Evaluate(Callback):
        def __init__(self, model):
            self.accs = []
            self.model = model
    
        def on_epoch_end(self, epoch, logs=None):
            acc = evaluate(self.model)
            self.accs.append(acc)
    
    
    # Test on validation images
    def evaluate(model):
        global downsample_factor
        tiger_test = TextImageGenerator(VALID_SET_PATH, 'test', IMG_W, IMG_H, TEST_BATCH_SIZE, downsample_factor)
    
        net_inp = model.get_layer(name='the_input').input
        net_out = model.get_layer(name='softmax').output
        predict_model = Model(inputs=net_inp, outputs=net_out)
    
        equalsIgnoreCaseNum = 0.00
        equalsNum = 0.00
        totalNum = 0.00
        for inp_value, _ in tiger_test.next_batch():
            batch_size = inp_value['the_input'].shape[0]
            X_data = inp_value['the_input']
    
            net_out_value = predict_model.predict(X_data)
            pred_texts = decode_batch(net_out_value)
            labels = inp_value['the_labels']
            texts = []
            for label in labels:
                text = labels_to_text(label)
                texts.append(text)
    
            for i in range(batch_size):
    
                totalNum += 1
                if pred_texts[i] == texts[i]:
                    equalsNum += 1
                if pred_texts[i].lower() == texts[i].lower():
                    equalsIgnoreCaseNum += 1
                else:
                    print('Predict: %s ---> Label: %s' % (pred_texts[i], texts[i]))
            if totalNum >= 10000:
                break
        print('---Result---')
        print('Test num: %d, accuracy: %.5f, ignoreCase accuracy: %.5f' % (
        totalNum, equalsNum / totalNum, equalsIgnoreCaseNum / totalNum))
        return equalsIgnoreCaseNum / totalNum
    
    
    if __name__ == '__main__':
        train()
        test = True
        if test:
            model_path = './tmp/train_' + MODEL_NAME
            model = load_model(model_path, compile=False)
            evaluate(model)
        print('----End----')
    

      interface_testset.py

    import itertools
    import string
    import yaml
    from tqdm import tqdm
    import cv2
    import numpy as np
    import os
    import tensorflow as tf
    from keras import backend as K
    from keras.models import Model, load_model
    
    f = open('./config/config_demo.yaml', 'r', encoding='utf-8')
    cfg = f.read()
    cfg_dict = yaml.load(cfg)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    K.set_session(session)
    
    MODEL_NAME = cfg_dict['System']['ModelName']
    
    
    letters = string.ascii_uppercase + string.ascii_lowercase+string.digits
    
    def decode_batch(out):
        ret = []
        for j in range(out.shape[0]):
            out_best = list(np.argmax(out[j, 2:], 1))
            out_best = [k for k, g in itertools.groupby(out_best)]
            outstr = ''
            for c in out_best:
                if c < len(letters):
                    outstr += letters[c]
            ret.append(outstr)
        return ret
    
    def get_x_data(img_data, img_w, img_h):
        img = cv2.cvtColor(img_data, cv2.COLOR_RGB2GRAY)
        img = cv2.resize(img, (img_w, img_h))
        img = img.astype(np.float32)
        img /= 255
        batch_size = 1
        if K.image_data_format() == 'channels_first':
            X_data = np.ones([batch_size, 1, img_w, img_h])
        else:
            X_data = np.ones([batch_size, img_w, img_h, 1])
        img = img.T
        if K.image_data_format() == 'channels_first':
            img = np.expand_dims(img, 0)
        else:
            img = np.expand_dims(img, -1)
        X_data[0] = img
        return X_data
    
    # Test on validation images
    def interface(datapath ="./testset" ,img_w = 200,img_h = 100):
    
        save_file = open("answer.csv","a",encoding="utf-8")
        save_file.truncate()
        model_path = './tmp/train_' + MODEL_NAME
        model = load_model(model_path, compile=False)
    
        net_inp = model.get_layer(name='the_input').input
        net_out = model.get_layer(name='softmax').output
        predict_model = Model(inputs=net_inp, outputs=net_out)
    
        print("开始预测,预测结果:")
        listdir = os.listdir(datapath)
    
        bar = tqdm(range(len(listdir)),total=len(listdir))
        for idx in bar:
            img_data = cv2.imread(datapath+"/" + str(idx) + ".jpg")
            X_data = get_x_data(img_data, img_w, img_h)
            net_out_value = predict_model.predict(X_data)
            pred_texts = decode_batch(net_out_value)
            #print(str(idx) + ".jpg" + "	", pred_texts[0])
            save_file.write(str(idx)+","+pred_texts[0]+"
    ")
    
    if __name__ == '__main__':
        interface(datapath="./testset")
    

      

  • 相关阅读:
    基本HAL库操作函数整理
    oled(iic协议)
    Uart串口中断收发
    博主回来啦
    博主的冒泡1
    AFO

    起床困难综合症
    费解的开关
    数独
  • 原文地址:https://www.cnblogs.com/LiuXinyu12378/p/13151167.html
Copyright © 2011-2022 走看看