zoukankan      html  css  js  c++  java
  • Python Tensorflow CNN 识别验证码

    Python+Tensorflow的CNN技术快速识别验证码

    文章来源于: https://www.jianshu.com/p/26ff7b9075a1

    验证码处理的流程是:验证码分析和处理 —— 安装 TensorFlow —— 模型训练 —— 模型预测

    需要的准备。

      1. 安装TensorFlow

      2.  PIL

      3. numpy

      4. 用于训练的图片

     0.文件目录:

      红色部分有用,其他不用

      

    1. 训练模型的图片:链接:https://pan.baidu.com/s/1kpgt7Pc-ni4WnN6qj8U-pw 密码:nzea

    2.  训练模型代码:

      训练好的模型:链接:https://pan.baidu.com/s/1dNpEtguITKBgbsUU6tCluQ 密码:j07f

    from PIL import Image
    import numpy as np
    import tensorflow as tf
    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
    import random
    
    
    # Input geometry: crack_captcha_cnn reshapes every flattened sample to
    # [IMAGE_HEIGHT, IMAGE_WIDTH, 1] before the first convolution.
    IMAGE_HEIGHT = 114
    IMAGE_WIDTH = 450
    # Number of character slots encoded in one label vector.
    MAX_CAPTCHA = 6
    # Number of classes per slot; name2vec/vec2name map class i to chr(i + 97).
    CHAR_SET_LEN = 26
    
    
    # Default folder holding the captcha images; the file name (minus its
    # extension) is returned as the sample's label.
    # Written as a raw string without a trailing backslash: the previous literal
    # ended in \' which escaped the closing quote, and also contained \U and \x
    # escape sequences, so the module could not be parsed.
    CAPTCHA_DIR = r'C:\Users\xuchunlin\PycharmProjects\ML\20180402\captcha4'


    def get_name_and_image(image_dir=CAPTCHA_DIR):
        """Pick one captcha image at random and return its label and pixels.

        Args:
            image_dir: Directory to sample from. Defaults to ``CAPTCHA_DIR``.

        Returns:
            A ``(name, image)`` tuple where ``name`` is the chosen file's name
            without its extension and ``image`` is the picture converted to a
            numpy array.

        Raises:
            IndexError: If ``image_dir`` contains no entries.
        """
        all_image = os.listdir(image_dir)
        if not all_image:
            raise IndexError('no captcha images found in %r' % (image_dir,))
        # Sample from what is actually on disk instead of assuming the folder
        # holds exactly 3430 files (previously random.randint(0, 3429)).
        file_name = random.choice(all_image)
        name = os.path.splitext(file_name)[0]
        # np.array() reads the pixel data, so the file handle can be released
        # as soon as the conversion is done.
        with Image.open(os.path.join(image_dir, file_name)) as image:
            pixels = np.array(image)
        return name, pixels
    
    
    def name2vec(name):
        """Encode a captcha label as a flat one-hot vector.

        Character ``i`` of ``name`` owns the slice
        ``[i * CHAR_SET_LEN, (i + 1) * CHAR_SET_LEN)`` of the result; within
        that slice the entry at the character's offset from ``'a'`` is set
        to 1. Labels shorter than ``MAX_CAPTCHA`` leave the remaining slices
        all zero.

        Args:
            name: Label text; every character must lie within ``CHAR_SET_LEN``
                code points starting at ``'a'``, and the label may be at most
                ``MAX_CAPTCHA`` characters long.

        Returns:
            A numpy array of length ``MAX_CAPTCHA * CHAR_SET_LEN``.

        Raises:
            ValueError: If the label is too long or contains a character
                outside the alphabet.
        """
        if len(name) > MAX_CAPTCHA:
            raise ValueError(
                'label %r is longer than MAX_CAPTCHA=%d' % (name, MAX_CAPTCHA))
        vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
        for i, c in enumerate(name):
            offset = ord(c) - ord('a')
            # A negative offset would otherwise wrap around via numpy's
            # negative indexing and silently mark the wrong slot.
            if not 0 <= offset < CHAR_SET_LEN:
                raise ValueError(
                    'character %r in label %r is outside the alphabet' % (c, name))
            vector[i * CHAR_SET_LEN + offset] = 1
        return vector
    
    
    def vec2name(vec):
        """Turn a sequence of class indices back into text.

        Each index ``i`` becomes ``chr(i + 97)`` (0 -> 'a', 1 -> 'b', ...) and
        the resulting characters are concatenated in order.
        """
        return "".join(chr(index + 97) for index in vec)
    
    
    # Build one training batch.
    def get_next_batch(batch_size=64):
        """Assemble ``batch_size`` randomly drawn samples into two arrays.

        Every row is filled from one call to get_name_and_image(): the image is
        flattened into the feature row and its name is encoded with name2vec()
        into the label row.

        Returns:
            ``(batch_x, batch_y)`` with shapes
            ``[batch_size, IMAGE_HEIGHT * IMAGE_WIDTH]`` and
            ``[batch_size, MAX_CAPTCHA * CHAR_SET_LEN]``.
        """
        features = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
        labels = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])

        row = 0
        while row < batch_size:
            label_text, picture = get_name_and_image()
            features[row] = 1 * picture.flatten()
            labels[row] = name2vec(label_text)
            row += 1
        return features, labels
    
    ####################################################
    
    # Graph inputs, supplied through feed_dict on every sess.run call.
    # X: flattened images, IMAGE_HEIGHT*IMAGE_WIDTH values per sample.
    X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT*IMAGE_WIDTH])
    # Y: label vectors, MAX_CAPTCHA*CHAR_SET_LEN values per sample.
    Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA*CHAR_SET_LEN])
    # keep_prob: passed to every tf.nn.dropout call in crack_captcha_cnn.
    keep_prob = tf.placeholder(tf.float32)
    
    
    # Define the CNN.
    def _conv_pool_dropout(inputs, in_channels, out_channels, w_alpha, b_alpha):
        """One stage: 5x5 SAME conv + bias + ReLU, 2x2 stride-2 max-pool, dropout."""
        # Weight is created before bias so the variables are created in the
        # same order as in the hand-unrolled version.
        weights = tf.Variable(w_alpha * tf.random_normal([5, 5, in_channels, out_channels]))
        biases = tf.Variable(b_alpha * tf.random_normal([out_channels]))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        conv = tf.nn.relu(tf.nn.bias_add(conv, biases))
        conv = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        return tf.nn.dropout(conv, keep_prob)


    def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
        """Build the captcha network on top of the module-level placeholder X.

        The graph is three conv/pool/dropout stages (32, 64 and 64 output
        channels), a 1024-unit fully connected layer with ReLU and dropout, and
        a linear output layer.

        Args:
            w_alpha: Scale applied to the random-normal initial weights.
            b_alpha: Scale applied to the random-normal initial biases.

        Returns:
            The output tensor of width ``MAX_CAPTCHA * CHAR_SET_LEN`` (raw
            scores; no activation is applied here).
        """
        x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

        # 3 conv layers
        conv1 = _conv_pool_dropout(x, 1, 32, w_alpha, b_alpha)
        conv2 = _conv_pool_dropout(conv1, 32, 64, w_alpha, b_alpha)
        conv3 = _conv_pool_dropout(conv2, 64, 64, w_alpha, b_alpha)

        # Fully connected layer.
        # Derive the flattened size from conv3's static shape instead of the
        # previous hard-coded 15 * 57 * 64, which only matched 114x450 input.
        flat_dim = 1
        for dim in conv3.get_shape().as_list()[1:]:
            flat_dim *= dim
        w_d = tf.Variable(w_alpha * tf.random_normal([flat_dim, 1024]))
        b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
        dense = tf.reshape(conv3, [-1, flat_dim])
        dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
        dense = tf.nn.dropout(dense, keep_prob)

        w_out = tf.Variable(w_alpha * tf.random_normal([1024, MAX_CAPTCHA * CHAR_SET_LEN]))
        b_out = tf.Variable(b_alpha * tf.random_normal([MAX_CAPTCHA * CHAR_SET_LEN]))
        out = tf.add(tf.matmul(dense, w_out), b_out)
        return out
    
    
    # Training.
    def train_crack_captcha_cnn():
        """Train the network until the sampled accuracy exceeds 0.6, then save it.

        Builds the graph from crack_captcha_cnn(), minimises a sigmoid
        cross-entropy loss with Adam, and loops without a step limit. Every
        100 steps accuracy is measured on a fresh batch; once it is above 0.6
        a checkpoint is written and the loop ends.
        """
        output = crack_captcha_cnn()
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    
        # Accuracy is averaged over every character position in the batch
        # (the argmax per slot is compared with the label's argmax), so it is a
        # per-character rate rather than a whole-captcha rate.
        predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
        max_idx_p = tf.argmax(predict, 2)
        max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
        correct_pred = tf.equal(max_idx_p, max_idx_l)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
    
            step = 0
            while True:
                batch_x, batch_y = get_next_batch(64)
                # Dropout keeps half of the activations during training.
                _, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.5})
                print(step, loss_)
    
                # Evaluate accuracy once every 100 steps.
                if step % 100 == 0:
                    # The evaluation batch comes from the same get_next_batch()
                    # sampler as the training batches; dropout is disabled.
                    batch_x_test, batch_y_test = get_next_batch(100)
                    acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                    print(step, acc)
                    # Once accuracy exceeds 60%, save the model and stop training.
                    if acc > 0.6:
                        saver.save(sess, "./crack_capcha.model", global_step=step)
                        break
    
                step += 1
    
    # Module-level call: training starts as soon as this code is executed.
    train_crack_captcha_cnn()

    3.  模型测试代码:

        

    def crack_captcha():
        """Restore the newest checkpoint and print predictions for 10 random captchas.

        The checkpoint is looked up in the current working directory. For each
        sample the true label (taken from the file name) and the decoded
        prediction are printed side by side.

        Raises:
            ValueError: If no checkpoint is found in the current directory.
        """
        output = crack_captcha_cnn()
        # Build the decoding op once. It used to be created inside the loop,
        # which added new reshape/argmax nodes to the graph on every iteration.
        predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            checkpoint = tf.train.latest_checkpoint('.')
            if checkpoint is None:
                raise ValueError('no checkpoint found in the current directory; '
                                 'train the model first')
            saver.restore(sess, checkpoint)
            for _ in range(10):
                text, image = get_name_and_image()
                image = 1 * (image.flatten())
                # keep_prob=1 disables dropout for inference.
                text_list = sess.run(predict, feed_dict={X: [image], keep_prob: 1})
                predict_text = vec2name(text_list[0].tolist())
                print("正确: {}  预测: {}".format(text, predict_text))
    
    # Module-level call: prediction runs as soon as this code is executed.
    crack_captcha()

    训练代码和测试代码文件: 链接:https://pan.baidu.com/s/1VY9rYZizCEjHzim3-XaGyw 密码:epv2

    结果展示:

     你会发现识别率并不高,那是因为上面训练模型的代码中有这几行:

             # 如果准确率大于60%,保存模型,完成训练
                    if acc > 0.6:
                        saver.save(sess, "./crack_capcha.model", global_step=step)
                        break

    设定的准确率阈值只有百分之六十;如果时间充足的话,可以把阈值设置为 0.99 或者 0.98,这样会得到一个不错的模型。

    详细讲解请去原网址看,地址:https://www.jianshu.com/p/26ff7b9075a1

    所有学习资料:链接:https://pan.baidu.com/s/19BoO5sUhLrzpL0a9_rNTRQ 密码:q4ri

      

  • 相关阅读:
    thinkphp5.0与thinkphp3.2之间的区别
    比较数组大小
    PHP语言开发微信公众平台(订阅号)之curl命令(补充)
    ThinkPHP3.2.3快速入门:基础篇
    phpcms利用表单向导创建留言板(可以回复)
    Vijos P1782 借教室 ( 前缀和&&差分序列)
    HDU2648:Shopping(DKBR_hash)
    Codeforces Round #375 (Div. 2)
    BestCoder Round #88
    Codeforces Round #373 (Div. 2)
  • 原文地址:https://www.cnblogs.com/xuchunlin/p/8135528.html
Copyright © 2011-2022 走看看