zoukankan html css js c++ java

利用logistic回归解决多分类问题

利用logistic回归解决手写数字识别问题，数据集私聊。

from scipy.io import loadmat
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize

# Load the MATLAB data file; the code below reads its 'X' and 'y' entries.
data = loadmat('ex3data1.mat')

# Shapes of the feature array and the label array.
# NOTE(review): presumably 5000 samples, each a flattened digit image, and
# 5000 labels -- confirm against the .mat file.
data_row, data_cols = data['X'].shape, data['y'].shape

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise.

    Accepts a scalar or any array-like that NumPy can negate and
    exponentiate; the result has the same shape as the input.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def cost(theta, X, y, learningrate):
    """Return the L2-regularized logistic-regression cost.

    Args:
        theta: Parameter vector; coerced to a (1, n) row.
        X: Design matrix with one sample per row.
        y: Targets; assumed to be an (m, 1) column of 0/1 values so that it
            lines up with the (m, 1) hypothesis -- TODO confirm for callers
            other than ``one_vs_all``.
        learningrate: Despite the name, this is the regularization
            strength: it scales the sum of squared parameters.

    Returns:
        The mean cross-entropy over all samples plus the penalty term, as a
        scalar.
    """
    theta = np.array(theta, ndmin=2)
    X = np.array(X, ndmin=2)
    y = np.array(y, ndmin=2)
    m = len(X)

    # Hypothesis for every sample, shape (m, 1).
    h = sigmoid(np.dot(X, theta.T))

    positive_term = np.multiply(-y, np.log(h))
    negative_term = np.multiply(1 - y, np.log(1 - h))

    # theta[:, 0] is excluded from the penalty.
    penalty = (learningrate / (2 * m)) * np.sum(np.power(theta[:, 1:], 2))
    return np.sum(positive_term - negative_term) / m + penalty


def gradientReg(theta, X, y, learningRate):
    """Return the gradient of the regularized logistic-regression cost.

    Args:
        theta: Parameter vector; coerced to a (1, n) row.
        X: Design matrix with one sample per row.
        y: Targets; assumed to be an (m, 1) column matching the hypothesis
            shape -- TODO confirm for callers other than ``one_vs_all``.
        learningRate: Despite the name, this is the regularization
            strength applied to every parameter except the first.

    Returns:
        A flat 1-D array with one gradient entry per parameter.
    """
    theta = np.array(theta, ndmin=2)
    X = np.array(X, ndmin=2)
    y = np.array(y, ndmin=2)
    m = len(X)

    # Residual between the predicted probability and the target, shape (m, 1).
    residual = sigmoid(np.dot(X, theta.T)) - y

    data_term = (np.dot(X.T, residual) / m).T
    grad = data_term + (learningRate / m) * theta

    # Overwrite the first entry with its unregularized value.
    grad[0, 0] = np.sum(np.multiply(residual, X[:, :1])) / m

    return grad.ravel()


def one_vs_all(X, y, num_labels, learning_rate):
    """Train one regularized logistic-regression classifier per label.

    Labels are taken to be the integers 1..num_labels. For each label a
    binary target (1 where ``y`` equals the label, 0 elsewhere) is built and
    fitted with ``scipy.optimize.minimize`` (TNC) using ``cost`` and
    ``gradientReg``.

    Args:
        X: Feature array with one sample per row, WITHOUT a bias column;
            the column of ones is added here.
        y: Label for each row of ``X``.
        num_labels: Number of classes to train.
        learning_rate: Forwarded to ``cost`` / ``gradientReg``, which use it
            as the regularization strength.

    Returns:
        Array of shape (num_labels, n_features + 1); row ``k - 1`` holds the
        fitted parameters for label ``k``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    all_theta = np.zeros((num_labels, n_features + 1))

    # Prepend a column of ones to X.
    X = np.insert(X, 0, values=np.ones(n_samples), axis=1)

    for row, label in enumerate(range(1, num_labels + 1)):
        initial_theta = np.zeros(n_features + 1)

        # Binary target column for this label.
        is_label = (np.asarray(y) == label).astype(int)
        targets = np.reshape(is_label, (n_samples, 1))

        # Find the optimal parameters for this classifier.
        result = minimize(
            fun=cost,
            x0=initial_theta,
            args=(X, targets, learning_rate),
            method='TNC',
            jac=gradientReg,
        )
        all_theta[row, :] = result.x

    return all_theta

# Number of classes and the regularization setting for the run.
num_labels = 10
learningRate = 1

# Row and column counts of the raw feature array.
rows, params = data['X'].shape[0], data['X'].shape[1]

# Zero parameter vector with one extra slot for the bias column.
theta = np.zeros(params + 1)

# Feature array with a column of ones prepended.
X = np.insert(data['X'], 0, values=np.ones(rows), axis=1)

def predict_all(X, all_theta):
    """Predict a 1-based class label for every row of ``X``.

    Args:
        X: Feature array with one sample per row, WITHOUT a bias column;
            the column of ones is added here.
        all_theta: One row of fitted parameters per class (bias first), as
            returned by ``one_vs_all``.

    Returns:
        An ``np.matrix`` of shape (n_samples, 1) holding, for each sample,
        the index (starting at 1) of the classifier with the highest
        predicted probability.
    """
    # Size the bias column from X itself so prediction works for any batch
    # size and does not depend on module-level state.
    n_samples = len(X)

    # Prepend a column of ones to X.
    X = np.insert(X, 0, values=np.ones(n_samples), axis=1)

    X = np.matrix(X)
    all_theta = np.matrix(all_theta)

    # Probability of each class for each sample, shape (n_samples, n_classes).
    h = sigmoid(X * all_theta.T)

    # Index of the largest probability within each row.
    h_argmax = np.argmax(h, axis=1)

    # argmax is 0-based; labels start at 1.
    return h_argmax + 1


# Train one classifier per label; the regularization strength is passed as
# the literal 1.
all_theta = one_vs_all(data['X'], data['y'], num_labels, 1)

# Predict on the same data used for training and score each sample 1/0.
y_predict = predict_all(data['X'], all_theta)
correct = [int(bool(a == b)) for a, b in zip(y_predict, data['y'])]

# Fraction of matching predictions, reported as a percentage.
accuracy = sum(correct) / float(len(correct))
print('accuracy = {0}%'.format(accuracy * 100))

查看全文

相关阅读:
中间件(1)分布式缓存
 大型网站架构演进(9)服务化
 大型网站架构演进(8)业务拆分
 大型网站架构演进(7)数据库拆分
 大型网站架构演进(6)使用NoSQL和搜索引擎
 大型网站架构演进(5)数据库读写分离
 大型网站架构演进(4)使用应用服务器集群
 大型网站架构演进(3)使用缓存改善网站性能
 大型网站架构演进(2)数据库与应用服务器分离
 大型网站架构演进(1)单机网站

原文地址：https://www.cnblogs.com/qiang-wei/p/9871810.html