zoukankan      html  css  js  c++  java
  • Logistic 回归多分类

    Multiclass classification (one-vs-rest logistic regression)

    #DATASET: https://archive.ics.uci.edu/ml/datasets/Glass+Identification
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import sklearn
    import sklearn.preprocessing as pre
    from sklearn.model_selection import train_test_split

    # NOTE(review): this path looks like it lost its directory separators
    # somewhere along the way -- confirm the real location of the data file.
    df = pd.read_csv('dataglassiglass.data')
    # Features are every column except the first and the last; the last column
    # is the class label (the first column is presumably a row id -- confirm).
    X, y = df.iloc[:, 1:-1], df.iloc[:, -1]
    X, y = np.array(X), np.array(y)

    # Re-encode the labels as consecutive integers 0..K-1 in sorted label order.
    # Using the inverse index avoids rewriting y in place, which could collide
    # with not-yet-processed labels for arbitrary label values.
    _, y = np.unique(y, return_inverse=True)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.15, random_state=66
    )

    # Standardize with statistics from the training split only, so nothing
    # about the test split leaks into the fitted scaling.
    f_mean, f_std = np.mean(X_train, axis=0), np.std(X_train, axis=0)
    X_train = (X_train - f_mean) / f_std
    X_test = (X_test - f_mean) / f_std

    # Prepend a column of ones so the first weight acts as the intercept.
    X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), axis=1)
    X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), axis=1)
    #gradient descent function
    
    def get_classifier(X_train, y_train, num_epoch=10000, alpha=0.01):
        """Fit a binary logistic-regression weight vector by batch gradient descent.

        Parameters
        ----------
        X_train : 2-D array, one row per sample and one column per feature.
            No intercept column is added here; include one in X_train if needed.
        y_train : 1-D NumPy array with one target per row of X_train
            (0/1 indicators when called from multi_classifier).
        num_epoch : number of full-batch gradient steps to take.
        alpha : learning rate applied to each gradient step.

        Returns
        -------
        1-D array of length X_train.shape[1] holding the fitted weights.
        Training starts from all zeros, so num_epoch=0 returns zeros.
        """
        theta = np.zeros(X_train.shape[1])
        n_samples = y_train.size  # loop-invariant, hoisted out of the epoch loop
        for _ in range(num_epoch):
            # Sigmoid of the linear scores gives the predicted probabilities.
            h = 1 / (1 + np.exp(-np.dot(X_train, theta)))
            # Mean gradient of the cross-entropy loss with respect to theta.
            gradient = np.dot(h - y_train, X_train) / n_samples
            theta -= alpha * gradient
        return theta
    def multi_classifier(X_train, y_train):
        """Train one binary (one-vs-rest) classifier per distinct label in y_train.

        Parameters
        ----------
        X_train : 2-D array, one row per sample and one column per feature.
        y_train : 1-D NumPy array of class labels, one per row of X_train.

        Returns
        -------
        2-D array with one row per distinct label (in the sorted order returned
        by np.unique) and one column per feature. Row k holds the weights of
        the classifier that separates the k-th label from all the others.
        """
        classes = np.unique(y_train)
        parameter = np.zeros((len(classes), X_train.shape[1]))
        # Enumerate so the row index is the label's position, not its value;
        # this stays correct even when labels are not exactly 0..K-1.
        for row, label in enumerate(classes):
            # Indicator target: 1 for the current class, 0 for every other one.
            label_t = (y_train == label).astype(float)
            parameter[row, :] = get_classifier(X_train, label_t)
        return parameter
    # Fit the one-vs-rest weight matrix on the training split.
    params = multi_classifier(X_train, y_train)
    def pred(parameter, X_test, y_test):
        """Score samples against every per-class weight row and report accuracy.

        Parameters
        ----------
        parameter : 2-D array, one row of weights per class.
        X_test : 2-D array, one row per sample, columns matching parameter.
        y_test : 1-D array of true class indices, one per row of X_test.

        Returns
        -------
        (prob, labels, accuracy) where prob has one row per sample and one
        column per class (sigmoid of the linear scores), labels is the index of
        the highest-scoring class per sample, and accuracy is the percentage
        of labels equal to y_test.

        Raises
        ------
        AssertionError if X_test and y_test have different numbers of rows.
        """
        f_size = X_test.shape
        l_size = y_test.shape
        assert f_size[0] == l_size[0], "X_test and y_test must have the same number of rows"
        # Keep the score matrix 2-D: squeezing it would drop an axis when there
        # is a single sample or a single class and break argmax(axis=1) below.
        logist = np.dot(X_test, np.transpose(parameter))
        prob = 1 / (1 + np.exp(-logist))
        labels = np.argmax(prob, axis=1)
        accuracy = np.sum(labels == y_test) / l_size[0] * 100
        return prob, labels, accuracy
    # Evaluate on the test split; the probability matrix is not needed here.
    _, preds, accu = pred(params, X_test, y_test)
    # The trailing "\n" leaves a blank line between the two printed results.
    print("Prediction: {}\n".format(preds))
    print("Accuracy: {:.3f}%".format(accu))
    Prediction: [0 1 0 4 1 5 1 0 0 1 0 1 0 0 5 1 1 1 1 0 5 4 0 1 5 0 0 1 1 0 3 1 0]
    
    Accuracy: 66.667%
  • 相关阅读:
    QOMO Linux 4.0 正式版发布
    LinkChecker 8.1 发布,网页链接检查
    pgBadger 2.1 发布,PG 日志分析
    Aletheia 0.1.1 发布,HTTP 调试工具
    Teiid 8.2 Beta1 发布,数据虚拟化系统
    zLogFabric 2.2 发布,集中式日志存储系统
    开源电子工作套件 Arduino Start Kit 登场
    Piwik 1.9 发布,网站访问统计系统
    Ruby 1.9.3p286 发布,安全修复版本
    toBraille 1.1.2 发布,Java 盲文库
  • 原文地址:https://www.cnblogs.com/runsdeep/p/11542232.html
Copyright © 2011-2022 走看看