  • Machine Learning Techniques Notes: Homework #6 AdaBoost & Kernel Ridge Regression Exercises

    Original article: http://www.jianshu.com/p/9bf9e2add795

    AdaBoost

    Problem Description

    Figure 1: Problem description
    Figure 2: Questions 12-18
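
    Since the problem statement itself is only given in the figures above, here is a summary of the AdaBoost-Stump procedure the code below implements, written in standard notation (in the code, epi corresponds to epsilon_t, para to the scaling factor, and alpha_list collects the alpha_t values):

    \epsilon_t = \frac{\sum_{n} u_n^{(t)}\,[\![\, y_n \neq g_t(x_n) \,]\!]}{\sum_{n} u_n^{(t)}}, \qquad
    \diamond_t = \sqrt{\frac{1-\epsilon_t}{\epsilon_t}}, \qquad
    \alpha_t = \ln \diamond_t

    u_n^{(t+1)} =
    \begin{cases}
    u_n^{(t)} \cdot \diamond_t, & y_n \neq g_t(x_n) \\
    u_n^{(t)} / \diamond_t, & y_n = g_t(x_n)
    \end{cases}
    \qquad
    G(x) = \operatorname{sign}\!\Big(\sum_{t=1}^{T} \alpha_t\, g_t(x)\Big)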

    Implementation

    # coding:utf-8
    
    import math
    import numpy as np
    import matplotlib.pyplot as plt
    
    
    def ReadData(dataFile):
    
        with open(dataFile, 'r') as f:
            lines = f.readlines()
            data_list = []
            for line in lines:
                line = line.strip().split()
                data_list.append([float(l) for l in line])
            dataArray = np.array(data_list)
            return dataArray
    
    
    def sign(n):
    
        if(n>=0):
            return 1
        else:
            return -1
    
    
    def GetSortedArray(dataArray,i):
        # sort dataArray in ascending order by the values in column i
        data_list=dataArray.tolist()
        sorted_data_list=sorted(data_list,key=lambda x:x[i],reverse=False)
        sortedDataArray=np.array(sorted_data_list)
        return sortedDataArray
    
    
    def GetUZeroOneError(pred,dataY,u):
        return np.sum(u*np.not_equal(pred,dataY))/np.sum(u)
    
    
    def GetZeroOneError(pred,dataY):
        return np.sum(np.not_equal(pred,dataY))/dataY.shape[0]
    
    
    def decision_stump(dataArray,u):
    
        num_data=dataArray.shape[0]
        num_dim=dataArray.shape[1]-1
        min_e=np.inf
        min_s = np.inf
        min_d=np.inf
        min_theta = np.inf
        min_pred = np.zeros((num_data,))
        for d in range(num_dim):
            sortedDataArray=GetSortedArray(dataArray,d) # sort along dimension d so every candidate theta is valid
            d_min_e=np.inf
            d_min_s = np.inf
            d_min_theta = np.inf
            d_min_pred = np.zeros((num_data,))
            for s in [-1.0,1.0]:
                for i in range(num_data):
                    if(i==0):
                        theta=-np.inf
                        pred=s*np.ones((num_data,))
                    else:
                        if sortedDataArray[i-1,d]==sortedDataArray[i,d]:
                            continue
                        theta=(sortedDataArray[i-1,d]+sortedDataArray[i,d])/2
                        pred=np.zeros((num_data,))
                        for n in range(num_data):
                            pred[n]=s*sign(dataArray[n,d]-theta)
                    d_now_e=GetUZeroOneError(pred,dataArray[:,-1],u)
                    if(d_now_e<d_min_e):
                        d_min_e=d_now_e
                        d_min_s=s
                        d_min_theta=theta
                        d_min_pred=pred
            if(d_min_e<min_e):
                min_e=d_min_e
                min_s=d_min_s
                min_d=d
                min_theta=d_min_theta
                min_pred=d_min_pred
        return min_s,min_d,min_theta,min_pred,min_e
    
    
    def Pred(paraList,dataX):
        # paraList=[s,d,theta]
        num_data=dataX.shape[0]
        pred=np.zeros((num_data,))
        for i in range(num_data):
            pred[i]=paraList[0]*sign(dataX[i,paraList[1]]-paraList[2])
        return pred
    
    
    def plot_line_chart(X=np.arange(0,300,1).tolist(),Y=np.arange(0,300,1).tolist(),nameX="t",nameY="Ein(gt)",saveName="12.png"):
    
        plt.figure(figsize=(30,12))
        plt.plot(X,Y,'b')
        plt.plot(X,Y,'ro')
        plt.xlim((X[0]-1,X[-1]+1))
        for (x,y) in zip(X,Y):
            if(x%10==0):
                plt.text(x+0.1,y,str(round(y,4)))
        plt.xlabel(nameX)
        plt.ylabel(nameY)
        plt.title(nameY+" versus "+nameX)
        plt.savefig(saveName)
        return
    
    
    if __name__=="__main__":
    
        dataArray=ReadData("hw2_adaboost_train.dat")
        dataY=dataArray[:,-1]
        dataX=dataArray[:,:-1]
        num_data=dataArray.shape[0]
        u=np.full(shape=(num_data,),fill_value=1.0/num_data)  # uniform initial example weights
        ein_g_list=[]
        alpha_list=[]
        g_list=[]
        ein_G_list=[]
        u_sum_list=[]
        epi_list=[]
        min_pred_list=[]
        
        # AdaBoost: run 300 rounds of decision-stump boosting, reweighting the examples each round
        for t in range(300):
            u_sum_list.append(np.sum(u))
            min_s,min_d,min_theta,min_pred,epi=decision_stump(dataArray,u)
            g_list.append([min_s,min_d,min_theta])
            min_pred_list.append(min_pred)
            ein_g=GetZeroOneError(min_pred,dataY)
            ein_g_list.append(ein_g)
            epi_list.append(epi)
            para=math.sqrt((1-epi)/epi)  # scaling factor; alpha_t = ln(para)
            alpha_list.append(math.log(para))
            # reweight: scale up misclassified examples, scale down correctly classified ones
            for i in range(num_data):
                if min_pred[i]==dataY[i]:
                    u[i]/=para
                else:
                    u[i]*=para
            # aggregate all stumps obtained so far (including the current one) into G_t
            predG=np.zeros((num_data,))
            for ta in range(t+1):
                predG+=alpha_list[ta]*min_pred_list[ta]
            for n in range(num_data):
                predG[n]=sign(predG[n])
            ein_G_list.append(GetZeroOneError(predG,dataY))
    
        # 12
        plot_line_chart(Y=ein_g_list)
        print("Ein(g1):",ein_g_list[0])
        print("alpha1:",alpha_list[0])
    
        # 14
        plot_line_chart(Y=ein_G_list,nameY="Ein(Gt)",saveName="14.png")
        print("Ein(G):",ein_G_list[-1])
    
        # 15
        plot_line_chart(Y=u_sum_list, nameY="Ut", saveName="15.png")
        print("U2:",u_sum_list[1])
        print("UT:",u_sum_list[-1])
    
        # 16
        plot_line_chart(Y=epi_list,nameY="epsilon_t",saveName="16.png")
        print("the minimum value of epsilon_t:",min(epi_list))
    
        testArray=ReadData("hw2_adaboost_test.dat")
        num_test=testArray.shape[0]
        testX=testArray[:,:-1]
        testY=testArray[:,-1]
        pred_g_list=[]
        eout_g_list=[]
        eout_G_list=[]
        for t in range(300):
            pred_g=Pred(g_list[t],testX)
            pred_g_list.append(pred_g)
            eout_g_list.append(GetZeroOneError(pred_g,testY))
            # aggregate all stumps up to and including the current one on the test set
            pred_G=np.zeros((num_test,))
            for ta in range(t+1):
                pred_G+=alpha_list[ta]*pred_g_list[ta]
            sign_ufunc=np.frompyfunc(sign,1,1)
            pred_G=sign_ufunc(pred_G).astype(float)
            eout_G_list.append(GetZeroOneError(pred_G,testY))
    
        # 17
        plot_line_chart(Y=eout_g_list, nameY="Eout(gt)", saveName="17.png")
        print("Eout(g1):",eout_g_list[0])
    
        # 18
        plot_line_chart(Y=eout_G_list, nameY="Eout(Gt)", saveName="18.png")
        print("Eout(G):",eout_G_list[-1])
    
    

    Results

    Figure 3: Console output
    Figure 4: Result for Question 12
    Figure 5: Result for Question 14
    Figure 6: Result for Question 15
    Figure 7: Result for Question 16
    Figure 8: Result for Question 17
    Figure 9: Result for Question 18

    Kernel Ridge Regression

    Problem Description

    Figure 10: Questions 19-20
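
    For reference, kernel ridge regression (used here as LS-SVM for classification) has the standard closed-form solution below, which is exactly what the code computes; K is the RBF kernel matrix over the 400 training examples:

    \beta = (\lambda I + K)^{-1}\,\mathbf{y}, \qquad
    K_{nm} = \exp\!\big(-\gamma\,\lVert x_n - x_m \rVert^2\big), \qquad
    g(x) = \operatorname{sign}\!\Big(\sum_{n=1}^{N} \beta_n \exp\!\big(-\gamma\,\lVert x_n - x \rVert^2\big)\Big)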

    Implementation

    # coding:utf-8
    
    import numpy as np
    import math
    
    
    def ReadData(dataFile):
    
        with open(dataFile, 'r') as f:
            lines = f.readlines()
            data_list = []
            for line in lines:
                line = line.strip().split()
                data_list.append([1.0]+[float(l) for l in line])  # prepend a constant 1.0 feature
            dataArray = np.array(data_list)
            return dataArray
    
    
    def sign(n):
    
        if(n>=0):
            return 1
        else:
            return -1
    
    
    def RBFKernel(X1,X2,gamma):
        return math.exp(-gamma*np.sum(np.square(X1-X2)))
    
    
    def GetKernelMatrix(trainX,dataX,gamma):
        num_train = trainX.shape[0]
        num_data = dataX.shape[0]
        mat = np.zeros((num_train,num_data))
        for i in range(num_train):
            # when computing K(trainX,trainX), exploit symmetry: fill the lower triangle and mirror it
            if num_train==num_data and np.equal(trainX,dataX).all():
                for j in range(i+1):
                    mat[i][j] = RBFKernel(dataX[i, :], dataX[j, :], gamma)
                    if(i!=j):
                        mat[j][i]=mat[i][j]
            else:
                for j in range(num_data):
                    mat[i][j]=RBFKernel(trainX[i,:],dataX[j,:],gamma)
        return mat
    
    
    def GetZeroOneError(pred,dataY):
        return np.sum(np.not_equal(pred,dataY))/dataY.shape[0]
    
    
    def KernelRidgeRegression(trainArray,lamb,gamma):
        num_train=trainArray.shape[0]
        trainX=trainArray[:,:-1]
        trainY=trainArray[:,-1].reshape((num_train,1))
        K=GetKernelMatrix(trainX,trainX,gamma)
        beta=np.dot(np.linalg.inv(lamb*np.eye(num_train)+K),trainY)  # beta = (lambda*I + K)^{-1} y
        return beta
    
    
    def Predict(trainX,dataX,beta,gamma):
        num_data=dataX.shape[0]
        pred=np.zeros((num_data,))
        # g(x) = sign( sum_n beta_n * K(x_n, x) )
        K=GetKernelMatrix(trainX,dataX,gamma)
        pred=np.dot(K.transpose(),beta).reshape((num_data,))
        for n in range(num_data):
            pred[n]=sign(pred[n])
        return pred
    
    
    if __name__=="__main__":
        dataArray=ReadData("hw2_lssvm_all.dat")
        trainArray=dataArray[:400,:]
        testArray=dataArray[400:,:]
        gammaList=[32,2,0.125]
        lambdaList=[0.001,1,1000]
        ein_list=[]
        eout_list=[]
        for l in lambdaList:
            for g in gammaList:
                beta=KernelRidgeRegression(trainArray,l,g)
                ein_list.append(GetZeroOneError(Predict(trainArray[:,:-1],trainArray[:,:-1],beta,g),trainArray[:,-1]))
                eout_list.append(GetZeroOneError(Predict(trainArray[:,:-1],testArray[:,:-1],beta,g),testArray[:,-1]))
        min_ein=min(ein_list)
        min_ein_id=ein_list.index(min_ein)
        min_eout=min(eout_list)
        min_eout_id=eout_list.index(min_eout)
    
        # 19
        print("the minimum Ein(g):",min_ein,",the corresponding parameter combinations: gamma=",gammaList[min_ein_id%3],",lambda=",lambdaList[min_ein_id//3])
        # 20
        print("the minimum Eout(g):",min_eout,",the corresponding parameter combinations: gamma=",gammaList[min_eout_id%3],",lambda=",lambdaList[min_eout_id//3])
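
    As a quick check of GetKernelMatrix in isolation (illustrative values only, not part of the assignment), the RBF kernel matrix of a point set with itself should be symmetric with ones on its diagonal:

    # sanity check for GetKernelMatrix (illustrative data)
    X_check = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 2.0]])
    K_check = GetKernelMatrix(X_check, X_check, gamma=0.5)
    print(np.allclose(K_check, K_check.T), np.allclose(np.diag(K_check), 1.0))  # True True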
    

    Results

    Figure 11: Console output

  • Source: https://www.cnblogs.com/cherrychenlee/p/10803317.html