zoukankan      html  css  js  c++  java
  • 《西瓜书》第五章,神经网络2:单隐层感知器

    ▶ 使用一个隐藏层的神经网络来为散点作分类

    ● 代码

      1 import numpy as np
      2 import matplotlib.pyplot as plt
      3 from mpl_toolkits.mplot3d import Axes3D
      4 from mpl_toolkits.mplot3d.art3d import Poly3DCollection
      5 from matplotlib.patches import Rectangle
      6 
      7 dataSize = 1000                         # 总数据大小
      8 trainRatio = 0.3                        # 训练集占比
      9 ita = 0.4                               # 学习率
     10 epsilon = 0.01                          # 单轮累计误差要求
     11 maxTurn = 500                           # 最大训练轮数
     12 
     13 def dataSplit(data, part):              # 将数据集分割为训练集和测试集
     14     return data[0:part,:],data[part:,:]
     15 
     16 def myHash(data):                       # 将输出的类别向量压成一维索引
     17     size = len(data)
     18     carry = 2                           # 每个输出维度只有(0,1)两个值,使用二进制
     19     output = 0
     20     for i in range(size):
     21         output = output * carry + data[i].astype(int)
     22     return output
     23 
     24 def myColor(x):                                                         # 颜色函数,用于对散点染色
     25     r = np.select([x < 1/2, x < 3/4, x <= 1, True],[0, 4 * x - 2, 1, 0])
     26     g = np.select([x < 1/4, x < 3/4, x <= 1, True],[4 * x, 1, 4 - 4 * x, 0])
     27     b = np.select([x < 1/4, x < 1/2, x <= 1, True],[1, 2 - 4 * x, 0, 0])
     28     return [r,g,b]
     29 
     30 def sigmoid(x):                                                         # sigmoid 函数
     31     return 1 / (1 + np.exp(-x))
     32 
     33 def function(x,para):                                                   # 回归函数
     34     dimIn = np.shape(x)[0]    
     35     t = sigmoid(np.sum(x * para[0],1) - para[1])             
     36     w = sigmoid(np.sum(t * para[2],1) - para[3])                 
     37     return w
     38 
     39 def judge(x, para):                                                     # 分类函数
     40     return (function(x, para) > 0.5*np.ones(len(para[3]))).astype(int)
     41 
     42 def createData(dimIn, dimOut, len):                                     # 生成测试数据,输出一个 len 行 dimIn + dimOut 列的矩阵
     43     np.random.seed(103)                                                 # 该矩阵后 dimOut 列是由前 dimIn 列 乘以一个变换阵 transform 得到的
     44     output = np.zeros([len,dimIn+dimOut])
     45     transform = np.random.rand(dimIn, dimOut)
     46     output[:,:dimIn] = np.random.rand(len, dimIn)        
     47     temp = np.matmul(output[:,:dimIn],transform)
     48     cut = [ sorted(temp[:,i])[int(len/2)] for i in range(dimOut) ]
     49     output[:,dimIn:] = (np.matmul(output[:,:dimIn],transform) > np.array(cut)).astype(int)
     50     #print(transform)                                                    # 输出变换矩阵
     51     #count = np.zeros(2**dimOut,dtype=int)                               # 输出各类别的频数
     52     #for i in range(len):
     53     #    count[myHash(output[i,dimIn:])]+=1
     54     #for i in range(2**dimOut):
     55     #    print("kind %d-> %4d"%(i,count[i]))
     56     return output   
     57 
     58 def bp(data,dimIn,dimMid,dimOut):                                       # 单隐层前馈感知机
     59     len = np.shape(data)[0]                
     60     paraIM = np.random.rand(dimMid,dimIn)                               # 随机初值
     61     paraM  = np.random.rand(dimMid)
     62     paraMO = np.random.rand(dimOut,dimMid)
     63     paraO  = np.random.rand(dimOut)   
     64     #for Debug (3,4,1)
     65     #paraIM = np.array([[ 2.44978959,  1.03356222,  1.25171316],[ 8.22395895,  3.2859132 ,  2.93199359],
     66     #                   [ 9.04487454,  3.58879071,  3.2453638 ],[-0.21185973,  1.01962831,  0.30603421]])
     67     #paraM  = np.array([ 2.37018489,  7.43062236,  8.16598833, -2.18858882])
     68     #paraMO = np.array([[ 2.0904244 ,  9.27716123, 10.35091348, -3.48496074]])
     69     #paraO  = np.array([7.38193999])
     70     # for Debug (3,4,2)
     71     #paraIM = np.array([[ 1.17628308, -4.79611491,  7.82715612],[11.86906762,  4.17981756, 15.40884171],
     72     #                   [ 1.78954837, 12.80408325,  7.59701256],[ 3.39085252,  9.78864594,  5.61460097]])
     73     #paraM  = np.array([ 0.31998583, 15.6483892 , 11.30653251,  9.63319875])
     74     #paraMO = np.array([[ 4.58270357, 18.69604396, -1.50649708,  5.19598108],[-5.82564925,  2.32813282, 14.310699  ,  9.7612375 ]])
     75     #paraO  = np.array([13.58839319, 10.01680799])    
     76     #for Debug (3,4,3)
     77     #paraIM = np.array([[ 7.84220612, 11.00858871, 15.40057016],[ 4.73432349, 11.65162087,  3.72713842],
     78     #                   [ 6.76352431, 18.33213946, 12.05786182],[ 1.10551116, 10.86430962, 13.12483415]])
     79     #paraM  = np.array([17.4944    ,  9.28877255, 19.01824028, 13.60807014])
     80     #paraMO = np.array([[ 17.1177851 ,   4.74404933,   7.25812914,   2.83400745],
     81     #                   [  8.72608932, -12.893494  ,   8.11165234,  18.13834784],
     82     #                   [  4.01719631,   8.32514887,  15.03075945,   3.91946772]])
     83     #paraO  = np.array([17.0558952 ,  8.31617509, 15.87453088])
     84 
     85     count = 0
     86     while count < maxTurn:        
     87         error = 0.0
     88         for i in range(len):            
     89             x = data[i,:dimIn]                                             # 真实输入,1 × dimIn
     90             y = data[i,dimIn:]                                              # 真实输出,1 × dimOut
     91             t = sigmoid(np.sum(x * paraIM,1) - paraM)                       # 中间层输出,1 × dimMid
     92             w = sigmoid(np.sum(t * paraMO,1) - paraO)                       # 输出层输出,1 × dimOut
     93             g = w * (1 - w) * (y - w)                                       # 1 × dimOut            
     94             e = t * (1 - t) * np.sum(g * paraMO.T,1)                        # 1 × dimMid                        
     95             paraMO += ita * np.tile(g,[dimMid,1]).T * np.tile(t,[dimOut,1])
     96             paraO  += -ita * g            
     97             paraIM += ita * np.tile(e,[dimIn,1]).T * np.tile(x,[dimMid,1])
     98             paraM  += -ita * e                                                           
     99             error  += np.sum((y - w)**2)            
    100         error /= len
    101         count += 1        
    102         print("count = ", count, ", error = ", error)
    103         if error < epsilon:
    104             break            
    105     return (paraIM, paraM, paraMO, paraO)                                   # 返回两个系数矩阵和两个偏移向量
    106 
    107 def test(dimIn, dimMid, dimOut):                                            # 测试函数,给定输入维度,中间层神经元个数,输出维度
    108     allData = createData(dimIn, dimOut, dataSize)
    109     trainData, testData = dataSplit(allData, int(dataSize * trainRatio))
    110 
    111     para = bp(trainData,dimIn,dimMid,dimOut)
    112     
    113     myResult = [ judge(i[0:dimIn], para) for i in testData ]
    114     errorRatio = np.sum((np.array(myResult) - testData[:,dimIn:].astype(int))**2) / (dataSize*(1 - trainRatio) * dimOut)
    115     print("dimIn = "+ str(dimIn) + ", dimMid = "+ str(dimMid) + ", dimOut = " + str(dimOut) + ", errorRatio = " + str(round(errorRatio,4)))
    116     
    117     if dimOut >= 4:                                                         # 画图部分,4维以上不画图,只输出测试错误率
    118         return
    119     errorP = []                                                             
    120     classP = [ [] for i in range(2**dimOut) ]
    121     for i in range(np.shape(testData)[0]):
    122         if (myResult[i] != testData[i,dimIn:]).any():
    123             errorP.append(testData[i,:dimIn])
    124         else:             
    125             classP[myHash(testData[i,dimIn:])].append(testData[i,:dimIn])    
    126     errorP = np.array(errorP)
    127     classP = [ np.array(classP[i]) for i in range(2**dimOut) ]
    128 
    129     fig = plt.figure(figsize=(10, 8))                
    130     
    131     if dimIn == 1:
    132         plt.xlim(0.0,1.0)
    133         plt.ylim(-0.25,1.25)              
    134         for i in range(2**dimOut):
    135             plt.scatter(classP[i][:,0], np.ones(len(classP[i]))*i, color = myColor(i/(2**dimOut)), s = 10, label = "class" + str(i))                               
    136         if len(errorP) != 0:
    137             plt.scatter(errorP[:,0], (errorP[:,0]>0.5).astype(int), color = myColor(1), s = 10, label = "errorData")
    138         plt.text(0.75, 0.92, "errorRatio = " + str(round(errorRatio,4)), 
    139             size = 15, ha="center", va="center", bbox=dict(boxstyle="round", ec=(1., 0.5, 0.5), fc=(1., 1., 1.)))
    140         R = [ Rectangle((0,0),0,0, color = myColor(i / (2**dimOut))) for i in range(2**dimOut) ] + [ Rectangle((0,0),0,0, color = myColor(1)) ]
    141         plt.legend(R, [ "class" + str(i) for i in range(2**dimOut) ] + ["errorData"], loc=[0.84, 0.012], ncol=1, numpoints=1, framealpha = 1)
    142     
    143     if dimIn == 2:        
    144         plt.xlim(0.0,1.0)
    145         plt.ylim(0.0,1.0)        
    146         for i in range(2**dimOut):
    147             plt.scatter(classP[i][:,0], classP[i][:,1], color = myColor(i/(2**dimOut)), s = 10, label = "class" + str(i))                               
    148         if len(errorP) != 0:
    149             plt.scatter(errorP[:,0], errorP[:,1], color = myColor(1), s = 10, label = "errorData")
    150         plt.text(0.75, 0.92, "errorRatio = " + str(round(errorRatio,4)), 
    151             size = 15, ha="center", va="center", bbox=dict(boxstyle="round", ec=(1., 0.5, 0.5), fc=(1., 1., 1.)))
    152         R = [ Rectangle((0,0),0,0, color = myColor(i / (2**dimOut))) for i in range(2**dimOut) ] + [ Rectangle((0,0),0,0, color = myColor(1)) ]
    153         plt.legend(R, [ "class" + str(i) for i in range(2**dimOut) ] + ["errorData"], loc=[0.84, 0.012], ncol=1, numpoints=1, framealpha = 1)
    154 
    155     if dimIn == 3:        
    156         ax = Axes3D(fig)
    157         ax.set_xlim3d(0.0, 1.0)
    158         ax.set_ylim3d(0.0, 1.0)
    159         ax.set_zlim3d(0.0, 1.0)
    160         ax.set_xlabel('X', fontdict={'size': 15, 'color': 'k'})
    161         ax.set_ylabel('Y', fontdict={'size': 15, 'color': 'k'})
    162         ax.set_zlabel('Z', fontdict={'size': 15, 'color': 'k'})        
    163         for i in range(2**dimOut):
    164             ax.scatter(classP[i][:,0], classP[i][:,1],classP[i][:,2], color = myColor(i/(2**dimOut)), s = 10, label = "class" + str(i))                               
    165         if len(errorP) != 0:
    166             ax.scatter(errorP[:,0], errorP[:,1],errorP[:,2], color = myColor(1), s = 10, label = "errorData")
    167         ax.text3D(0.8, 1, 1.15, "errorRatio = " + str(round(errorRatio,4)), 
    168             size = 12, ha="center", va="center", bbox=dict(boxstyle="round", ec=(1, 0.5, 0.5), fc=(1, 1, 1)))
    169         R = [ Rectangle((0,0),0,0, color = myColor(i / (2**dimOut))) for i in range(2**dimOut) ] + [ Rectangle((0,0),0,0, color = myColor(1)) ]
    170         plt.legend(R, [ "class" + str(i) for i in range(2**dimOut) ] + ["errorData"], loc=[0.85, 0.02], ncol=1, numpoints=1, framealpha = 1)
    171         
    172     fig.savefig("R:\dimIn" + str(dimIn) + "dimMid" + str(dimMid) + "dimOut" + str(dimOut) + ".png")
    173     plt.close()        
    174 
    175 if __name__ == '__main__':
    176     test(3,4,1)    
    177     test(3,4,2)
    178     test(3,4,3)    
    179     test(2,4,1)
    180     test(2,4,2)     
    181     test(2,3,1)        
    182     test(2,5,1)
    183     test(1,3,1)

    ● 输出结果,写明了输入维度,中间层维度,输出维度,前三个的训练论轮较多,偷懒打表了。可见数据维度越高训练时间越长,单纯增加或减少中间层的神经元个数不一定会改善精度或训练速度

    count =  1 , error =  0.009960528091961436
    dimIn = 3, dimMid = 4, dimOut = 1, errorRatio = 0.0157
    
    count =  1 , error =  0.009961970578632297
    dimIn = 3, dimMid = 4, dimOut = 2, errorRatio = 0.0221
    
    count =  500 , error =  0.01439582086937022
    dimIn = 3, dimMid = 4, dimOut = 3, errorRatio = 0.0119
    
    count =  185 , error =  0.01000959424865353
    count =  186 , error =  0.009996998960686504
    dimIn = 2, dimMid = 4, dimOut = 1, errorRatio = 0.0171
    
    count =  368 , error =  0.010011781526423453
    count =  369 , error =  0.009995998845193808
    dimIn = 2, dimMid = 4, dimOut = 2, errorRatio = 0.0164
    
    count =  183 , error =  0.01000216394730506
    count =  184 , error =  0.009989705432858783
    dimIn = 2, dimMid = 3, dimOut = 1, errorRatio = 0.0186
    
    count =  188 , error =  0.010007727220216402
    count =  189 , error =  0.009995233860763614
    dimIn = 2, dimMid = 5, dimOut = 1, errorRatio = 0.0171

    ● 画图:(3,4,1),(3,4,2),(3,4,3)

    ● 画图:(2,4,1),(2,4,2),(2,3,1),(2,5,1)

     ● 画图:(1,3,1)

  • 相关阅读:
    基于CORTEX-M的实时嵌入式系统
    FileZilla 安装配置参考
    【转】Difference between Point-To-Point and Publish/Subscribe JMS Messaging Models
    flush();close();dispose()
    work staff
    堆、栈、内存管理
    2013.8.1 读程序笔记
    C# static
    只包含schema的dll生成和引用方法
    4个月记
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/11135336.html
Copyright © 2011-2022 走看看