Simple Machine Learning Code (LR, Kmeans, NN, RNN)

    Logistic Regression

    Pay particular attention to how exp and log are used.

    The raw sigmoid expression is 1 / (1 + exp(-z)), but evaluating it directly at z = -710 triggers an overflow, because exp(710) exceeds the float64 range. For z < 0, use the equivalent form exp(z) / (1 + exp(z)) instead; then exp(-710) merely underflows to 0 and causes no problem. This is exactly what the expit function in the scipy package does.

    log_logistic = log(sigmoid). Note that, just like expit, it splits into the same two cases so that neither branch can overflow.
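
    A minimal sketch of those two piecewise-stable forms (illustrative only, not the actual scipy implementation; the helper names are my own):

        import numpy as np

        def stable_sigmoid(z):
            # For z >= 0 the textbook form is safe; for z < 0 switch to
            # exp(z) / (1 + exp(z)) so exp() only ever sees non-positive arguments.
            z = np.asarray(z, dtype=float)
            out = np.empty_like(z)
            pos, neg = z >= 0, z < 0
            out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
            ez = np.exp(z[neg])
            out[neg] = ez / (1.0 + ez)
            return out

        def stable_log_sigmoid(z):
            # log(sigmoid(z)) with the same case split:
            #   z >= 0: -log(1 + exp(-z))
            #   z <  0:  z - log(1 + exp(z))
            z = np.asarray(z, dtype=float)
            out = np.empty_like(z)
            pos, neg = z >= 0, z < 0
            out[pos] = -np.log1p(np.exp(-z[pos]))
            out[neg] = z[neg] - np.log1p(np.exp(z[neg]))
            return out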

        import numpy as np
        from scipy.special import expit
        from sklearn.utils.extmath import log_logistic


        def predict(theta, x):
            return expit(x.dot(theta))


        def compute_loss(y, yz):
            # negative log-likelihood; log_logistic(yz) = log(sigmoid(yz)) expects signed labels
            return -np.sum(log_logistic(yz))


        def gradientdescent(x, y, theta, iterations=2000, lr=0.01):
            # x: (m, n) design matrix, y: (m, 1) labels in {0, 1}
            m = x.shape[0]
            loss_list = []
            for i in range(iterations):
                yhat = predict(theta, x)
                delta = x.T.dot(yhat - y) / m
                # map {0, 1} labels to {-1, +1} so yz matches what log_logistic expects
                loss = compute_loss(y, (2 * y - 1) * x.dot(theta))
                loss_list.append(loss)
                theta = theta - lr * delta
            return theta, loss_list


        theta, loss_list = gradientdescent(X, y, np.zeros((n, 1)))
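
    The last line assumes X, y, and n already exist. One way to try it on synthetic data, reusing the imports and functions defined above (my own illustration; the true coefficients are arbitrary):

        np.random.seed(0)
        m, n = 200, 3
        X = np.random.randn(m, n)
        true_theta = np.array([[1.5], [-2.0], [0.5]])
        # Bernoulli labels in {0, 1} drawn from the true logistic model
        y = (np.random.rand(m, 1) < expit(X.dot(true_theta))).astype(float)

        theta, loss_list = gradientdescent(X, y, np.zeros((n, 1)))
        print(theta.ravel(), loss_list[-1])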

    Kmeans

    Kmeans is essentially the EM algorithm, only with hard assignments instead of soft ones. First initialize K centers; in the E step, assign every sample to its nearest center, and in the M step, recompute each center so as to minimize the within-cluster distance.

        import numpy as np


        def calc_dist(x1, x2):
            # squared Euclidean distance
            return sum([(x1[i] - x2[i])**2 for i in range(len(x1))])


        # E step: assign samples to the given centers
        def E_step(X, cents):
            cent_dict = dict(zip(cents, [[] for _ in range(len(cents))]))
            for row in X:
                min_dist, best_cent = 1e10, None
                for cent in cent_dict:
                    dist = calc_dist(row, cent)
                    if dist < min_dist:
                        min_dist = dist
                        best_cent = cent
                cent_dict[best_cent] += [row.tolist()]
            return cent_dict


        # M step: compute new centers
        def M_step(cent_dict):
            new_cents = []
            for cent in cent_dict:
                new_cent = np.mean(np.array(cent_dict[cent]), axis=0)
                new_cents.append(tuple(new_cent))
            return new_cents


        def Kmeans(X, K=3, max_iter=10):
            np.random.seed(1)
            # sample K distinct rows as the initial centers
            inds = np.random.choice(len(X), K, replace=False)
            init_cents = [tuple(X[i]) for i in inds]
            cents = init_cents
            for k in range(max_iter):
                cent_dict = E_step(X, cents)
                new_cents = M_step(cent_dict)
                move = sum([calc_dist(c1, c2) for c1, c2 in zip(cents, new_cents)])
                if move < 0.1:
                    print('Converged in %s steps' % k)
                    break
                cents = new_cents
            return cent_dict
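
    A quick way to exercise it on synthetic blobs (my own illustration; the blob locations are arbitrary):

        import numpy as np

        np.random.seed(0)
        # three Gaussian blobs around (0, 0), (5, 5) and (0, 5)
        X = np.vstack([
            np.random.randn(50, 2),
            np.random.randn(50, 2) + np.array([5, 5]),
            np.random.randn(50, 2) + np.array([0, 5]),
        ])
        cent_dict = Kmeans(X, K=3, max_iter=20)
        for cent, members in cent_dict.items():
            print(np.round(cent, 2), len(members))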

    Neural Network

    Note the softmax computation: exp can overflow. The usual trick is to multiply both the numerator and the denominator of softmax by the same constant C, with log(C) = -max(z); that is what shift_scores does below.

    The loss uses logsumexp from the scipy package, for the same reason as in LR: logsumexp(z) = log(sum(exp(z))), computed in a numerically stable way.
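
    A minimal sketch of the shift trick, and of why shift_scores - logsumexp(shift_scores) gives the log-probabilities directly (illustrative only; the numbers are arbitrary):

        import numpy as np
        from scipy.special import logsumexp

        z = np.array([[1000.0, 1001.0, 1002.0]])        # naive exp(z) would overflow
        shift_z = z - np.max(z, axis=1, keepdims=True)  # log(C) = -max(z)
        softmax = np.exp(shift_z) / np.sum(np.exp(shift_z), axis=1, keepdims=True)
        # log softmax via the same shift: the -max(z) constant cancels inside logsumexp
        log_softmax = shift_z - logsumexp(shift_z, axis=1, keepdims=True)
        print(softmax, np.exp(log_softmax))             # both are about [0.09, 0.245, 0.665]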

        from scipy.special import logsumexp
        import numpy as np


        class Neural_Network:

            def __init__(self, n, h, c, std=1e-4):
                # n: input dimension, h: hidden layer size, c: number of classes
                W1 = np.random.randn(n, h) * std
                b1 = np.zeros(h)
                W2 = np.random.randn(h, c) * std
                b2 = np.zeros(c)
                self.params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

            def forward_backward_prop(self, X, y):
                # X: (N, n) inputs, y: (N, c) one-hot labels
                W1, b1 = self.params['W1'], self.params['b1']
                W2, b2 = self.params['W2'], self.params['b2']

                # forward prop
                hidden = X.dot(W1) + b1
                relu = np.maximum(0, hidden)
                scores = relu.dot(W2) + b2
                shift_scores = scores - np.max(scores, axis=1, keepdims=True)
                softmax = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1, keepdims=True)
                loss = -np.sum(y * (shift_scores - logsumexp(shift_scores, axis=1, keepdims=True))) / X.shape[0]

                # backward prop
                dscores = (softmax - y) / X.shape[0]
                drelu = dscores.dot(W2.T)
                dW2 = relu.T.dot(dscores)
                db2 = np.sum(dscores, axis=0)
                dhidden = (hidden > 0) * drelu
                dW1 = X.T.dot(dhidden)
                db1 = np.sum(dhidden, axis=0)

                grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}

                return loss, grads

            def train(self, X, y, lr=0.01, decay=0.95, iters=5000):
                loss_list, acc_list = [], []
                for it in range(iters):
                    loss, grads = self.forward_backward_prop(X, y)
                    loss_list.append(loss)
                    self.params['W1'] -= lr * grads['dW1']
                    self.params['b1'] -= lr * grads['db1']
                    self.params['W2'] -= lr * grads['dW2']
                    self.params['b2'] -= lr * grads['db2']

                    if it % 100 == 0:
                        yhat = self.predict(X)
                        acc = np.sum(np.argmax(y, axis=1) == yhat) / X.shape[0]
                        acc_list.append(acc)
                        lr *= decay

                return loss_list, acc_list

            def predict(self, X):
                hidden = X.dot(self.params['W1']) + self.params['b1']
                relu = np.maximum(0, hidden)
                scores = relu.dot(self.params['W2']) + self.params['b2']
                yhat = np.argmax(scores, axis=1)
                return yhat
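
    A minimal usage sketch (my own illustration; the data and hyperparameters are arbitrary). Note that train expects y as a one-hot matrix of shape (N, c):

        import numpy as np

        np.random.seed(0)
        # three Gaussian blobs in 2D, one per class
        X = np.vstack([np.random.randn(100, 2) + off for off in ([0, 0], [4, 4], [0, 4])])
        labels = np.repeat([0, 1, 2], 100)
        y = np.eye(3)[labels]                     # one-hot encode to (300, 3)

        net = Neural_Network(n=2, h=20, c=3, std=0.1)
        loss_list, acc_list = net.train(X, y, lr=0.5, iters=2000)
        print(loss_list[-1], acc_list[-1])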

    Recurrent Neural Network
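
    The cell below computes at = tanh(Waa·a_prev + Wax·xt + ba) and yt = softmax(Wya·at + by). The loss is the cross-entropy summed over all time steps, gradients are obtained by back-propagation through time, and each gradient is clipped to [-5, 5] before the parameter update to guard against exploding gradients.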

        import numpy as np


        def tanh(x):
            # np.tanh is the numerically safer choice for very large |x|
            return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

        def softmax(x):
            ex = np.exp(x - np.max(x))
            return ex / ex.sum(axis=0)


        class RNN:

            def __init__(self, na, nx, ny, m, seed=1):
                # na: hidden size, nx: input size, ny: output size, m: batch size
                np.random.seed(seed)
                Waa = np.random.randn(na, na)
                Wax = np.random.randn(na, nx)
                Wya = np.random.randn(ny, na)
                ba = np.random.randn(na, 1)
                by = np.random.randn(ny, 1)
                self.a0 = np.random.randn(na, m)
                self.params = {'Waa': Waa, 'Wax': Wax, 'Wya': Wya, 'ba': ba, 'by': by}

            def RNN_cell_forward(self, xt, a_prev):
                """
                Inputs:
                xt -- Current input data, of shape (nx, m)
                a_prev -- Previous hidden state, of shape (na, m)

                Outputs:
                at -- Current hidden state, of shape (na, m)
                yt -- Current prediction, of shape (ny, m)
                """
                Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
                Wya, by = self.params['Wya'], self.params['by']

                at = tanh(Waa.dot(a_prev) + Wax.dot(xt) + ba)
                score = Wya.dot(at) + by
                yt = softmax(score)
                return at, yt

            def RNN_forward(self, X, y):
                """
                Inputs:
                X -- Input data for every time step, of shape (nx, m, Tx)
                y -- Target for every time step, of shape (ny, m, Tx)

                Outputs:
                loss -- Cross-entropy loss summed over time steps and examples
                cache -- Tuple (a, yhat): hidden states of shape (na, m, Tx) and
                         predictions of shape (ny, m, Tx)
                """
                a_prev = self.a0
                na, m = a_prev.shape
                ny = y.shape[0]
                Tx = X.shape[2]

                a = np.zeros((na, m, Tx))
                yhat = np.zeros((ny, m, Tx))
                loss = 0
                for t in range(Tx):
                    a_next, yt = self.RNN_cell_forward(X[:, :, t], a_prev)
                    yhat[:, :, t] = yt
                    a[:, :, t] = a_next
                    # y is one-hot, so this picks out log p(true class) per example
                    loss -= np.sum(y[:, :, t] * np.log(yt))
                    a_prev = a_next

                cache = (a, yhat)
                return loss, cache

            def RNN_cell_backward(self, dz, grads, cache):
                """
                Inputs:
                dz -- Gradient of loss with respect to score
                grads -- Dictionary containing all gradients
                cache -- Tuple containing xt, a_next, a_prev

                Outputs:
                grads -- Dictionary containing all gradients
                """
                xt, a_next, a_prev = cache
                Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
                Wya, by = self.params['Wya'], self.params['by']

                grads['dWya'] += dz.dot(a_next.T)
                grads['dby'] += np.sum(dz, axis=1, keepdims=True)
                da_y = Wya.T.dot(dz)
                da_a = grads['da_prev']
                da_next = da_y + da_a      # da flows in along two paths: from yt (da_y) and from the next time step (da_a)
                dtanh = (1 - a_next**2) * da_next
                grads['dWaa'] += dtanh.dot(a_prev.T)
                grads['da_prev'] = Waa.T.dot(dtanh)
                grads['dWax'] += dtanh.dot(xt.T)
                grads['dba'] += np.sum(dtanh, axis=1, keepdims=True)

                return grads

            def RNN_backward(self, X, y, cache):
                """
                Inputs:
                X -- Input data for every time step, of shape (nx, m, Tx)
                y -- Target for every time step, of shape (ny, m, Tx)
                cache -- Tuple from RNN_forward, containing a, yhat

                Outputs:
                grads -- Dictionary containing all gradients
                a -- Hidden states for every time step, of shape (na, m, Tx)
                """
                a, yhat = cache
                Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
                Wya, by = self.params['Wya'], self.params['by']
                Tx = X.shape[2]

                grads = {}
                grads['dWya'], grads['dby'] = np.zeros_like(Wya), np.zeros_like(by)
                grads['dWaa'], grads['da_prev'] = np.zeros_like(Waa), np.zeros_like(self.a0)
                grads['dWax'], grads['dba'] = np.zeros_like(Wax), np.zeros_like(ba)

                for t in reversed(range(Tx)):
                    # gradient of the loss with respect to the score (softmax + cross-entropy)
                    dz = yhat[:, :, t] - y[:, :, t]
                    # at t = 0 the previous hidden state is a0, not a[:, :, -1]
                    a_prev = a[:, :, t - 1] if t > 0 else self.a0
                    cell_cache = X[:, :, t], a[:, :, t], a_prev
                    grads = self.RNN_cell_backward(dz, grads, cell_cache)

                return grads, a

            def update_parameters(self, grads, lr):
                self.params['Wax'] -= lr * grads['dWax']
                self.params['Waa'] -= lr * grads['dWaa']
                self.params['Wya'] -= lr * grads['dWya']
                self.params['ba']  -= lr * grads['dba']
                self.params['by']  -= lr * grads['dby']

            def clip(self, grads, maxValue):
                # clip each gradient in place to [-maxValue, maxValue]
                for key in ['dWax', 'dWaa', 'dWya', 'dba', 'dby']:
                    gradient = grads[key]
                    grads[key] = np.clip(gradient, -maxValue, maxValue, out=gradient)
                return grads

            def train(self, X, y, lr, iters=1):
                loss_list = []
                for it in range(iters):
                    loss, cache = self.RNN_forward(X, y)
                    grads, a = self.RNN_backward(X, y, cache)
                    # Clip gradients between -5 (min) and 5 (max)
                    grads = self.clip(grads, 5)
                    self.update_parameters(grads, lr)
                    loss_list.append(loss)
                return loss_list, grads, a
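
    A minimal usage sketch as a smoke test (my own illustration; the dimensions, learning rate, and the random next-symbol targets are arbitrary). Both X and y are one-hot along their first axis:

        import numpy as np

        np.random.seed(0)
        nx, na, ny, m, Tx = 5, 8, 5, 4, 10          # input size, hidden size, output size, batch, time steps
        ids = np.random.randint(0, nx, size=(m, Tx))
        X = np.zeros((nx, m, Tx))
        y = np.zeros((ny, m, Tx))
        for i in range(m):
            for t in range(Tx):
                X[ids[i, t], i, t] = 1              # one-hot input at time t
                y[ids[i, (t + 1) % Tx], i, t] = 1   # target is the next symbol
        rnn = RNN(na, nx, ny, m)
        loss_list, grads, a = rnn.train(X, y, lr=0.01, iters=100)
        print(loss_list[0], loss_list[-1])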
Original article: https://www.cnblogs.com/muyanchengfan/p/9599702.html