zoukankan      html  css  js  c++  java
  • 模型搭建练习1_用numpy和tensor、variable实现前后向传播、实现激活函数

    用numpy搭建一个简单的两层网络,手动实现forward和backward

     # Two-layer fully connected network (ReLU hidden layer) trained with a
     # hand-written forward and backward pass in NumPy.
     import numpy as np

     # N: batch size, D_in: input width, H: hidden width, D_out: output width.
     N, D_in, H, D_out = 64, 1000, 100, 10
     x = np.random.randn(N, D_in)      # (64, 1000)
     y = np.random.randn(N, D_out)     # (64, 10)
     w1 = np.random.randn(D_in, H)     # (1000, 100)
     w2 = np.random.randn(H, D_out)    # (100, 10)
     learning_rate = 1e-6

     for t in range(2):
         # Forward pass: compute predicted y
         h = x.dot(w1)                  # (64, 100)
         h_relu = np.maximum(h, 0)      # (64, 100) element-wise ReLU
         y_pred = h_relu.dot(w2)        # (64, 10)

         # Sum of squared errors over every element of the batch.
         loss = np.square(y_pred - y).sum()

         # Backprop to compute gradients of w1 and w2 with respect to loss
         grad_y_pred = 2.0 * (y_pred - y)
         grad_w2 = h_relu.T.dot(grad_y_pred)
         grad_h_relu = grad_y_pred.dot(w2.T)
         grad_h = grad_h_relu.copy()    # (64, 100)
         # ReLU backward: no gradient flows where the pre-activation was negative.
         grad_h[h < 0] = 0
         grad_w1 = x.T.dot(grad_h)      # (1000, 100); .T is the transpose

         # Update weights with plain gradient descent
         w1 -= learning_rate * grad_w1  # (1000, 100)
         w2 -= learning_rate * grad_w2

    用tensor搭建一个简单的两层网络,手动实现forward和backward

     # Two-layer fully connected network (ReLU hidden layer) trained with a
     # hand-written forward and backward pass on plain torch tensors.
     import torch

     dtype = torch.FloatTensor
     # dtype = torch.cuda.FloatTensor  # alternative tensor type for running on GPU

     # N is batch size; D_in is input dimension;
     # H is hidden dimension; D_out is output dimension.
     N, D_in, H, D_out = 64, 1000, 100, 10

     x = torch.randn(N, D_in).type(dtype)
     y = torch.randn(N, D_out).type(dtype)

     # Randomly initialize weights
     w1 = torch.randn(D_in, H).type(dtype)
     w2 = torch.randn(H, D_out).type(dtype)

     learning_rate = 1e-6
     for t in range(500):
         # Forward pass: compute predicted y
         h = x.mm(w1)                # matrix multiply; counterpart of numpy's dot
         h_relu = h.clamp(min=0)     # element-wise ReLU
         y_pred = h_relu.mm(w2)

         # Sum of squared errors over every element of the batch.
         loss = (y_pred - y).pow(2).sum()

         # Backprop to compute gradients of w1 and w2 with respect to loss
         grad_y_pred = 2.0 * (y_pred - y)
         grad_w2 = h_relu.t().mm(grad_y_pred)
         grad_h_relu = grad_y_pred.mm(w2.t())
         grad_h = grad_h_relu.clone()
         # ReLU backward: no gradient flows where the pre-activation was negative.
         grad_h[h < 0] = 0
         grad_w1 = x.t().mm(grad_h)

         # Update weights using gradient descent
         w1 -= learning_rate * grad_w1
         w2 -= learning_rate * grad_w2

     用variable实现forward和backward

     # Use PyTorch Variables and autograd to implement the two-layer network;
     # the backward pass through the network no longer has to be written by hand.
     import torch
     from torch.autograd import Variable

     dtype = torch.FloatTensor
     N, D_in, H, D_out = 64, 1000, 100, 10

     # NOTE: since PyTorch 0.4 Variable is a deprecated wrapper that simply
     # returns a Tensor; it is kept here because this example demonstrates it.

     # Setting requires_grad=False indicates that we do not need to compute
     # gradients with respect to these Variables during the backward pass.
     x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
     y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

     # Setting requires_grad=True indicates that we want to compute gradients
     # with respect to these Variables during the backward pass.
     w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
     w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

     learning_rate = 1e-6
     for t in range(2):
         # Forward pass: we do not need to keep references to intermediate
         # values since we are not implementing the backward pass by hand.
         y_pred = x.mm(w1).clamp(min=0).mm(w2)

         # loss is a 0-dim tensor. Use .item() to get the Python float:
         # indexing it as loss.data[0] raises IndexError on current PyTorch.
         loss = (y_pred - y).pow(2).sum()
         print(loss.item())

         # Autograd fills w1.grad and w2.grad.
         loss.backward()

         # The update itself must not be recorded in the autograd graph.
         with torch.no_grad():
             w1 -= learning_rate * w1.grad
             w2 -= learning_rate * w2.grad

             # Gradients accumulate across backward() calls, so reset them.
             w1.grad.zero_()
             w2.grad.zero_()

    用variable实现relu函数

     # Custom ReLU implemented as an autograd Function, used inside the
     # two-layer network in place of clamp(min=0).
     import torch
     from torch.autograd import Variable


     class MyReLU(torch.autograd.Function):
         """ReLU with a hand-written backward pass.

         forward and backward are static methods that receive a context
         object, and the function is invoked through ``MyReLU.apply``.
         Current PyTorch rejects the legacy form with instance methods that
         is called via ``MyReLU()``.
         """

         @staticmethod
         def forward(ctx, tensor):
             """Return ``tensor`` with negative entries clamped to zero."""
             # Keep the input so backward knows where the activation was off.
             ctx.save_for_backward(tensor)
             return tensor.clamp(min=0)

         @staticmethod
         def backward(ctx, grad_output):
             """Pass ``grad_output`` through, zeroed where the input was negative."""
             tensor, = ctx.saved_tensors
             grad_input = grad_output.clone()
             grad_input[tensor < 0] = 0
             return grad_input


     dtype = torch.FloatTensor
     N, D_in, H, D_out = 64, 1000, 100, 10

     # NOTE: since PyTorch 0.4 Variable is a deprecated wrapper that simply
     # returns a Tensor; it is kept here because this example demonstrates it.
     x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
     y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

     w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
     w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

     learning_rate = 1e-6
     for t in range(2):
         # Function.apply is the entry point that hooks MyReLU into autograd.
         relu = MyReLU.apply

         # Forward pass
         y_pred = relu(x.mm(w1)).mm(w2)

         loss = (y_pred - y).pow(2).sum()
         loss.backward()

         # The update itself must not be recorded in the autograd graph.
         with torch.no_grad():
             w1 -= learning_rate * w1.grad
             w2 -= learning_rate * w2.grad

             # Gradients accumulate across backward() calls, so reset them.
             w1.grad.zero_()
             w2.grad.zero_()
  • 相关阅读:
    Linux下多线程查看工具(pstree、ps、pstack)
    linux的netstat命令详解
    linux的netstat命令详解
    实例解说Linux命令行uniq
    实例解说Linux命令行uniq
    实例解说Linux命令行uniq
    linux之sort用法
    linux之sort用法
    linux之sort用法
    oracle服务器和客户端字符集的查看和修改
  • 原文地址:https://www.cnblogs.com/Joyce-song94/p/7474571.html
Copyright © 2011-2022 走看看