zoukankan html css js c++ java
fc_net.py cs231n

如果有错误，欢迎指出，不胜感激
import numpy as np

from cs231n.layers import *
from cs231n.layer_utils import *


class TwoLayerNet(object):
    """
    Two-layer fully-connected classifier built from modular layers.

    The architecture is affine - relu - affine - softmax, with input
    dimension D, hidden dimension H and C output classes.

    The class only evaluates the loss and its gradients; parameter updates
    are left to a separate Solver object.

    Learnable parameters live in the dictionary self.params, which maps the
    names 'W1', 'b1', 'W2', 'b2' to numpy arrays.
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Build the network and initialize its parameters.

        Inputs:
        - input_dim: Integer size D of a flattened input.
        - hidden_dim: Integer size H of the hidden layer.
        - num_classes: Integer number of classes C.
        - weight_scale: Standard deviation of the Gaussian used to draw the
          initial weights.
        - reg: Scalar L2 regularization strength.
        """
        self.params = {}
        self.reg = reg

        # Layers are created in order (W1 before W2) so the sequence of draws
        # from numpy's global RNG is the same for a fixed seed.
        layer_shapes = ((input_dim, hidden_dim), (hidden_dim, num_classes))
        for idx, (fan_in, fan_out) in enumerate(layer_shapes, 1):
            self.params['W%d' % idx] = np.random.randn(fan_in, fan_out) * weight_scale
            self.params['b%d' % idx] = np.zeros((fan_out,))

    def loss(self, X, y=None):
        """
        Evaluate the network on a minibatch.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k).
        - y: Array of labels of shape (N,), or None for a test-time pass.

        Returns:
        - If y is None: the class scores produced by the second affine layer.
        - Otherwise a tuple (loss, grads), where loss is the softmax loss
          plus the L2 penalty on W1 and W2, and grads maps every key of
          self.params to the gradient of the loss for that parameter.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: fused affine+relu block, then the output affine layer.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        class_scores, scores_cache = affine_forward(hidden, W2, b2)
        if y is None:
            return class_scores

        data_loss, dscores = softmax_loss(class_scores, y)
        # Only the weight matrices are regularized; biases are left out.
        l2_penalty = 0.5 * self.reg * (np.sum(W2 * W2) + np.sum(W1 * W1))
        total_loss = data_loss + l2_penalty

        # Backward pass, output layer first.
        grads = {}
        dhidden, grads['W2'], grads['b2'] = affine_backward(dscores, scores_cache)
        _, grads['W1'], grads['b1'] = affine_relu_backward(dhidden, hidden_cache)

        # Gradient of the 0.5 * reg * ||W||^2 term.
        for name in ('W2', 'W1'):
            grads[name] += self.params[name] * self.reg

        return total_loss, grads


class FullyConnectedNet(object):
    """
    A fully-connected neural network with an arbitrary number of hidden
    layers, ReLU nonlinearities, and a softmax loss function. Dropout and
    batch normalization are optional. For a network with L layers, the
    architecture is

    {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

    where batch normalization and dropout are optional, and the {...} block
    is repeated L - 1 times.

    Similar to TwoLayerNet, learnable parameters are stored in the
    self.params dictionary and are meant to be trained by a Solver object.
    """

    def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
                 dropout=0, use_batchnorm=False, reg=0.0,
                 weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A list of integers giving the size of each hidden
          layer. It is only read, never modified.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If
          dropout=0 the network does not use dropout at all.
        - use_batchnorm: Whether or not the network uses batch normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all parameters are cast to this
          datatype. float32 is faster but less accurate, so use float64 for
          numeric gradient checking.
        - seed: If not None, this random seed is stored in the dropout
          parameters, which makes the dropout layers deterministic so the
          model can be gradient checked.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.dtype = dtype
        self.params = {}

        # NOTE: in this implementation num_layers counts the HIDDEN blocks
        # only (i.e. L - 1 in the class docstring); the final affine layer is
        # addressed as num_layers + 1. The value is kept as-is so existing
        # readers of the attribute see no change.
        self.num_layers = len(hidden_dims)
        num_hidden = self.num_layers

        # Hidden blocks: weights ~ N(0, weight_scale^2), biases zero; batch
        # norm starts as the identity transform (gamma = 1, beta = 0).
        fan_in = input_dim
        for idx, width in enumerate(hidden_dims, 1):
            self.params['W%d' % idx] = np.random.randn(fan_in, width) * weight_scale
            self.params['b%d' % idx] = np.zeros(width)
            if self.use_batchnorm:
                self.params['beta%d' % idx] = np.zeros(width)
                self.params['gamma%d' % idx] = np.ones(width)
            fan_in = width

        # Output layer (never followed by batch norm, relu or dropout).
        out_idx = num_hidden + 1
        self.params['W%d' % out_idx] = np.random.randn(fan_in, num_classes) * weight_scale
        self.params['b%d' % out_idx] = np.zeros(num_classes)

        # The same dropout_param dictionary is handed to every dropout layer
        # so each one knows the dropout probability and the current mode.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # Each batch normalization layer gets its own bn_param dictionary;
        # bn_params[i] belongs to hidden block i + 1.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{'mode': 'train'} for _ in range(num_hidden)]

        # Cast all parameters to the requested datatype. Only values of
        # existing keys are replaced, so iterating items() here is safe.
        for name, value in self.params.items():
            self.params[name] = value.astype(dtype)

    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k).
        - y: Array of labels of shape (N,), or None for a test-time pass.

        Returns:
        - If y is None: the class scores from the final affine layer.
        - Otherwise a tuple (loss, grads), where loss is the softmax loss
          plus the L2 penalty on every weight matrix, and grads maps each
          key of self.params to the gradient for that parameter.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Dropout and batch norm behave differently at train and test time,
        # so the mode must be refreshed on every call.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                # The key is the literal string 'mode'; indexing with the
                # variable would create a stray 'train'/'test' entry and
                # leave batch norm stuck in train mode.
                bn_param['mode'] = mode

        num_hidden = self.num_layers
        out_idx = num_hidden + 1
        caches = {}

        # Forward pass through the hidden blocks.
        out = X
        for idx in range(1, num_hidden + 1):
            out, caches['cache%d' % idx] = affine_forward(
                out, self.params['W%d' % idx], self.params['b%d' % idx])
            if self.use_batchnorm:
                out, caches['cachebn%d' % idx] = batchnorm_forward(
                    out, self.params['gamma%d' % idx],
                    self.params['beta%d' % idx], self.bn_params[idx - 1])
            out, caches['cacher%d' % idx] = relu_forward(out)
            if self.use_dropout:
                out, caches['cached%d' % idx] = dropout_forward(
                    out, self.dropout_param)

        scores, caches['cache%d' % out_idx] = affine_forward(
            out, self.params['W%d' % out_idx], self.params['b%d' % out_idx])

        # Test mode: no labels, so only the scores are returned.
        if mode == 'test':
            return scores

        grads = {}
        loss, dout = softmax_loss(scores, y)

        # L2 penalty over all weight matrices (biases, gamma and beta are
        # not regularized).
        for idx in range(1, out_idx + 1):
            loss += np.sum(self.params['W%d' % idx] ** 2) * 0.5 * self.reg

        # Backward pass: output layer first, then the hidden blocks in
        # reverse, undoing each block's sub-layers in reverse order.
        dout, grads['W%d' % out_idx], grads['b%d' % out_idx] = affine_backward(
            dout, caches['cache%d' % out_idx])
        grads['W%d' % out_idx] += self.params['W%d' % out_idx] * self.reg

        for idx in range(num_hidden, 0, -1):
            if self.use_dropout:
                dout = dropout_backward(dout, caches['cached%d' % idx])
            dout = relu_backward(dout, caches['cacher%d' % idx])
            if self.use_batchnorm:
                dout, grads['gamma%d' % idx], grads['beta%d' % idx] = \
                    batchnorm_backward_alt(dout, caches['cachebn%d' % idx])
            dout, grads['W%d' % idx], grads['b%d' % idx] = affine_backward(
                dout, caches['cache%d' % idx])
            grads['W%d' % idx] += self.params['W%d' % idx] * self.reg

        return loss, grads
查看全文
相关阅读:
BLE编程中关键步骤
 gradle相关配置内容解析
 Gradle版本变更的问题
 【问题】AndroidStudio导入项目一直卡在Building gradle project infod的最快速解决方案
 jdbc.properties各种数据库连接配置
 EL表达式语言总结
 Android sdk目录介绍
 chrome的常用快捷键和命令
 Unity Hub for Mac 破解
 MAC下安装配置Tomcat
原文地址：https://www.cnblogs.com/sfzyk/p/6733035.html