  • fc_net.py cs231n

    If you spot any mistakes, please point them out; I'd be very grateful.

    import numpy as np
    
    from cs231n.layers import *
    from cs231n.layer_utils import *
    
    
    class TwoLayerNet(object):
      """
      A two-layer fully-connected neural network with ReLU nonlinearity and
      softmax loss that uses a modular layer design. We assume an input dimension
      of D, a hidden dimension of H, and perform classification over C classes.
      
      The architecture should be affine - relu - affine - softmax.
    
      Note that this class does not implement gradient descent; instead, it
      will interact with a separate Solver object that is responsible for running
      optimization.
    
      The learnable parameters of the model are stored in the dictionary
      self.params that maps parameter names to numpy arrays.
      """
      
      def __init__(self, input_dim=3*32*32, hidden_dim=100, num_classes=10,
                   weight_scale=1e-3, reg=0.0):
        """
        Initialize a new network.
    
        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        self.params = {}
        self.reg = reg
        # Weights are initialized from a zero-mean Gaussian scaled by
        # weight_scale; biases are initialized to zero.
        self.params['W1'] = np.random.randn(input_dim, hidden_dim) * weight_scale
        self.params['b1'] = np.zeros(hidden_dim)
        self.params['W2'] = np.random.randn(hidden_dim, num_classes) * weight_scale
        self.params['b2'] = np.zeros(num_classes)

      def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        # Forward pass: affine - relu - affine.
        out1, cache1 = affine_relu_forward(X, self.params['W1'], self.params['b1'])
        scores, cache2 = affine_forward(out1, self.params['W2'], self.params['b2'])

        # If y is None we are in test mode, so just return the scores.
        if y is None:
          return scores

        # Softmax data loss plus L2 regularization on the weights (not biases):
        # loss = softmax_loss + 0.5 * reg * (sum(W1**2) + sum(W2**2))
        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(self.params['W1'] * self.params['W1']) +
                                  np.sum(self.params['W2'] * self.params['W2']))

        # Backward pass through the two layers.
        grads = {}
        dout1, grads['W2'], grads['b2'] = affine_backward(dscores, cache2)
        dx, grads['W1'], grads['b1'] = affine_relu_backward(dout1, cache1)
        grads['W2'] += self.params['W2'] * self.reg
        grads['W1'] += self.params['W1'] * self.reg

        return loss, grads


    class FullyConnectedNet(object):
      """
      A fully-connected neural network with an arbitrary number of hidden layers,
      ReLU nonlinearities, and a softmax loss function. This will also implement
      dropout and batch normalization as options. For a network with L layers,
      the architecture will be

      {affine - [batch norm] - relu - [dropout]} x (L - 1) - affine - softmax

      where batch normalization and dropout are optional, and the {...} block is
      repeated L - 1 times.

      Similar to the TwoLayerNet above, learnable parameters are stored in the
      self.params dictionary and will be learned using the Solver class.
      """

      def __init__(self, hidden_dims=[100], input_dim=3*32*32, num_classes=10,
                   dropout=0, use_batchnorm=False, reg=0.0,
                   weight_scale=1e-2, dtype=np.float32, seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A list of integers giving the size of each hidden layer.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0
          then the network should not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all computations will be performed using
          this datatype. float32 is faster but less accurate, so you should use
          float64 for numeric gradient checking.
        - seed: If not None, then pass this random seed to the dropout layers.
          This will make the dropout layers deterministic so we can gradient
          check the model.
""" self.use_batchnorm = use_batchnorm self.use_dropout = dropout > 0 self.reg = reg self.num_layers = 1 + len(hidden_dims) self.dtype = dtype self.params = {} self.num_layers=len(hidden_dims) num_layers=self.num_layers last_dims=input_dim for i in xrange(num_layers): self.params['W%d'%(i+1)]=np.random.randn(last_dims,hidden_dims[i])*weight_scale self.params['b%d'%(i+1)]=np.zeros(hidden_dims[i],) if self.use_batchnorm: self.params['beta%d'%(i+1)]=np.zeros(hidden_dims[i],) self.params['gamma%d'%(i+1)]=np.ones(hidden_dims[i],) last_dims=hidden_dims[i] self.params['W%d'%(num_layers+1)]=np.random.randn(last_dims,num_classes)*weight_scale self.params['b%d'%(num_layers+1)]=np.zeros(num_classes,) # When using dropout we need to pass a dropout_param dictionary to each # dropout layer so that the layer knows the dropout probability and the mode # (train / test). You can pass the same dropout_param to each dropout layer. self.dropout_param = {} if self.use_dropout: self.dropout_param = {'mode': 'train', 'p': dropout} if seed is not None: self.dropout_param['seed'] = seed # With batch normalization we need to keep track of running means and # variances, so we need to pass a special bn_param object to each batch # normalization layer. You should pass self.bn_params[0] to the forward pass # of the first batch normalization layer, self.bn_params[1] to the forward # pass of the second batch normalization layer, etc. self.bn_params = [] if self.use_batchnorm: self.bn_params = [{'mode': 'train'} for i in xrange(self.num_layers)] #print len(self.bn_params) # Cast all parameters to the correct datatype for k, v in self.params.iteritems(): self.params[k] = v.astype(dtype) def loss(self, X, y=None): """ Compute loss and gradient for the fully-connected net. Input / output: Same as TwoLayerNet above. """ X = X.astype(self.dtype) mode = 'test' if y is None else 'train' # Set train/test mode for batchnorm params and dropout param since they # behave differently during training and testing. 
        if self.dropout_param is not None:
          self.dropout_param['mode'] = mode
        if self.use_batchnorm:
          for bn_param in self.bn_params:
            bn_param['mode'] = mode

        # Forward pass: {affine - [batch norm] - relu - [dropout]} for each
        # hidden layer, caching everything needed for the backward pass, then
        # a final affine layer to produce the class scores.
        cache = {}
        num_layers = self.num_layers
        out = X
        for i in xrange(num_layers):
          out, cache['cache%d' % (i+1)] = affine_forward(out, self.params['W%d' % (i+1)],
                                                         self.params['b%d' % (i+1)])
          if self.use_batchnorm:
            out, cache['cachebn%d' % (i+1)] = batchnorm_forward(out, self.params['gamma%d' % (i+1)],
                                                                self.params['beta%d' % (i+1)],
                                                                self.bn_params[i])
          out, cache['cacher%d' % (i+1)] = relu_forward(out)
          if self.use_dropout:
            out, cache['cached%d' % (i+1)] = dropout_forward(out, self.dropout_param)
        scores, cache['cache%d' % (num_layers+1)] = affine_forward(out, self.params['W%d' % (num_layers+1)],
                                                                   self.params['b%d' % (num_layers+1)])

        # If test mode, return early.
        if mode == 'test':
          return scores

        # Softmax data loss plus L2 regularization over all weight matrices.
        loss, dscores = softmax_loss(scores, y)
        for i in xrange(num_layers + 1):
          loss += 0.5 * self.reg * np.sum(self.params['W%d' % (i+1)] ** 2)

        # Backward pass: first through the final affine layer, then through the
        # hidden blocks in reverse order (dropout - relu - [batch norm] - affine).
        grads = {}
        dout, grads['W%d' % (num_layers+1)], grads['b%d' % (num_layers+1)] = \
            affine_backward(dscores, cache['cache%d' % (num_layers+1)])
        grads['W%d' % (num_layers+1)] += self.params['W%d' % (num_layers+1)] * self.reg
        for i in xrange(num_layers, 0, -1):
          if self.use_dropout:
            dout = dropout_backward(dout, cache['cached%d' % i])
          dout = relu_backward(dout, cache['cacher%d' % i])
          if self.use_batchnorm:
            dout, grads['gamma%d' % i], grads['beta%d' % i] = batchnorm_backward_alt(dout, cache['cachebn%d' % i])
          dout, grads['W%d' % i], grads['b%d' % i] = affine_backward(dout, cache['cache%d' % i])
          grads['W%d' % i] += self.params['W%d' % i] * self.reg

        return loss, grads
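
    As a quick sanity check, the sketch below builds both networks on random data and exercises the loss API described in the docstrings: loss(X) returns class scores at test time, and loss(X, y) returns a (loss, grads) pair whose keys mirror self.params. This is only a minimal illustration; it assumes the imports at the top of fc_net.py and working cs231n.layers / cs231n.layer_utils implementations, and the shapes and hyperparameters are arbitrary.

    # Minimal smoke test (illustrative only; shapes and hyperparameters are arbitrary).
    N, D, H, C = 5, 3*32*32, 100, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=N)

    two_layer = TwoLayerNet(input_dim=D, hidden_dim=H, num_classes=C, reg=0.1)
    scores = two_layer.loss(X)            # test-time forward pass, shape (N, C)
    loss, grads = two_layer.loss(X, y)    # training-time loss and gradients
    assert set(grads.keys()) == set(two_layer.params.keys())

    full_net = FullyConnectedNet([100, 50], input_dim=D, num_classes=C,
                                 dropout=0.5, use_batchnorm=True, reg=0.1,
                                 weight_scale=5e-2, dtype=np.float64, seed=0)
    loss, grads = full_net.loss(X, y)     # also returns gamma/beta grads for batchnorm
    assert set(grads.keys()) == set(full_net.params.keys())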
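
    The FullyConnectedNet docstring recommends float64 for numeric gradient checking. A hand-rolled centered-difference check on a few entries of W1 might look like the following; this is just a sketch (the assignment also ships its own gradient-checking utilities), and the layer sizes here are made up for illustration.

    # Centered finite differences on a few entries of W1 (illustration only).
    model = FullyConnectedNet([10, 10], input_dim=20, num_classes=5,
                              reg=0.0, dtype=np.float64)
    X_small = np.random.randn(4, 20)
    y_small = np.random.randint(5, size=4)
    _, grads = model.loss(X_small, y_small)

    h = 1e-5
    W1 = model.params['W1']
    for idx in [(0, 0), (3, 7), (19, 9)]:
      old_val = W1[idx]
      W1[idx] = old_val + h
      loss_plus, _ = model.loss(X_small, y_small)
      W1[idx] = old_val - h
      loss_minus, _ = model.loss(X_small, y_small)
      W1[idx] = old_val
      numeric = (loss_plus - loss_minus) / (2 * h)
      print('W1[%d, %d]: analytic %.6f vs numeric %.6f'
            % (idx[0], idx[1], grads['W1'][idx], numeric))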

      


  • Original post: https://www.cnblogs.com/sfzyk/p/6733035.html